Revision 8646
Added by ben leinfelder over 10 years ago
src/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizer.java | ||
---|---|---|
1 |
package edu.ucsb.nceas.metacat.annotation; |
|
2 |
|
|
3 |
import java.io.InputStream; |
|
4 |
import java.sql.PreparedStatement; |
|
5 |
import java.sql.SQLException; |
|
6 |
import java.util.ArrayList; |
|
7 |
import java.util.List; |
|
8 |
import java.util.Vector; |
|
9 |
|
|
10 |
import org.apache.log4j.Logger; |
|
11 |
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest; |
|
12 |
import org.dataone.service.types.v1.Identifier; |
|
13 |
import org.dataone.service.types.v1.Session; |
|
14 |
import org.dataone.service.types.v1.Subject; |
|
15 |
import org.ecoinformatics.datamanager.parser.Attribute; |
|
16 |
import org.ecoinformatics.datamanager.parser.DataPackage; |
|
17 |
import org.ecoinformatics.datamanager.parser.Entity; |
|
18 |
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface; |
|
19 |
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser; |
|
20 |
|
|
21 |
import edu.ucsb.nceas.metacat.DBUtil; |
|
22 |
import edu.ucsb.nceas.metacat.DocumentImpl; |
|
23 |
import edu.ucsb.nceas.metacat.IdentifierManager; |
|
24 |
import edu.ucsb.nceas.metacat.McdbDocNotFoundException; |
|
25 |
import edu.ucsb.nceas.metacat.database.DBConnection; |
|
26 |
import edu.ucsb.nceas.metacat.database.DBConnectionPool; |
|
27 |
import edu.ucsb.nceas.metacat.dataone.MNodeService; |
|
28 |
import edu.ucsb.nceas.metacat.properties.PropertyService; |
|
29 |
import edu.ucsb.nceas.metacat.util.DocumentUtil; |
|
30 |
import edu.ucsb.nceas.utilities.SortedProperties; |
|
31 |
|
|
32 |
public class DatapackageSummarizer { |
|
33 |
|
|
34 |
private static Logger logMetacat = Logger.getLogger(DatapackageSummarizer.class); |
|
35 |
|
|
36 |
public void summarize(List<Identifier> identifiers) throws SQLException { |
|
37 |
|
|
38 |
DBConnection dbconn = null; |
|
39 |
|
|
40 |
try { |
|
41 |
dbconn = DBConnectionPool.getDBConnection("DatapackageSummarizer.summarize"); |
|
42 |
|
|
43 |
PreparedStatement dropStatement = dbconn.prepareStatement("DROP TABLE IF EXISTS entity_summary"); |
|
44 |
dropStatement.execute(); |
|
45 |
|
|
46 |
PreparedStatement createStatement = dbconn.prepareStatement( |
|
47 |
"CREATE TABLE entity_summary (" + |
|
48 |
"guid text, " + |
|
49 |
"title text, " + |
|
50 |
"entity text," + |
|
51 |
"attributeName text," + |
|
52 |
"attributeLabel text," + |
|
53 |
"attributeDefinition text," + |
|
54 |
"attributeType text," + |
|
55 |
"attributeScale text," + |
|
56 |
"attributeUnitType text," + |
|
57 |
"attributeUnit text," + |
|
58 |
"attributeDomain text" + |
|
59 |
")"); |
|
60 |
createStatement.execute(); |
|
61 |
|
|
62 |
PreparedStatement insertStatement = dbconn.prepareStatement( |
|
63 |
"INSERT INTO entity_summary " + |
|
64 |
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); |
|
65 |
|
|
66 |
for (Identifier pid: identifiers) { |
|
67 |
|
|
68 |
logMetacat.debug("Parsing pid: " + pid.getValue()); |
|
69 |
|
|
70 |
try { |
|
71 |
|
|
72 |
// for using the MN API as the MN itself |
|
73 |
MockHttpServletRequest request = new MockHttpServletRequest(null, null, null); |
|
74 |
Session session = new Session(); |
|
75 |
Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0); |
|
76 |
session.setSubject(subject); |
|
77 |
InputStream emlStream = MNodeService.getInstance(request).get(session, pid); |
|
78 |
|
|
79 |
// parse the metadata |
|
80 |
DataPackageParserInterface parser = new Eml200DataPackageParser(); |
|
81 |
parser.parse(emlStream); |
|
82 |
DataPackage dataPackage = parser.getDataPackage(); |
|
83 |
String title = dataPackage.getTitle(); |
|
84 |
logMetacat.debug("Title: " + title); |
|
85 |
|
|
86 |
Entity[] entities = dataPackage.getEntityList(); |
|
87 |
if (entities != null) { |
|
88 |
for (Entity entity: entities) { |
|
89 |
String entityName = entity.getName(); |
|
90 |
logMetacat.debug("Entity name: " + entityName); |
|
91 |
Attribute[] attributes = entity.getAttributeList().getAttributes(); |
|
92 |
for (Attribute attribute: attributes) { |
|
93 |
String attributeName = attribute.getName(); |
|
94 |
String attributeLabel = attribute.getLabel(); |
|
95 |
String attributeDefinition = attribute.getDefinition(); |
|
96 |
String attributeType = attribute.getAttributeType(); |
|
97 |
String attributeScale = attribute.getMeasurementScale(); |
|
98 |
String attributeUnitType = attribute.getUnitType(); |
|
99 |
String attributeUnit = attribute.getUnit(); |
|
100 |
String attributeDomain = attribute.getDomain().getClass().getSimpleName(); |
|
101 |
|
|
102 |
logMetacat.debug("Attribute name: " + attributeName); |
|
103 |
logMetacat.debug("Attribute label: " + attributeLabel); |
|
104 |
logMetacat.debug("Attribute definition: " + attributeDefinition); |
|
105 |
logMetacat.debug("Attribute type: " + attributeType); |
|
106 |
logMetacat.debug("Attribute scale: " + attributeScale); |
|
107 |
logMetacat.debug("Attribute unit type: " + attributeUnitType); |
|
108 |
logMetacat.debug("Attribute unit: " + attributeUnit); |
|
109 |
logMetacat.debug("Attribute domain: " + attributeDomain); |
|
110 |
|
|
111 |
// set the values for this attribute |
|
112 |
insertStatement.setString(1, pid.getValue()); |
|
113 |
insertStatement.setString(2, title); |
|
114 |
insertStatement.setString(3, entityName); |
|
115 |
insertStatement.setString(4, attributeName); |
|
116 |
insertStatement.setString(5, attributeLabel); |
|
117 |
insertStatement.setString(6, attributeDefinition); |
|
118 |
insertStatement.setString(7, attributeType); |
|
119 |
insertStatement.setString(8, attributeScale); |
|
120 |
insertStatement.setString(9, attributeUnitType); |
|
121 |
insertStatement.setString(10, attributeUnit); |
|
122 |
insertStatement.setString(11, attributeDomain); |
|
123 |
insertStatement.execute(); |
|
124 |
|
|
125 |
} |
|
126 |
} |
|
127 |
} |
|
128 |
|
|
129 |
} catch (Exception e) { |
|
130 |
logMetacat.warn("error parsing metadata for: " + pid.getValue(), e); |
|
131 |
} |
|
132 |
} |
|
133 |
} catch (SQLException sqle) { |
|
134 |
// just throw it |
|
135 |
throw sqle; |
|
136 |
} finally { |
|
137 |
if (dbconn != null) { |
|
138 |
DBConnectionPool.returnDBConnection(dbconn, 0); |
|
139 |
dbconn.close(); |
|
140 |
} |
|
141 |
} |
|
142 |
} |
|
143 |
|
|
144 |
public static void main(String[] args) throws Exception { |
|
145 |
|
|
146 |
// set up the properties based on the test/deployed configuration of the workspace |
|
147 |
SortedProperties testProperties = new SortedProperties("test/test.properties"); |
|
148 |
testProperties.load(); |
|
149 |
String metacatContextDir = testProperties.getProperty("metacat.contextDir"); |
|
150 |
PropertyService.getInstance(metacatContextDir + "/WEB-INF"); |
|
151 |
|
|
152 |
// summarize the packages |
|
153 |
DatapackageSummarizer ds = new DatapackageSummarizer(); |
|
154 |
List<Identifier> identifiers = new ArrayList<Identifier>(); |
|
155 |
Vector<String> idList = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, 1); |
|
156 |
Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, 1); |
|
157 |
Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, 1); |
|
158 |
Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, 1); |
|
159 |
|
|
160 |
idList.addAll(idList1); |
|
161 |
idList.addAll(idList2); |
|
162 |
idList.addAll(idList3); |
|
163 |
|
|
164 |
for (String localId : idList) { |
|
165 |
try { |
|
166 |
String guid = IdentifierManager.getInstance().getGUID( |
|
167 |
DocumentUtil.getDocIdFromAccessionNumber(localId), |
|
168 |
DocumentUtil.getRevisionFromAccessionNumber(localId)); |
|
169 |
Identifier pid = new Identifier(); |
|
170 |
pid.setValue(guid); |
|
171 |
identifiers.add(pid); |
|
172 |
} catch (McdbDocNotFoundException nfe) { |
|
173 |
// just skip it |
|
174 |
continue; |
|
175 |
} |
|
176 |
} |
|
177 |
ds.summarize(identifiers); |
|
178 |
System.exit(0); |
|
179 |
} |
|
180 |
|
|
181 |
} |
|
0 | 182 |
Also available in: Unified diff
First pass at a class for summarizing attribute information for analysis. (semtools) https://projects.ecoinformatics.org/ecoinfo/issues/6256