Project

General

Profile

« Previous | Next » 

Revision 8646

First pass at a class for summarizing attribute information for analysis. (semtools) https://projects.ecoinformatics.org/ecoinfo/issues/6256

View differences:

src/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizer.java
1
package edu.ucsb.nceas.metacat.annotation;
2

  
3
import java.io.InputStream;
4
import java.sql.PreparedStatement;
5
import java.sql.SQLException;
6
import java.util.ArrayList;
7
import java.util.List;
8
import java.util.Vector;
9

  
10
import org.apache.log4j.Logger;
11
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
12
import org.dataone.service.types.v1.Identifier;
13
import org.dataone.service.types.v1.Session;
14
import org.dataone.service.types.v1.Subject;
15
import org.ecoinformatics.datamanager.parser.Attribute;
16
import org.ecoinformatics.datamanager.parser.DataPackage;
17
import org.ecoinformatics.datamanager.parser.Entity;
18
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
19
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
20

  
21
import edu.ucsb.nceas.metacat.DBUtil;
22
import edu.ucsb.nceas.metacat.DocumentImpl;
23
import edu.ucsb.nceas.metacat.IdentifierManager;
24
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
25
import edu.ucsb.nceas.metacat.database.DBConnection;
26
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
27
import edu.ucsb.nceas.metacat.dataone.MNodeService;
28
import edu.ucsb.nceas.metacat.properties.PropertyService;
29
import edu.ucsb.nceas.metacat.util.DocumentUtil;
30
import edu.ucsb.nceas.utilities.SortedProperties;
31

  
32
public class DatapackageSummarizer {
33

  
34
	private static Logger logMetacat = Logger.getLogger(DatapackageSummarizer.class);
35
	
36
	public void summarize(List<Identifier> identifiers) throws SQLException {
37
		
38
		DBConnection dbconn = null;
39

  
40
		try {
41
			dbconn = DBConnectionPool.getDBConnection("DatapackageSummarizer.summarize");
42
			
43
			PreparedStatement dropStatement = dbconn.prepareStatement("DROP TABLE IF EXISTS entity_summary");
44
			dropStatement.execute();
45
	
46
			PreparedStatement createStatement = dbconn.prepareStatement(
47
					"CREATE TABLE entity_summary (" +
48
					"guid text, " +
49
					"title text, " +
50
					"entity text," +
51
					"attributeName text," +
52
					"attributeLabel text," +
53
					"attributeDefinition text," +
54
					"attributeType text," +
55
					"attributeScale text," +
56
					"attributeUnitType text," +
57
					"attributeUnit text," +
58
					"attributeDomain text" +
59
					")");
60
			createStatement.execute();
61
			
62
			PreparedStatement insertStatement = dbconn.prepareStatement(
63
					"INSERT INTO entity_summary " +
64
					"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
65
			
66
			for (Identifier pid: identifiers) {
67
			
68
				logMetacat.debug("Parsing pid: " + pid.getValue());
69
				
70
				try {
71
					
72
					// for using the MN API as the MN itself
73
					MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
74
					Session session = new Session();
75
			        Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
76
			        session.setSubject(subject);
77
					InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
78
			
79
					// parse the metadata
80
					DataPackageParserInterface parser = new Eml200DataPackageParser();
81
					parser.parse(emlStream);
82
					DataPackage dataPackage = parser.getDataPackage();
83
					String title = dataPackage.getTitle();
84
					logMetacat.debug("Title: " + title);
85
					
86
					Entity[] entities = dataPackage.getEntityList();
87
					if (entities != null) {
88
						for (Entity entity: entities) {
89
							String entityName = entity.getName();
90
							logMetacat.debug("Entity name: " + entityName);
91
							Attribute[] attributes = entity.getAttributeList().getAttributes();
92
							for (Attribute attribute: attributes) {
93
								String attributeName = attribute.getName();
94
								String attributeLabel = attribute.getLabel();
95
								String attributeDefinition = attribute.getDefinition();
96
								String attributeType = attribute.getAttributeType();
97
								String attributeScale = attribute.getMeasurementScale();
98
								String attributeUnitType = attribute.getUnitType();
99
								String attributeUnit = attribute.getUnit();
100
								String attributeDomain = attribute.getDomain().getClass().getSimpleName();
101
	
102
								logMetacat.debug("Attribute name: " + attributeName);
103
								logMetacat.debug("Attribute label: " + attributeLabel);
104
								logMetacat.debug("Attribute definition: " + attributeDefinition);
105
								logMetacat.debug("Attribute type: " + attributeType);
106
								logMetacat.debug("Attribute scale: " + attributeScale);
107
								logMetacat.debug("Attribute unit type: " + attributeUnitType);
108
								logMetacat.debug("Attribute unit: " + attributeUnit);
109
								logMetacat.debug("Attribute domain: " + attributeDomain);
110
								
111
								// set the values for this attribute
112
								insertStatement.setString(1, pid.getValue());
113
								insertStatement.setString(2, title);
114
								insertStatement.setString(3, entityName);
115
								insertStatement.setString(4, attributeName);
116
								insertStatement.setString(5, attributeLabel);
117
								insertStatement.setString(6, attributeDefinition);
118
								insertStatement.setString(7, attributeType);
119
								insertStatement.setString(8, attributeScale);
120
								insertStatement.setString(9, attributeUnitType);
121
								insertStatement.setString(10, attributeUnit);
122
								insertStatement.setString(11, attributeDomain);
123
								insertStatement.execute();
124
								
125
							}		
126
						}
127
					}
128
					
129
				} catch (Exception e) {
130
					logMetacat.warn("error parsing metadata for: " + pid.getValue(), e);
131
				}
132
			}
133
		} catch (SQLException sqle) {
134
			// just throw it
135
			throw sqle;
136
		} finally {
137
			if (dbconn != null) {
138
				DBConnectionPool.returnDBConnection(dbconn, 0);
139
				dbconn.close();
140
			}
141
		}
142
	}
143
	
144
	public static void main(String[] args) throws Exception {
145
		
146
		// set up the properties based on the test/deployed configuration of the workspace
147
		SortedProperties testProperties = new SortedProperties("test/test.properties");
148
		testProperties.load();
149
		String metacatContextDir = testProperties.getProperty("metacat.contextDir");
150
		PropertyService.getInstance(metacatContextDir + "/WEB-INF");
151
		
152
		// summarize the packages
153
		DatapackageSummarizer ds = new DatapackageSummarizer();
154
		List<Identifier> identifiers = new ArrayList<Identifier>();
155
		Vector<String> idList = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, 1);
156
		Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, 1);
157
		Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, 1);
158
		Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, 1);
159
		
160
		idList.addAll(idList1);
161
		idList.addAll(idList2);
162
		idList.addAll(idList3);
163
		
164
		for (String localId : idList) {
165
			try {
166
				String guid = IdentifierManager.getInstance().getGUID(
167
						DocumentUtil.getDocIdFromAccessionNumber(localId), 
168
						DocumentUtil.getRevisionFromAccessionNumber(localId));
169
				Identifier pid = new Identifier();
170
				pid.setValue(guid);
171
				identifiers.add(pid);
172
			} catch (McdbDocNotFoundException nfe) {
173
				// just skip it
174
				continue;
175
			}
176
		}
177
		ds.summarize(identifiers);
178
		System.exit(0);
179
	}
180
	
181
}
0 182

  

Also available in: Unified diff