// NOTE(review): repository-browser header artifacts ("Project", "General", "Profile") removed — not part of the source file.
package edu.ucsb.nceas.metacat.annotation;

import java.io.InputStream;
import java.io.StringWriter;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import com.hp.hpl.jena.ontology.AllValuesFromRestriction;
import com.hp.hpl.jena.ontology.Individual;
import com.hp.hpl.jena.ontology.ObjectProperty;
import com.hp.hpl.jena.ontology.OntClass;
import com.hp.hpl.jena.ontology.OntDocumentManager;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.Ontology;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.tdb.TDBFactory;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.Session;
import org.dataone.service.types.v1.Subject;
import org.ecoinformatics.datamanager.parser.Attribute;
import org.ecoinformatics.datamanager.parser.DataPackage;
import org.ecoinformatics.datamanager.parser.Entity;
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;

import edu.ucsb.nceas.metacat.DBUtil;
import edu.ucsb.nceas.metacat.DocumentImpl;
import edu.ucsb.nceas.metacat.IdentifierManager;
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
import edu.ucsb.nceas.metacat.database.DBConnection;
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
import edu.ucsb.nceas.metacat.dataone.MNodeService;
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
import edu.ucsb.nceas.metacat.properties.PropertyService;
import edu.ucsb.nceas.metacat.replication.ReplicationService;
import edu.ucsb.nceas.metacat.util.DocumentUtil;
import edu.ucsb.nceas.utilities.SortedProperties;
59
/**
 * Summarizes an EML data package: generates OBOE/OA RDF annotations for its
 * attributes, indexes them via the Metacat Solr index, and can dump a
 * per-attribute summary table to the database.
 */
public class DatapackageSummarizer {

	private static Logger logMetacat = Logger.getLogger(DatapackageSummarizer.class);
	
	// Namespace URIs for the vocabularies referenced when building annotations.
	public static String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
	public static String rdfs = "http://www.w3.org/2000/01/rdf-schema#";
	public static String owl = "http://www.w3.org/2002/07/owl#";
	public static String oboe = "http://ecoinformatics.org/oboe/oboe.1.0/oboe.owl#";
	public static String oboe_core = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#";
	public static String oboe_characteristics = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-characteristics.owl#";
	public static String oboe_sbc = "http://ecoinformatics.org/oboe-ext/sbclter.1.0/oboe-sbclter.owl#";
	public static String oa = "http://www.w3.org/ns/oa#";
	// *_source values are the fetchable ontology documents, as opposed to the
	// bare namespace URIs above.
	public static String oa_source = "http://www.w3.org/ns/oa.rdf";
	public static String dcterms = "http://purl.org/dc/terms/";
	public static String dcterms_source = "http://dublincore.org/2012/06/14/dcterms.rdf";
	public static String foaf = "http://xmlns.com/foaf/0.1/";
	public static String foaf_source = "http://xmlns.com/foaf/spec/index.rdf";
    public static String prov = "http://www.w3.org/ns/prov#";
    public static String prov_source = "http://www.w3.org/ns/prov.owl";
    public static String cito =  "http://purl.org/spar/cito/";
    
	// BioPortal ontology acronym used for concept lookups (see lookup* methods).
	public static String OBOE_SBC = "OBOE-SBC";
	
	// Guards the one-time ontology loading performed by initializeCache().
	private static boolean cacheInitialized;
84
	private static void initializeCache() {
85
		if (!cacheInitialized) {
86
			// cache the ontologies we use
87
			OntDocumentManager.getInstance().addModel(oboe, ModelFactory.createOntologyModel().read(oboe));
88
			OntDocumentManager.getInstance().addModel(oboe_sbc, ModelFactory.createOntologyModel().read(oboe_sbc));
89
			OntDocumentManager.getInstance().addModel(oa, ModelFactory.createOntologyModel().read(oa_source));
90
			OntDocumentManager.getInstance().addModel(dcterms, ModelFactory.createOntologyModel().read(dcterms_source));
91
			OntDocumentManager.getInstance().addModel(foaf, ModelFactory.createOntologyModel().read(foaf_source));
92
			OntDocumentManager.getInstance().addModel(prov, ModelFactory.createOntologyModel().read(prov));
93
			OntDocumentManager.getInstance().addModel(cito, ModelFactory.createOntologyModel().read(cito));
94
			cacheInitialized = true;
95
		}
96
	}
97
    
98
    public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception {
99

    
100
    	// generate an annotation for the metadata given
101
		String rdfContent = this.generateAnnotation(metadataPid);
102
		
103
		// load to triple store
104
		//Dataset dataset = TDBFactory.createDataset("./tbd");
105
		Dataset dataset = TDBFactory.createDataset();
106
		//Dataset dataset = DatasetFactory.createMem();
107
		
108
    	// read the annotation into the triplestore
109
		InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8");
110
    	String name = "http://annotation/" + metadataPid.getValue();
111
    	boolean loaded = dataset.containsNamedModel(name);
112
    	if (loaded) {
113
    		dataset.removeNamedModel(name);
114
    		loaded = false;
115
    	}
116
		OntModel ontModel = null;
117
		if (!loaded) {
118
			ontModel = ModelFactory.createOntologyModel();
119
			ontModel.read(source, name);
120
			dataset.addNamedModel(name, ontModel);
121
		}
122
		
123
		// query for fields to add to index
124
        Map<String, List<Object>> fields = new HashMap<String, List<Object>>();
125
		
126
        // TODO: look up the query to use (support multiple like in the indexing project)
127
        List<String> queries = new ArrayList<String>();        
128
        queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
129
        	+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
130
        	+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> " 
131
			+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
132
			+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
133
			+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
134
			+ "SELECT ?standard_sm ?pid "
135
			+ "FROM <$GRAPH_NAME> "
136
			+ "WHERE { "
137
			+ "		?measurement rdf:type oboe-core:Measurement . "
138
			+ "		?measurement rdf:type ?restriction . "
139
			+ "		?restriction owl:onProperty oboe-core:usesStandard . "
140
			+ "		?restriction owl:allValuesFrom ?standard . "
141
			+ "		?standard rdfs:subClassOf+ ?standard_sm . "
142
			+ "		?standard_sm rdfs:subClassOf oboe-core:Standard . "				
143
			+ "		?annotation oa:hasBody ?measurement . "												
144
			+ "		?annotation oa:hasTarget ?target . "
145
			+ "		?target oa:hasSource ?metadata . "
146
			+ "		?metadata dcterms:identifier ?pid . " 
147
			+ "}");
148
        
149
        queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
150
    		+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
151
    		+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
152
    		+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
153
    		+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
154
    		+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
155
    		+ "SELECT ?characteristic_sm ?pid "
156
    		+ "FROM <$GRAPH_NAME>"
157
    		+ "WHERE { "
158
    		+ "		?measurement rdf:type oboe-core:Measurement . "
159
    		+ "		?measurement rdf:type ?restriction . "
160
			+ "		?restriction owl:onProperty oboe-core:ofCharacteristic . "
161
			+ "		?restriction owl:allValuesFrom ?characteristic . "
162
			+ "		?characteristic rdfs:subClassOf+ ?characteristic_sm . "
163
			+ "		?characteristic_sm rdfs:subClassOf oboe-core:Characteristic . "
164
			+ "		?annotation oa:hasBody ?measurement .	"											
165
			+ "		?annotation oa:hasTarget ?target . "
166
			+ "		?target oa:hasSource ?metadata . "
167
			+ "		?metadata dcterms:identifier ?pid . " 
168
			+ "}");
169
        
170
        for (String q: queries) {
171
	        q = q.replaceAll("\\$GRAPH_NAME", name);
172
			Query query = QueryFactory.create(q);
173
			QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
174
			ResultSet results = qexec.execSelect();
175
			
176
			while (results.hasNext()) {
177
				QuerySolution solution = results.next();
178
				System.out.println(solution.toString());
179
				
180
				// find the index document we are trying to augment with the annotation
181
				if (solution.contains("pid")) {
182
					String id = solution.getLiteral("pid").getString();
183
					if (!id.equals(metadataPid.getValue())) {
184
						// skip any solution that does not annotate the given pid
185
						continue;
186
					}
187
					
188
				}
189
				// loop through the solution variables, add an index value for each
190
				Iterator<String> varNameIter = solution.varNames();
191
				while (varNameIter.hasNext()) {
192
					String key = varNameIter.next();
193
					if (key.equals("pid")) {
194
						// don't include the id
195
						continue;
196
					}
197
					String value = solution.get(key).toString();
198
					List<Object> values = fields.get(key);
199
					if (values  == null) {
200
						values = new ArrayList<Object>();
201
					}
202
					values.add(value);
203
					fields.put(key, values);
204
				}
205
			}
206
        }
207

    
208
        // remove the graph to save storage
209
//        ontModel.removeAll();
210
//        ontModel.commit();
211
//        ontModel.close();
212
		dataset.removeNamedModel(name);
213
        
214
		// clean up the triple store
215
		TDBFactory.release(dataset);
216
        
217
		// add to index
218
		MetacatSolrIndex.getInstance().submit(metadataPid, null, fields, true);
219
		
220
		
221
	}
    /**
     * Generate an OA/OBOE RDF annotation document for the given metadata
     * identifier. For each attribute of each entity in the data package, a
     * Measurement individual is created whose Standard/Characteristic are
     * looked up via BioPortal, wrapped in an OA Annotation that targets the
     * attribute via an xpointer fragment selector.
     *
     * @param metadataPid identifier of the EML metadata document
     * @return the serialized RDF model as a string
     * @throws Exception if the package cannot be retrieved or parsed
     */
    public String generateAnnotation(Identifier metadataPid) throws Exception {
    	
    	DataPackage dataPackage = this.getDataPackage(metadataPid);
    	
		OntModel m = ModelFactory.createOntologyModel();
		Ontology ont = m.createOntology("http://annotation/" + metadataPid.getValue());
		
		// TODO: import the ontologies we use
		initializeCache();
		
		// declare owl:imports and attach each cached ontology as a submodel so
		// its classes/properties resolve during construction below
		ont.addImport(m.createResource(oboe));
		m.addSubModel(OntDocumentManager.getInstance().getModel(oboe));
		
		ont.addImport(m.createResource(oboe_sbc));
		m.addSubModel(OntDocumentManager.getInstance().getModel(oboe_sbc));
		
		ont.addImport(m.createResource(oa));
		m.addSubModel(OntDocumentManager.getInstance().getModel(oa));

		ont.addImport(m.createResource(dcterms));
		m.addSubModel(OntDocumentManager.getInstance().getModel(dcterms));

		ont.addImport(m.createResource(foaf));
		m.addSubModel(OntDocumentManager.getInstance().getModel(foaf));
		
		// prov and cito are imported but not attached as submodels; their
		// terms are created below with getResource/getProperty instead
		ont.addImport(m.createResource(prov));
		//m.addSubModel(ModelFactory.createOntologyModel().read(prov_source));

		ont.addImport(m.createResource(cito));
		
		// properties
		ObjectProperty hasBodyProperty = m.getObjectProperty(oa + "hasBody");
		ObjectProperty hasTargetProperty = m.getObjectProperty(oa + "hasTarget");
		ObjectProperty hasSourceProperty = m.getObjectProperty(oa + "hasSource");
		ObjectProperty hasSelectorProperty = m.getObjectProperty(oa + "hasSelector");
		ObjectProperty annotatedByProperty = m.getObjectProperty(oa + "annotatedBy");
		Property identifierProperty = m.getProperty(dcterms + "identifier");
		Property conformsToProperty = m.getProperty(dcterms + "conformsTo");
		Property wasAttributedTo = m.getProperty(prov + "wasAttributedTo");
		Property nameProperty = m.getProperty(foaf + "name");
		Property rdfValue = m.getProperty(rdf + "value");
		
		ObjectProperty ofCharacteristic = m.getObjectProperty(oboe_core + "ofCharacteristic");
		ObjectProperty usesStandard = m.getObjectProperty(oboe_core + "usesStandard");
		ObjectProperty ofEntity = m.getObjectProperty(oboe_core + "ofEntity");
		ObjectProperty hasMeasurement = m.getObjectProperty(oboe_core + "hasMeasurement");

		// classes
		OntClass entityClass =  m.getOntClass(oboe_core + "Entity");
		OntClass observationClass =  m.getOntClass(oboe_core + "Observation");
		OntClass measurementClass =  m.getOntClass(oboe_core + "Measurement");
		OntClass characteristicClass = m.getOntClass(oboe_core + "Characteristic");
		OntClass standardClass =  m.getOntClass(oboe_core + "Standard");
		
		Resource annotationClass =  m.getOntClass(oa + "Annotation");
		Resource specificResourceClass =  m.getOntClass(oa + "SpecificResource");
		Resource fragmentSelectorClass =  m.getOntClass(oa + "FragmentSelector");
		Resource provEntityClass =  m.getResource(prov + "Entity");
		Resource personClass =  m.getResource(prov + "Person");
				
		// these apply to every attribute annotation
		Individual meta1 = m.createIndividual(ont.getURI() + "#meta", provEntityClass);
		meta1.addProperty(identifierProperty, metadataPid.getValue());

		// decide who should be credited with the package
		Individual p1 = null;
		
		// look up creators from the EML metadata
		List<String> creators = dataPackage.getCreators();
		//creators = Arrays.asList("Matthew Jones");
		if (creators != null && creators.size() > 0) {	
			// use an orcid if we can find one from their system
			String orcidUri = OrcidService.lookupOrcid(null, null, creators.toArray(new String[0]));
			if (orcidUri != null) {
				p1 = m.createIndividual(orcidUri, personClass);
				p1.addProperty(identifierProperty, orcidUri);
			} else {
				p1 = m.createIndividual(ont.getURI() + "#person", personClass);
			}
			// include the name we have in the metadata
			// NOTE(review): only the first creator's name is recorded
			p1.addProperty(nameProperty, creators.get(0));
		}
		
		// attribute the package to this creator if we have one
		if (p1 != null) {
			meta1.addProperty(wasAttributedTo, p1);
		}
		
		// loop through the tables and attributes
		int entityCount = 1;
		Entity[] entities = dataPackage.getEntityList();
		if (entities != null) {
			for (Entity entity: entities) {
				String entityName = entity.getName();
				
				// one Observation per entity; typed via an allValuesFrom
				// restriction when a matching concept is found
				Individual o1 = m.createIndividual(ont.getURI() + "#observation" + entityCount, observationClass);
				Resource entityConcept = lookupEntity(entityClass, entity);
				if (entityConcept != null) {
					AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofEntity, entityConcept);
					o1.addOntClass(avfr);
				}
				
				logMetacat.debug("Entity name: " + entityName);
				Attribute[] attributes = entity.getAttributeList().getAttributes();
				int attributeCount = 1;
				if (attributes != null) {
					for (Attribute attribute: attributes) {
						
						// for naming the individuals uniquely
						String cnt = entityCount + "_" + attributeCount;
						
						String attributeName = attribute.getName();
						String attributeLabel = attribute.getLabel();
						String attributeDefinition = attribute.getDefinition();
						String attributeType = attribute.getAttributeType();
						String attributeScale = attribute.getMeasurementScale();
						String attributeUnitType = attribute.getUnitType();
						String attributeUnit = attribute.getUnit();
						String attributeDomain = attribute.getDomain().getClass().getSimpleName();
		
						logMetacat.debug("Attribute name: " + attributeName);
						logMetacat.debug("Attribute label: " + attributeLabel);
						logMetacat.debug("Attribute definition: " + attributeDefinition);
						logMetacat.debug("Attribute type: " + attributeType);
						logMetacat.debug("Attribute scale: " + attributeScale);
						logMetacat.debug("Attribute unit type: " + attributeUnitType);
						logMetacat.debug("Attribute unit: " + attributeUnit);
						logMetacat.debug("Attribute domain: " + attributeDomain);
					
						// look up the characteristic or standard subclasses
						Resource standard = this.lookupStandard(standardClass, attribute);
						Resource characteristic = this.lookupCharacteristic(characteristicClass, attribute);
						
						// only annotate the attribute when at least one concept matched
						if (standard != null || characteristic != null) {
							
							// instances
							Individual m1 = m.createIndividual(ont.getURI() + "#measurement" + cnt, measurementClass);
							Individual a1 = m.createIndividual(ont.getURI() + "#annotation" + cnt, annotationClass);
							Individual t1 = m.createIndividual(ont.getURI() + "#target" + cnt, specificResourceClass);
							// selector pinpoints the attribute within the EML document
							String xpointer = "xpointer(/eml/dataSet/" + entityCount + "/attributeList/" + attributeCount + ")";
							Individual s1 = m.createIndividual(ont.getURI() + "#" + xpointer, fragmentSelectorClass);
							s1.addLiteral(rdfValue, xpointer);
							s1.addProperty(conformsToProperty, "http://www.w3.org/TR/xptr/");
							
							// statements about the annotation
							a1.addProperty(hasBodyProperty, m1);
							a1.addProperty(hasTargetProperty, t1);
							t1.addProperty(hasSourceProperty, meta1);
							t1.addProperty(hasSelectorProperty, s1);
							//a1.addProperty(annotatedByProperty, p1);
							
							// describe the measurement in terms of restrictions
							if (standard != null) {
								AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, usesStandard, standard);
								m1.addOntClass(avfr);
							}
							if (characteristic != null) {
								AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofCharacteristic, characteristic);
								m1.addOntClass(avfr);
							}
							
							// attach to the observation
							// TODO: evaluate whether the measurement can apply to the given observed entity
							o1.addProperty(hasMeasurement, m1);
						}
						attributeCount++;
						
					}
				}
				entityCount++;
			}
		}
		
		StringWriter sw = new StringWriter();
		// only write the base model
		//m.write(sw, "RDF/XML-ABBREV");
		m.write(sw, null);

		return sw.toString();
		
	}
408
	
409
	private Resource lookupStandard(OntClass standardClass, Attribute attribute) {
410
		// what's our unit?
411
		String unit = attribute.getUnit().toLowerCase();
412
		
413
		/*
414
		boolean found = false;
415
		List<String> tokens = Arrays.asList(unit.split(" "));
416
		ExtendedIterator iter = standardClass.listSubClasses(false);
417
		while (iter.hasNext()) {
418
			OntClass subclass = (OntClass) iter.next();
419
			String subclassName = subclass.getLocalName().toLowerCase();
420
			logMetacat.debug("subclass: " + subclassName);
421
			if (tokens.contains(subclassName)) {
422
				found = true;
423
			}
424
			if (subclass.hasLabel(unit, null)) {
425
				found = true;
426
			}
427
			if (found) {
428
				return subclass;
429
			}
430
		}
431
		*/
432
		
433
		// try to look it up if we got this far
434
		return BioPortalService.lookupAnnotationClass(standardClass, unit, OBOE_SBC);
435
	}
436
	
437
	private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) {
438
		// what are we looking for?
439
		String label = attribute.getLabel().toLowerCase();
440
		String definition = attribute.getDefinition();
441
		String text = label + " " + definition;
442
		
443
		/*
444
		// find something that matches		
445
		boolean found = false;
446
		List<String> tokens = Arrays.asList(label.split(" "));
447
		ExtendedIterator iter = characteristicClass.listSubClasses();
448
		while (iter.hasNext()) {
449
			OntClass subclass = (OntClass) iter.next();
450
			String subclassName = subclass.getLocalName().toLowerCase();
451
			logMetacat.debug("subclass: " + subclassName);
452
			if (tokens.contains(subclassName)) {
453
				found = true;
454
			}
455
			if (subclass.hasLabel(label, null)) {
456
				found = true;
457
			}
458
			if (found) {
459
				return subclass;
460
			}
461
		}
462
		*/
463
		
464
		// try to look it up from the service
465
		return BioPortalService.lookupAnnotationClass(characteristicClass, text, OBOE_SBC);
466
		
467
	}
468
	
469
	private Resource lookupEntity(OntClass entityClass, Entity entity) {
470
		// what's our description like?
471
		String name = entity.getName();
472
		String definition = entity.getDefinition();
473
		
474
		// try to look it up if we got this far
475
		return BioPortalService.lookupAnnotationClass(entityClass, definition, OBOE_SBC);
476
		
477
	}
478
	
479
	private DataPackage getDataPackage(Identifier pid) throws Exception {
480
		// for using the MN API as the MN itself
481
		MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
482
		Session session = new Session();
483
        Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
484
        session.setSubject(subject);
485
		InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
486

    
487
		// parse the metadata
488
		DataPackageParserInterface parser = new Eml200DataPackageParser();
489
		parser.parse(emlStream);
490
		DataPackage dataPackage = parser.getDataPackage();
491
		return dataPackage;
492
	}
493

    
494
	private void summarize(List<Identifier> identifiers) throws SQLException {
495
		
496
		DBConnection dbconn = null;
497

    
498
		try {
499
			dbconn = DBConnectionPool.getDBConnection("DatapackageSummarizer.summarize");
500
			
501
			PreparedStatement dropStatement = dbconn.prepareStatement("DROP TABLE IF EXISTS entity_summary");
502
			dropStatement.execute();
503
	
504
			PreparedStatement createStatement = dbconn.prepareStatement(
505
					"CREATE TABLE entity_summary (" +
506
					"guid text, " +
507
					"title text, " +
508
					"entity text," +
509
					"attributeName text," +
510
					"attributeLabel text," +
511
					"attributeDefinition text," +
512
					"attributeType text," +
513
					"attributeScale text," +
514
					"attributeUnitType text," +
515
					"attributeUnit text," +
516
					"attributeDomain text" +
517
					")");
518
			createStatement.execute();
519
			
520
			PreparedStatement insertStatement = dbconn.prepareStatement(
521
					"INSERT INTO entity_summary " +
522
					"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
523
			
524
			for (Identifier pid: identifiers) {
525
			
526
				logMetacat.debug("Parsing pid: " + pid.getValue());
527
				
528
				try {
529
					
530
					// get the package
531
					DataPackage dataPackage = this.getDataPackage(pid);
532
					String title = dataPackage.getTitle();
533
					logMetacat.debug("Title: " + title);
534
					
535
					Entity[] entities = dataPackage.getEntityList();
536
					if (entities != null) {
537
						for (Entity entity: entities) {
538
							String entityName = entity.getName();
539
							logMetacat.debug("Entity name: " + entityName);
540
							Attribute[] attributes = entity.getAttributeList().getAttributes();
541
							for (Attribute attribute: attributes) {
542
								String attributeName = attribute.getName();
543
								String attributeLabel = attribute.getLabel();
544
								String attributeDefinition = attribute.getDefinition();
545
								String attributeType = attribute.getAttributeType();
546
								String attributeScale = attribute.getMeasurementScale();
547
								String attributeUnitType = attribute.getUnitType();
548
								String attributeUnit = attribute.getUnit();
549
								String attributeDomain = attribute.getDomain().getClass().getSimpleName();
550
	
551
								logMetacat.debug("Attribute name: " + attributeName);
552
								logMetacat.debug("Attribute label: " + attributeLabel);
553
								logMetacat.debug("Attribute definition: " + attributeDefinition);
554
								logMetacat.debug("Attribute type: " + attributeType);
555
								logMetacat.debug("Attribute scale: " + attributeScale);
556
								logMetacat.debug("Attribute unit type: " + attributeUnitType);
557
								logMetacat.debug("Attribute unit: " + attributeUnit);
558
								logMetacat.debug("Attribute domain: " + attributeDomain);
559
								
560
								// set the values for this attribute
561
								insertStatement.setString(1, pid.getValue());
562
								insertStatement.setString(2, title);
563
								insertStatement.setString(3, entityName);
564
								insertStatement.setString(4, attributeName);
565
								insertStatement.setString(5, attributeLabel);
566
								insertStatement.setString(6, attributeDefinition);
567
								insertStatement.setString(7, attributeType);
568
								insertStatement.setString(8, attributeScale);
569
								insertStatement.setString(9, attributeUnitType);
570
								insertStatement.setString(10, attributeUnit);
571
								insertStatement.setString(11, attributeDomain);
572
								insertStatement.execute();
573
								
574
							}		
575
						}
576
					}
577
					
578
				} catch (Exception e) {
579
					logMetacat.warn("error parsing metadata for: " + pid.getValue(), e);
580
				}
581
			}
582
		} catch (SQLException sqle) {
583
			// just throw it
584
			throw sqle;
585
		} finally {
586
			if (dbconn != null) {
587
				DBConnectionPool.returnDBConnection(dbconn, 0);
588
				dbconn.close();
589
			}
590
		}
591
	}
592
	
593
	public static void main(String[] args) throws Exception {
594
		// set up the properties based on the test/deployed configuration of the workspace
595
			SortedProperties testProperties = new SortedProperties("test/test.properties");
596
			testProperties.load();
597
			String metacatContextDir = testProperties.getProperty("metacat.contextDir");
598
			PropertyService.getInstance(metacatContextDir + "/WEB-INF");
599
			
600
			testGenerate();
601
//			testSummary();
602
			System.exit(0);
603
	}
604
	
605
	public static void testGenerate() throws Exception {
606
		Identifier metadataPid = new Identifier();
607
		metadataPid.setValue("tao.1.4");
608
		DatapackageSummarizer ds = new DatapackageSummarizer();
609
		String rdfString = ds.generateAnnotation(metadataPid);
610
		logMetacat.info("RDF annotation: \n" + rdfString);
611
		
612
	}
613
	
614
	public static void testSummary() throws Exception {
615
		
616
		// summarize the packages
617
		DatapackageSummarizer ds = new DatapackageSummarizer();
618
		List<Identifier> identifiers = new ArrayList<Identifier>();
619
		Map<Integer, String> serverCodes = ReplicationService.getServerCodes();
620

    
621
		// select the metadata ids we want to summarize
622
		boolean includeReplicas = true;
623
		Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator();
624
		if (includeReplicas ) {
625
			codeIter = serverCodes.keySet().iterator();
626
		}
627
		
628
		Vector<String> idList = new Vector<String>();
629
		while (codeIter.hasNext()) {
630
			int serverLocation = codeIter.next();
631
			Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation);
632
			Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation);
633
			Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation);
634
			Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation);
635
			
636
			idList.addAll(idList0);
637
			idList.addAll(idList1);
638
			idList.addAll(idList2);
639
			idList.addAll(idList3);
640
		
641
		}
642
		
643
		// go through all the identifiers now
644
		for (String localId : idList) {
645
			try {
646
				String guid = IdentifierManager.getInstance().getGUID(
647
						DocumentUtil.getDocIdFromAccessionNumber(localId), 
648
						DocumentUtil.getRevisionFromAccessionNumber(localId));
649
				Identifier pid = new Identifier();
650
				pid.setValue(guid);
651
				identifiers.add(pid);
652
			} catch (McdbDocNotFoundException nfe) {
653
				// just skip it
654
				continue;
655
			}
656
		}
657
		ds.summarize(identifiers);
658
		System.exit(0);
659
	}
660
	
661
}
// NOTE(review): trailing repository-browser pagination artifact "(2-2/3)" removed — not part of the source file.