/ - Diff - Metacat - Ecoinformatics Redmine

     package edu.ucsb.nceas.metacat.index.resourcemap;
     import java.io.IOException;
     import java.io.InputStream;
     import java.net.MalformedURLException;
     import java.util.ArrayList;
     import java.util.Date;
-...
     import org.apache.solr.common.params.SolrParams;
     import org.apache.solr.schema.IndexSchema;
     import org.apache.solr.servlet.SolrRequestParsers;
     import org.dataone.cn.indexer.XPathDocumentParser;
     import org.dataone.cn.indexer.convert.SolrDateConverter;
     import org.dataone.cn.indexer.parser.AbstractDocumentSubprocessor;
     import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
-...
         @Override
         public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs,
         Document doc) throws IOException, EncoderException, SAXException,
         InputStream is) throws IOException, EncoderException, SAXException,
         XPathExpressionException, ParserConfigurationException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException {
             SolrDoc resourceMapDoc = docs.get(identifier);
             List<SolrDoc> processedDocs = processResourceMap(resourceMapDoc, doc);
             Document doc = XPathDocumentParser.generateXmlDocument(is);
     		List<SolrDoc> processedDocs = processResourceMap(resourceMapDoc, doc );
             Map<String, SolrDoc> processedDocsMap = new HashMap<String, SolrDoc>();
             for (SolrDoc processedDoc : processedDocs) {
                 processedDocsMap.put(processedDoc.getIdentifier(), processedDoc);

     import org.apache.solr.schema.IndexSchema;
     import org.dataone.cn.indexer.XMLNamespaceConfig;
     import org.dataone.cn.indexer.convert.SolrDateConverter;
     import org.dataone.cn.indexer.parser.AbstractDocumentSubprocessor;
     import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
     import org.dataone.cn.indexer.parser.SolrField;
     import org.dataone.cn.indexer.resourcemap.ResourceEntry;
     import org.dataone.cn.indexer.resourcemap.ResourceMap;
     import org.dataone.cn.indexer.resourcemap.ResourceMapFactory;
     import org.dataone.cn.indexer.solrhttp.SolrDoc;
     import org.dataone.cn.indexer.solrhttp.SolrElementField;
     import org.dataone.service.exceptions.NotFound;
-...
          */
         public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
             for (IDocumentSubprocessor subprocessor : subprocessorList) {
                 subprocessor.initExpression(xpath);
             	if (subprocessor instanceof AbstractDocumentSubprocessor) {
             		((AbstractDocumentSubprocessor)subprocessor).initExpression(xpath);
+            	}
+            }
             this.subprocessors = subprocessorList;
+        }
-...
             SolrDoc indexDocument = new SolrDoc(sysSolrFields);
             Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
             docs.put(id, indexDocument);
             // get the format id for this object
             String formatId = indexDocument.getFirstFieldValue(SolrElementField.FIELD_OBJECTFORMAT);
             // Determine if subprocessors are available for this ID
             if (subprocessors != null) {
                         // for each subprocessor loaded from the spring config
                         for (IDocumentSubprocessor subprocessor : subprocessors) {
                             // Does this subprocessor apply?
                             if (subprocessor.canProcess(sysMetaDoc)) {
                                 // if so, then extract the additional information from the
                                 // document.
                                 try {
                                     // docObject = the resource map document or science
                                     // metadata document.
                                     // note that resource map processing touches all objects
                                     // referenced by the resource map.
                                 	InputStream dataStream = new FileInputStream(objectPath);
                                     Document docObject = generateXmlDocument(dataStream);
                                     if (docObject == null) {
                                         throw new Exception("Could not load OBJECT for ID " + id );
                                     } else {
                                         docs = subprocessor.processDocument(id, docs, docObject);
+                                    }
                                 } catch (Exception e) {
                                     log.error(e.getMessage(), e);
                                     throw new SolrServerException(e.getMessage());
+                                }
+                            }
+                        }
     	        // for each subprocessor loaded from the spring config
     	        for (IDocumentSubprocessor subprocessor : subprocessors) {
     	            // Does this subprocessor apply?
     	            if (subprocessor.canProcess(formatId)) {
     	                // if so, then extract the additional information from the
     	                // document.
     	                try {
     	                    // docObject = the resource map document or science
     	                    // metadata document.
     	                    // note that resource map processing touches all objects
     	                    // referenced by the resource map.
     	                	FileInputStream dataStream = new FileInputStream(objectPath);
     	                    if (!dataStream.getFD().valid()) {
     	                    	log.error("Could not load OBJECT file for ID,Path=" + id + ", "
                                         + objectPath);
     	                        //throw new Exception("Could not load OBJECT for ID " + id );
     	                    } else {
     	                        docs = subprocessor.processDocument(id, docs, dataStream);
+    	                    }
     	                } catch (Exception e) {
     	                    log.error(e.getMessage(), e);
     	                    throw new SolrServerException(e.getMessage());
+    	                }
+    	            }
+    	        }
+           }
            // TODO: in the XPathDocumentParser class in d1_cn_index_process module,

      */
     package edu.ucsb.nceas.metacat.index.annotation;
     import java.io.ByteArrayInputStream;
     import java.io.ByteArrayOutputStream;
     import java.io.IOException;
     import java.io.InputStream;
     import java.net.MalformedURLException;
-...
     import java.util.Set;
     import javax.xml.parsers.ParserConfigurationException;
     import javax.xml.transform.Result;
     import javax.xml.transform.Source;
     import javax.xml.transform.TransformerConfigurationException;
     import javax.xml.transform.TransformerException;
     import javax.xml.transform.TransformerFactory;
     import javax.xml.transform.TransformerFactoryConfigurationError;
     import javax.xml.transform.dom.DOMSource;
     import javax.xml.transform.stream.StreamResult;
     import org.apache.commons.logging.Log;
     import org.apache.commons.logging.LogFactory;
-...
     import org.dataone.service.types.v1.util.AccessUtil;
     import org.dataone.service.types.v1.util.AuthUtils;
     import org.dataone.service.util.DateTimeMarshaller;
     import org.w3c.dom.Document;
     import org.xml.sax.SAXException;
     import com.hp.hpl.jena.ontology.OntModel;
-...
+        }
         @Override
         public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs, Document doc) throws Exception {
         public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs, InputStream is) throws Exception {
             SolrDoc resourceMapDoc = docs.get(identifier);
             List<SolrDoc> processedDocs = process(resourceMapDoc, doc);
             List<SolrDoc> processedDocs = process(resourceMapDoc, is);
             Map<String, SolrDoc> processedDocsMap = new HashMap<String, SolrDoc>();
             for (SolrDoc processedDoc : processedDocs) {
                 processedDocsMap.put(processedDoc.getIdentifier(), processedDoc);
-...
             Map<String, SolrDoc> mergedDocuments = mergeDocs(docs, processedDocsMap);
             return mergedDocuments;
+        }
         /**
          * Serializes a DOM document into an in-memory byte stream.
          *
          * The document is written through a {@link javax.xml.transform.Transformer}
          * created without a stylesheet into a {@link ByteArrayOutputStream}, and the
          * collected bytes are then wrapped in a {@link ByteArrayInputStream}. The whole
          * serialized document is therefore held in memory at once.
          *
          * @param doc the DOM document to serialize
          * @return a stream positioned at the start of the serialized bytes of {@code doc}
          * @throws TransformerConfigurationException if the transformer cannot be created
          * @throws TransformerException if the transformation itself fails
          * @throws TransformerFactoryConfigurationError if no transformer factory
          *         implementation can be instantiated
          */
         private InputStream toInputStream(Document doc) throws TransformerConfigurationException, TransformerException, TransformerFactoryConfigurationError {
         	// in-memory sink that receives the serialized document
         	ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
         	Source xmlSource = new DOMSource(doc);
         	Result outputTarget = new StreamResult(outputStream);
         	// a new factory and transformer are created on every call; no stylesheet is
         	// supplied, so the source is copied to the result as-is
         	TransformerFactory.newInstance().newTransformer().transform(xmlSource, outputTarget);
         	// toByteArray() copies the buffer, so the returned stream is independent of outputStream
         	InputStream is = new ByteArrayInputStream(outputStream.toByteArray());
         	return is;
+        }
         private List<SolrDoc> process(SolrDoc indexDocument, Document rdfXmlDocument) throws Exception {
         private List<SolrDoc> process(SolrDoc indexDocument, InputStream is) throws Exception {
         	// get the triplestore dataset
     		Dataset dataset = TripleStoreService.getInstance().getDataset();
         	// read the annotation
     		InputStream source = toInputStream(rdfXmlDocument);
         	String indexDocId = indexDocument.getIdentifier();
         	String name = indexDocId;
-...
         	boolean loaded = dataset.containsNamedModel(name);
     		if (!loaded) {
     			OntModel ontModel = ModelFactory.createOntologyModel();
     			ontModel.read(source, name);
     			ontModel.read(is, name);
     			dataset.addNamedModel(name, ontModel);
+    		}
     		//dataset.getDefaultModel().add(ontModel);

      <bean id="eml200Subprocessor"
       class="org.dataone.cn.indexer.parser.ScienceMetadataDocumentSubprocessor">
       <property name="matchDocument"
        value="/d200:systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.0']"></property>
       	<property name="matchDocuments">
     		<list>
     			<value>eml://ecoinformatics.org/eml-2.0.0</value>
     		</list>
     	</property>
       <property name="fieldList">
        <list>
         <ref bean="eml.abstract" />

     	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
     	<bean id="rdfXmlSubprocessor" class="edu.ucsb.nceas.metacat.index.annotation.RdfXmlSubprocessor">
     		<property name="matchDocument"
     			value="/d200:systemMetadata/formatId[text()='http://www.w3.org/TR/rdf-syntax-grammar'] | /d200:systemMetadata/formatId[text()='http://www.openarchives.org/ore/terms']" />
     		<property name="matchDocuments">
     			<list>
     				<value>http://www.w3.org/TR/rdf-syntax-grammar</value>
     				<value>http://www.openarchives.org/ore/terms</value>
     			</list>
     		</property>
     		<property name="fieldList">
     			<list>
     				<ref bean="annotation.standard" />

      <bean id="eml201Subprocessor"
       class="org.dataone.cn.indexer.parser.ScienceMetadataDocumentSubprocessor">
       <property name="matchDocument"
        value="/d200:systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.1']"></property>
       	<property name="matchDocuments">
     		<list>
     			<value>eml://ecoinformatics.org/eml-2.0.1</value>
     		</list>
     	</property>
       <property name="fieldList">
        <list>
         <ref bean="eml.abstract" />

      <bean id="eml210Subprocessor"
       class="org.dataone.cn.indexer.parser.ScienceMetadataDocumentSubprocessor">
       <property name="matchDocument"
        value="/d200:systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.0']"></property>
       	<property name="matchDocuments">
     		<list>
     			<value>eml://ecoinformatics.org/eml-2.1.0</value>
     		</list>
     	</property>
       <property name="fieldList">
        <list>
         <ref bean="eml.abstract" />

      <bean id="eml211Subprocessor"
       class="org.dataone.cn.indexer.parser.ScienceMetadataDocumentSubprocessor">
       <property name="matchDocument"
        value="/d200:systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.1']"></property>
       	<property name="matchDocuments">
     		<list>
     			<value>eml://ecoinformatics.org/eml-2.1.1</value>
     		</list>
     	</property>
       <property name="fieldList">
        <list>
         <ref bean="eml.abstract" />

     	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
     	<bean id="resourceMapSubprocessor" class="edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor">
     		<property name="matchDocument"
     			value="/d200:systemMetadata/formatId[text()='http://www.openarchives.org/ore/terms']" />
     		<property name="matchDocuments">
     			<list>
     				<value>http://www.openarchives.org/ore/terms</value>
     			</list>
     		</property>
     		<!-- <property name="httpService" ref="httpService"></property>
     		<property name="solrQueryUri" value="${solr.query.uri}"></property> -->
     	</bean>

     	<bean id="annotatorSubprocessor" class="org.dataone.cn.indexer.annotation.AnnotatorSubprocessor">
     		<!-- match any document type -->
     		<property name="matchDocument"
     			value="/d200:systemMetadata/formatId[text() != '']" />
     		<!-- match annotation documents -->
     		<property name="matchDocuments">
     			<list>
     				<value>http://docs.annotatorjs.org/en/v1.2.x/annotation-format.html</value>
     			</list>
     		</property>
     		<property name="fieldList">
     			<list>
     				<ref bean="annotation.expansion" />

Project

General

Profile

Metacat

Revision 9018

Added by ben leinfelder over 10 years ago