Project

General

Profile

Revision 8190

Better checking for ORE maps when publishing DOIs (the packages that contain the science metadata need to be updated as well). https://projects.ecoinformatics.org/ecoinfo/issues/6061

View differences:

src/edu/ucsb/nceas/metacat/dataone/MNodeService.java
30 30
import java.io.FileOutputStream;
31 31
import java.io.IOException;
32 32
import java.io.InputStream;
33
import java.io.InputStreamReader;
33 34
import java.io.OutputStreamWriter;
34 35
import java.io.UnsupportedEncodingException;
35 36
import java.io.Writer;
......
54 55
import org.apache.commons.beanutils.BeanUtils;
55 56
import org.apache.commons.io.IOUtils;
56 57
import org.apache.log4j.Logger;
58
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
57 59
import org.dataone.client.CNode;
58 60
import org.dataone.client.D1Client;
59 61
import org.dataone.client.MNode;
......
137 139
import edu.ucsb.nceas.metacat.util.DocumentUtil;
138 140
import edu.ucsb.nceas.metacat.util.SystemUtil;
139 141
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
142
import edu.ucsb.nceas.utilities.XMLUtilities;
140 143
import gov.loc.repository.bagit.Bag;
141 144
import gov.loc.repository.bagit.BagFactory;
142 145
import gov.loc.repository.bagit.writer.impl.ZipWriter;
......
1605 1608
		this.update(session, originalIdentifier, inputStream, newIdentifier, sysmeta);
1606 1609
		
1607 1610
		// update ORE that references the scimeta
1608
		// TODO: better ORE location algorithm -- this is just convention for generated resource maps and is fragile
1611
		// first try the naive method, then check the SOLR index
1609 1612
		try {
1610 1613
			String localId = IdentifierManager.getInstance().getLocalId(originalIdentifier.getValue());
1611 1614
			
......
1618 1621
			} catch (NotFound nf) {
1619 1622
				// this is probably okay for many sci meta data docs
1620 1623
				logMetacat.warn("No potential ORE map found for: " + potentialOreIdentifier.getValue());
1624
				// try the SOLR index
1625
				List<Identifier> potentialOreIdentifiers = this.lookupOreFor(originalIdentifier);
1626
				if (potentialOreIdentifiers != null) {
1627
					potentialOreIdentifier = potentialOreIdentifiers.get(0);
1628
					try {
1629
						oreInputStream = this.get(session, potentialOreIdentifier);
1630
					} catch (NotFound nf2) {
1631
						// this is probably okay for many sci meta data docs
1632
						logMetacat.warn("No potential ORE map found for: " + potentialOreIdentifier.getValue());
1633
					}
1634
				}
1621 1635
			}
1622 1636
			if (oreInputStream != null) {
1623 1637
				Identifier newOreIdentifier = MNodeService.getInstance(request).generateIdentifier(session, MNodeService.UUID_SCHEME, null);
......
1703 1717
	}
1704 1718
	
1705 1719
	/**
1720
	 * Determines if we already have registered an ORE map for this package
1721
	 * NOTE: uses a solr query to locate OREs for the object
1722
	 * @param guid of the EML/packaging object
1723
	 * @return list of resource map identifiers for the given pid
1724
	 */
1725
	public List<Identifier> lookupOreFor(Identifier guid) {
1726
		// Search for the ORE if we can find it
1727
		String pid = guid.getValue();
1728
		List<Identifier> retList = null;
1729
		try {
1730
			MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
1731
			String query = "fl=id,resourceMap&wt=xml&q=formatType:METADATA+-obsoletedBy:*+resourceMap:*+id:\"" + pid + "\"";
1732
			InputStream results = this.query("solr", query);
1733
			org.w3c.dom.Node rootNode = XMLUtilities.getXMLReaderAsDOMTreeRootNode(new InputStreamReader(results, "UTF-8"));
1734
			//String resultString = XMLUtilities.getDOMTreeAsString(rootNode);
1735
			org.w3c.dom.NodeList nodeList = XMLUtilities.getNodeListWithXPath(rootNode, "//arr[@name=\"resourceMap\"]/str");
1736
			if (nodeList != null && nodeList.getLength() > 0) {
1737
				retList = new ArrayList<Identifier>();
1738
				for (int i = 0; i < nodeList.getLength(); i++) {
1739
					String found = nodeList.item(i).getFirstChild().getNodeValue();
1740
					Identifier oreId = new Identifier();
1741
					oreId.setValue(found);
1742
					retList.add(oreId);
1743
				}
1744
			}
1745
		} catch (Exception e) {
1746
			logMetacat.error("Error checking for resourceMap[s] on pid " + pid + ". " + e.getMessage(), e);
1747
		}
1748
		
1749
		return retList;
1750
	}
1751
	
1752
	/**
1706 1753
	 * Packages the given package in a Bagit collection for download
1707 1754
	 * @param pid
1708 1755
	 * @throws NotImplemented 
src/edu/ucsb/nceas/metacat/dataone/SystemMetadataFactory.java
28 28
import java.io.File;
29 29
import java.io.IOException;
30 30
import java.io.InputStream;
31
import java.io.InputStreamReader;
32 31
import java.math.BigInteger;
33 32
import java.net.URL;
34 33
import java.net.URLConnection;
......
71 70
import org.dataone.service.util.DateTimeMarshaller;
72 71
import org.dspace.foresite.ResourceMap;
73 72
import org.jibx.runtime.JiBXException;
74
import org.w3c.dom.Node;
75
import org.w3c.dom.NodeList;
76 73
import org.xml.sax.SAXException;
77 74

  
78 75
import java.util.Calendar;
......
96 93
import edu.ucsb.nceas.metacat.util.DocumentUtil;
97 94
import edu.ucsb.nceas.utilities.ParseLSIDException;
98 95
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
99
import edu.ucsb.nceas.utilities.XMLUtilities;
100 96

  
101 97
public class SystemMetadataFactory {
102 98

  
......
610 606
		return sysMeta;
611 607
	}
612 608

  
613
    /**
609
	/**
610
	 * Checks for potential ORE object existence 
611
	 * @param identifier
612
	 * @return
613
	 */
614
    public static boolean oreExistsFor(Identifier identifier) {
615
    	MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
616
		List<Identifier> ids = MNodeService.getInstance(request).lookupOreFor(identifier);
617
		return (ids != null && ids.size() > 0);
618
	}
619

  
620
	/**
614 621
     * Generate SystemMetadata for any object in the object store that does
615 622
     * not already have it.  SystemMetadata documents themselves, are, of course,
616 623
     * exempt.  This is a utility method for migration of existing object 
......
661 668
        }
662 669
        logMetacat.info("done generating system metadata for given list");
663 670
    }
664
    
665
	/**
666
	 * Determines if we already have registered an ORE map for this package
667
	 * NOTE: uses a solr query to locate OREs for the object
668
	 * @param guid of the EML/packaging object
669
	 * @return true if there is an ORE map for the given package
670
	 */
671
	public static boolean oreExistsFor(Identifier guid) {
672
		// Search for the ORE if we can find it
673
		String pid = guid.getValue();
674
		MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
675
		String query = "fl=id,resourceMap&wt=xml&q=formatType:METADATA+-obsoletedBy:*+resourceMap:*+id:\"" + pid + "\"";
676
		try {
677
			InputStream results = MNodeService.getInstance(request).query("solr", query);
678
			Node rootNode = XMLUtilities.getXMLReaderAsDOMTreeRootNode(new InputStreamReader(results, "UTF-8"));
679
			//String resultString = XMLUtilities.getDOMTreeAsString(rootNode);
680
			NodeList nodeList = XMLUtilities.getNodeListWithXPath(rootNode, "//arr[@name=\"resourceMap\"]/str");
681
			if (nodeList != null && nodeList.getLength() > 0) {
682
				//String found = nodeList.item(0).getFirstChild().getNodeValue();
683
				return true;
684
			}
685
		} catch (Exception e) {
686
			logMetacat.error("Error checking for resourceMap[s] on pid " + pid + ". " + e.getMessage(), e);
687
		}
688
		
689
		return false;
690
	}
691 671

  
692 672
	/**
693 673
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out

Also available in: Unified diff