Revision 8190
Added by ben leinfelder about 11 years ago
src/edu/ucsb/nceas/metacat/dataone/MNodeService.java | ||
---|---|---|
30 | 30 |
import java.io.FileOutputStream; |
31 | 31 |
import java.io.IOException; |
32 | 32 |
import java.io.InputStream; |
33 |
import java.io.InputStreamReader; |
|
33 | 34 |
import java.io.OutputStreamWriter; |
34 | 35 |
import java.io.UnsupportedEncodingException; |
35 | 36 |
import java.io.Writer; |
... | ... | |
54 | 55 |
import org.apache.commons.beanutils.BeanUtils; |
55 | 56 |
import org.apache.commons.io.IOUtils; |
56 | 57 |
import org.apache.log4j.Logger; |
58 |
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest; |
|
57 | 59 |
import org.dataone.client.CNode; |
58 | 60 |
import org.dataone.client.D1Client; |
59 | 61 |
import org.dataone.client.MNode; |
... | ... | |
137 | 139 |
import edu.ucsb.nceas.metacat.util.DocumentUtil; |
138 | 140 |
import edu.ucsb.nceas.metacat.util.SystemUtil; |
139 | 141 |
import edu.ucsb.nceas.utilities.PropertyNotFoundException; |
142 |
import edu.ucsb.nceas.utilities.XMLUtilities; |
|
140 | 143 |
import gov.loc.repository.bagit.Bag; |
141 | 144 |
import gov.loc.repository.bagit.BagFactory; |
142 | 145 |
import gov.loc.repository.bagit.writer.impl.ZipWriter; |
... | ... | |
1605 | 1608 |
this.update(session, originalIdentifier, inputStream, newIdentifier, sysmeta); |
1606 | 1609 |
|
1607 | 1610 |
// update ORE that references the scimeta |
1608 |
// TODO: better ORE location algorithm -- this is just convention for generated resource maps and is fragile
|
|
1611 |
// first try the naive method, then check the SOLR index
|
|
1609 | 1612 |
try { |
1610 | 1613 |
String localId = IdentifierManager.getInstance().getLocalId(originalIdentifier.getValue()); |
1611 | 1614 |
|
... | ... | |
1618 | 1621 |
} catch (NotFound nf) { |
1619 | 1622 |
// this is probably okay for many sci meta data docs |
1620 | 1623 |
logMetacat.warn("No potential ORE map found for: " + potentialOreIdentifier.getValue()); |
1624 |
// try the SOLR index |
|
1625 |
List<Identifier> potentialOreIdentifiers = this.lookupOreFor(originalIdentifier); |
|
1626 |
if (potentialOreIdentifiers != null) { |
|
1627 |
potentialOreIdentifier = potentialOreIdentifiers.get(0); |
|
1628 |
try { |
|
1629 |
oreInputStream = this.get(session, potentialOreIdentifier); |
|
1630 |
} catch (NotFound nf2) { |
|
1631 |
// this is probably okay for many sci meta data docs |
|
1632 |
logMetacat.warn("No potential ORE map found for: " + potentialOreIdentifier.getValue()); |
|
1633 |
} |
|
1634 |
} |
|
1621 | 1635 |
} |
1622 | 1636 |
if (oreInputStream != null) { |
1623 | 1637 |
Identifier newOreIdentifier = MNodeService.getInstance(request).generateIdentifier(session, MNodeService.UUID_SCHEME, null); |
... | ... | |
1703 | 1717 |
} |
1704 | 1718 |
|
1705 | 1719 |
/** |
1720 |
* Determines if we already have registered an ORE map for this package |
|
1721 |
* NOTE: uses a solr query to locate OREs for the object |
|
1722 |
* @param guid of the EML/packaging object |
|
1723 |
* @return list of resource map identifiers for the given pid |
|
1724 |
*/ |
|
1725 |
public List<Identifier> lookupOreFor(Identifier guid) { |
|
1726 |
// Search for the ORE if we can find it |
|
1727 |
String pid = guid.getValue(); |
|
1728 |
List<Identifier> retList = null; |
|
1729 |
try { |
|
1730 |
MockHttpServletRequest request = new MockHttpServletRequest(null, null, null); |
|
1731 |
String query = "fl=id,resourceMap&wt=xml&q=formatType:METADATA+-obsoletedBy:*+resourceMap:*+id:\"" + pid + "\""; |
|
1732 |
InputStream results = this.query("solr", query); |
|
1733 |
org.w3c.dom.Node rootNode = XMLUtilities.getXMLReaderAsDOMTreeRootNode(new InputStreamReader(results, "UTF-8")); |
|
1734 |
//String resultString = XMLUtilities.getDOMTreeAsString(rootNode); |
|
1735 |
org.w3c.dom.NodeList nodeList = XMLUtilities.getNodeListWithXPath(rootNode, "//arr[@name=\"resourceMap\"]/str"); |
|
1736 |
if (nodeList != null && nodeList.getLength() > 0) { |
|
1737 |
retList = new ArrayList<Identifier>(); |
|
1738 |
for (int i = 0; i < nodeList.getLength(); i++) { |
|
1739 |
String found = nodeList.item(i).getFirstChild().getNodeValue(); |
|
1740 |
Identifier oreId = new Identifier(); |
|
1741 |
oreId.setValue(found); |
|
1742 |
retList.add(oreId); |
|
1743 |
} |
|
1744 |
} |
|
1745 |
} catch (Exception e) { |
|
1746 |
logMetacat.error("Error checking for resourceMap[s] on pid " + pid + ". " + e.getMessage(), e); |
|
1747 |
} |
|
1748 |
|
|
1749 |
return retList; |
|
1750 |
} |
|
1751 |
|
|
1752 |
/** |
|
1706 | 1753 |
* Packages the given package in a Bagit collection for download |
1707 | 1754 |
* @param pid |
1708 | 1755 |
* @throws NotImplemented |
src/edu/ucsb/nceas/metacat/dataone/SystemMetadataFactory.java | ||
---|---|---|
28 | 28 |
import java.io.File; |
29 | 29 |
import java.io.IOException; |
30 | 30 |
import java.io.InputStream; |
31 |
import java.io.InputStreamReader; |
|
32 | 31 |
import java.math.BigInteger; |
33 | 32 |
import java.net.URL; |
34 | 33 |
import java.net.URLConnection; |
... | ... | |
71 | 70 |
import org.dataone.service.util.DateTimeMarshaller; |
72 | 71 |
import org.dspace.foresite.ResourceMap; |
73 | 72 |
import org.jibx.runtime.JiBXException; |
74 |
import org.w3c.dom.Node; |
|
75 |
import org.w3c.dom.NodeList; |
|
76 | 73 |
import org.xml.sax.SAXException; |
77 | 74 |
|
78 | 75 |
import java.util.Calendar; |
... | ... | |
96 | 93 |
import edu.ucsb.nceas.metacat.util.DocumentUtil; |
97 | 94 |
import edu.ucsb.nceas.utilities.ParseLSIDException; |
98 | 95 |
import edu.ucsb.nceas.utilities.PropertyNotFoundException; |
99 |
import edu.ucsb.nceas.utilities.XMLUtilities; |
|
100 | 96 |
|
101 | 97 |
public class SystemMetadataFactory { |
102 | 98 |
|
... | ... | |
610 | 606 |
return sysMeta; |
611 | 607 |
} |
612 | 608 |
|
613 |
/** |
|
609 |
/** |
|
610 |
* Checks for potential ORE object existence |
|
611 |
* @param identifier |
|
612 |
* @return |
|
613 |
*/ |
|
614 |
public static boolean oreExistsFor(Identifier identifier) { |
|
615 |
MockHttpServletRequest request = new MockHttpServletRequest(null, null, null); |
|
616 |
List<Identifier> ids = MNodeService.getInstance(request).lookupOreFor(identifier); |
|
617 |
return (ids != null && ids.size() > 0); |
|
618 |
} |
|
619 |
|
|
620 |
/** |
|
614 | 621 |
* Generate SystemMetadata for any object in the object store that does |
615 | 622 |
* not already have it. SystemMetadata documents themselves are, of course, |
616 | 623 |
* exempt. This is a utility method for migration of existing object |
... | ... | |
661 | 668 |
} |
662 | 669 |
logMetacat.info("done generating system metadata for given list"); |
663 | 670 |
} |
664 |
|
|
665 |
/** |
|
666 |
* Determines if we already have registered an ORE map for this package |
|
667 |
* NOTE: uses a solr query to locate OREs for the object |
|
668 |
* @param guid of the EML/packaging object |
|
669 |
* @return true if there is an ORE map for the given package |
|
670 |
*/ |
|
671 |
public static boolean oreExistsFor(Identifier guid) { |
|
672 |
// Search for the ORE if we can find it |
|
673 |
String pid = guid.getValue(); |
|
674 |
MockHttpServletRequest request = new MockHttpServletRequest(null, null, null); |
|
675 |
String query = "fl=id,resourceMap&wt=xml&q=formatType:METADATA+-obsoletedBy:*+resourceMap:*+id:\"" + pid + "\""; |
|
676 |
try { |
|
677 |
InputStream results = MNodeService.getInstance(request).query("solr", query); |
|
678 |
Node rootNode = XMLUtilities.getXMLReaderAsDOMTreeRootNode(new InputStreamReader(results, "UTF-8")); |
|
679 |
//String resultString = XMLUtilities.getDOMTreeAsString(rootNode); |
|
680 |
NodeList nodeList = XMLUtilities.getNodeListWithXPath(rootNode, "//arr[@name=\"resourceMap\"]/str"); |
|
681 |
if (nodeList != null && nodeList.getLength() > 0) { |
|
682 |
//String found = nodeList.item(0).getFirstChild().getNodeValue(); |
|
683 |
return true; |
|
684 |
} |
|
685 |
} catch (Exception e) { |
|
686 |
logMetacat.error("Error checking for resourceMap[s] on pid " + pid + ". " + e.getMessage(), e); |
|
687 |
} |
|
688 |
|
|
689 |
return false; |
|
690 |
} |
|
691 | 671 |
|
692 | 672 |
/** |
693 | 673 |
* Find the size (in bytes) of a stream. Note: This needs to be refactored out
Also available in: Unified diff
Better checking for ORE maps when publishing DOIs (the packages that contain the science metadata need to be updated). See https://projects.ecoinformatics.org/ecoinfo/issues/6061