Revision 8437
Added by Lauren Walker about 11 years ago
src/edu/ucsb/nceas/metacat/dataone/MNodeService.java | ||
---|---|---|
40 | 40 |
import java.util.ArrayList; |
41 | 41 |
import java.util.Calendar; |
42 | 42 |
import java.util.Date; |
43 |
import java.util.HashMap; |
|
43 | 44 |
import java.util.HashSet; |
44 | 45 |
import java.util.Hashtable; |
45 | 46 |
import java.util.List; |
... | ... | |
54 | 55 |
import org.apache.commons.beanutils.BeanUtils; |
55 | 56 |
import org.apache.commons.io.IOUtils; |
56 | 57 |
import org.apache.log4j.Logger; |
58 |
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest; |
|
57 | 59 |
import org.dataone.client.CNode; |
58 | 60 |
import org.dataone.client.D1Client; |
59 | 61 |
import org.dataone.client.MNode; |
... | ... | |
116 | 118 |
import org.dspace.foresite.OREParserException; |
117 | 119 |
import org.dspace.foresite.ORESerialiserException; |
118 | 120 |
import org.dspace.foresite.ResourceMap; |
121 |
import org.ecoinformatics.datamanager.parser.DataPackage; |
|
122 |
import org.ecoinformatics.datamanager.parser.Entity; |
|
123 |
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface; |
|
124 |
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser; |
|
119 | 125 |
|
120 | 126 |
import edu.ucsb.nceas.ezid.EZIDException; |
121 | 127 |
import edu.ucsb.nceas.metacat.DBQuery; |
122 | 128 |
import edu.ucsb.nceas.metacat.DBTransform; |
129 |
import edu.ucsb.nceas.metacat.DocumentImpl; |
|
123 | 130 |
import edu.ucsb.nceas.metacat.EventLog; |
124 | 131 |
import edu.ucsb.nceas.metacat.IdentifierManager; |
125 | 132 |
import edu.ucsb.nceas.metacat.McdbDocNotFoundException; |
133 |
import edu.ucsb.nceas.metacat.McdbException; |
|
126 | 134 |
import edu.ucsb.nceas.metacat.MetaCatServlet; |
127 | 135 |
import edu.ucsb.nceas.metacat.MetacatHandler; |
128 |
|
|
129 | 136 |
import edu.ucsb.nceas.metacat.common.query.EnabledQueryEngines; |
130 | 137 |
import edu.ucsb.nceas.metacat.common.query.stream.ContentTypeByteArrayInputStream; |
131 | 138 |
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService; |
... | ... | |
1827 | 1834 |
|
1828 | 1835 |
// catch non-D1 service errors and throw as ServiceFailures |
1829 | 1836 |
try { |
1837 |
//Create a map of dataone ids and file names |
|
1838 |
Map<Identifier, String> fileNames = new HashMap<Identifier, String>(); |
|
1830 | 1839 |
|
1831 | 1840 |
// find the package contents |
1832 | 1841 |
SystemMetadata sysMeta = this.getSystemMetadata(session, pid); |
1833 | 1842 |
if (ObjectFormatCache.getInstance().getFormat(sysMeta.getFormatId()).getFormatType().equals("RESOURCE")) { |
1843 |
//Get the resource map as a map of Identifiers |
|
1834 | 1844 |
InputStream oreInputStream = this.get(session, pid); |
1835 | 1845 |
Map<Identifier, Map<Identifier, List<Identifier>>> resourceMapStructure = ResourceMapFactory.getInstance().parseResourceMap(oreInputStream); |
1836 | 1846 |
packagePids.addAll(resourceMapStructure.keySet()); |
1847 |
//Loop through each object in this resource map |
|
1837 | 1848 |
for (Map<Identifier, List<Identifier>> entries: resourceMapStructure.values()) { |
1849 |
//Loop through each metadata object in this entry |
|
1850 |
Set<Identifier> metadataIdentifiers = entries.keySet(); |
|
1851 |
for(Identifier metadataID: metadataIdentifiers){ |
|
1852 |
try{ |
|
1853 |
//Get the system metadata for this metadata object |
|
1854 |
SystemMetadata metadataSysMeta = this.getSystemMetadata(session, metadataID); |
|
1855 |
|
|
1856 |
//If this is in eml format, extract the filename and GUID from each entity in its package |
|
1857 |
if (metadataSysMeta.getFormatId().getValue().startsWith("eml://")) { |
|
1858 |
//Get the package |
|
1859 |
DataPackageParserInterface parser = new Eml200DataPackageParser(); |
|
1860 |
InputStream emlStream = this.get(session, metadataID); |
|
1861 |
parser.parse(emlStream); |
|
1862 |
DataPackage dataPackage = parser.getDataPackage(); |
|
1863 |
|
|
1864 |
//Get all the entities in this package and loop through each to extract its ID and file name |
|
1865 |
Entity[] entities = dataPackage.getEntityList(); |
|
1866 |
for(Entity entity: entities){ |
|
1867 |
try{ |
|
1868 |
//Get the file name from the metadata |
|
1869 |
String fileNameFromMetadata = entity.getName(); |
|
1870 |
|
|
1871 |
//Get the ecogrid URL from the metadata |
|
1872 |
String ecogridIdentifier = entity.getEntityIdentifier(); |
|
1873 |
//Parse the ecogrid URL to get the local id |
|
1874 |
String idFromMetadata = DocumentUtil.getAccessionNumberFromEcogridIdentifier(ecogridIdentifier); |
|
1875 |
|
|
1876 |
//Get the docid and rev pair |
|
1877 |
String docid = DocumentUtil.getDocIdFromString(idFromMetadata); |
|
1878 |
String rev = DocumentUtil.getRevisionStringFromString(idFromMetadata); |
|
1879 |
|
|
1880 |
//Get the GUID |
|
1881 |
String guid = IdentifierManager.getInstance().getGUID(docid, Integer.valueOf(rev)); |
|
1882 |
Identifier dataIdentifier = new Identifier(); |
|
1883 |
dataIdentifier.setValue(guid); |
|
1884 |
|
|
1885 |
//Add the GUID to our GUID & file name map |
|
1886 |
fileNames.put(dataIdentifier, fileNameFromMetadata); |
|
1887 |
} |
|
1888 |
catch(Exception e){ |
|
1889 |
//Prevent an error in just one entity from stopping the processing of the others |
|
1890 |
e.printStackTrace(); |
|
1891 |
logMetacat.debug(e.getMessage(), e); |
|
1892 |
} |
|
1893 |
} |
|
1894 |
} |
|
1895 |
} |
|
1896 |
catch(Exception e){ |
|
1897 |
//Catch errors that would prevent package download |
|
1898 |
logMetacat.debug(e.toString()); |
|
1899 |
} |
|
1900 |
} |
|
1838 | 1901 |
packagePids.addAll(entries.keySet()); |
1839 | 1902 |
for (List<Identifier> dataPids: entries.values()) { |
1840 | 1903 |
packagePids.addAll(dataPids); |
... | ... | |
1845 | 1908 |
packagePids.add(pid); |
1846 | 1909 |
} |
1847 | 1910 |
|
1848 |
//Create a temp directory in the default temp directory |
|
1849 |
String defaultTempDir = System.getProperty("java.io.tmpdir"); |
|
1850 |
File tempDir = new File(defaultTempDir + "/" + System.nanoTime()); |
|
1911 |
//Create a temp file, then delete it and make a directory named after it (with a "_dir" suffix) |
|
1912 |
File tempDir = File.createTempFile("temp", Long.toString(System.nanoTime())); |
|
1913 |
tempDir.delete(); |
|
1914 |
tempDir = new File(tempDir.getPath() + "_dir"); |
|
1915 |
tempDir.mkdir(); |
|
1851 | 1916 |
tempFiles.add(tempDir); |
1852 |
tempDir.mkdir(); |
|
1853 | 1917 |
|
1854 | 1918 |
// track the pid-to-file mapping |
1855 | 1919 |
StringBuffer pidMapping = new StringBuffer(); |
... | ... | |
1857 | 1921 |
// loop through the package contents |
1858 | 1922 |
for (Identifier entryPid: packagePids) { |
1859 | 1923 |
//Get the system metadata for each item |
1860 |
SystemMetadata entrySysMeta = this.getSystemMetadata(session, entryPid); |
|
1924 |
SystemMetadata entrySysMeta = this.getSystemMetadata(session, entryPid);
|
|
1861 | 1925 |
|
1862 |
//Create the temp file extension and prefix |
|
1863 |
String extension = ObjectFormatInfo.instance().getExtension(entrySysMeta.getFormatId().getValue()); |
|
1864 | 1926 |
String objectFormatType = ObjectFormatCache.getInstance().getFormat(entrySysMeta.getFormatId()).getFormatType(); |
1865 |
String fileName = entryPid.getValue().replaceAll("\\W+", "_") + "-" + objectFormatType;
|
|
1927 |
String fileName = null;
|
|
1866 | 1928 |
|
1929 |
//TODO: Be more specific about which characters to replace. Make sure periods aren't replaced in the file name from metadata |
|
1930 |
//Our default file name is just the ID + format type (e.g. walker.1.1-DATA) |
|
1931 |
fileName = entryPid.getValue().replaceAll("[^a-zA-Z0-9\\-\\.]", "_") + "-" + objectFormatType; |
|
1932 |
|
|
1933 |
if(fileNames.containsKey(entryPid)){ |
|
1934 |
//Let's use the file name and extension from the metadata if we have it |
|
1935 |
fileName = entryPid.getValue().replaceAll("[^a-zA-Z0-9\\-\\.]", "_") + "-" + fileNames.get(entryPid).replaceAll("[^a-zA-Z0-9\\-\\.]", "_"); |
|
1936 |
} |
|
1937 |
else{ |
|
1938 |
//If we couldn't find a given file name, use the system metadata extension |
|
1939 |
String extension = ObjectFormatInfo.instance().getExtension(entrySysMeta.getFormatId().getValue()); |
|
1940 |
fileName += extension; |
|
1941 |
} |
|
1942 |
|
|
1867 | 1943 |
//Create a new file for this item and add to the list |
1868 |
File tempFile = new File(tempDir, fileName+extension);
|
|
1944 |
File tempFile = new File(tempDir, fileName); |
|
1869 | 1945 |
tempFiles.add(tempFile); |
1870 | 1946 |
|
1871 | 1947 |
InputStream entryInputStream = this.get(session, entryPid); |
... | ... | |
1883 | 1959 |
bag = bag.makeComplete(); |
1884 | 1960 |
|
1885 | 1961 |
///Now create the zip file |
1886 |
//Use the pid as the file name prefix, replacing illegal characters with a hyphen
|
|
1887 |
String zipName = pid.getValue().replaceAll("\\W+", "_");
|
|
1962 |
//Use the pid as the file name prefix, replacing each non-word character with an underscore
|
|
1963 |
String zipName = pid.getValue().replaceAll("\\W", "_"); |
|
1888 | 1964 |
|
1889 | 1965 |
File bagFile = new File(tempDir, zipName+".zip"); |
1890 | 1966 |
|
Also available in: Unified diff
Retrieve the given file names (entityName) of the data files from the EML and use them as the file names in the BagIt content.