Project

General

Profile

Revision 8437

retrieve the given file names (entityName) of the data files from the EML and use those for our file names in the BagIt content.

View differences:

src/edu/ucsb/nceas/metacat/dataone/MNodeService.java
40 40
import java.util.ArrayList;
41 41
import java.util.Calendar;
42 42
import java.util.Date;
43
import java.util.HashMap;
43 44
import java.util.HashSet;
44 45
import java.util.Hashtable;
45 46
import java.util.List;
......
54 55
import org.apache.commons.beanutils.BeanUtils;
55 56
import org.apache.commons.io.IOUtils;
56 57
import org.apache.log4j.Logger;
58
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
57 59
import org.dataone.client.CNode;
58 60
import org.dataone.client.D1Client;
59 61
import org.dataone.client.MNode;
......
116 118
import org.dspace.foresite.OREParserException;
117 119
import org.dspace.foresite.ORESerialiserException;
118 120
import org.dspace.foresite.ResourceMap;
121
import org.ecoinformatics.datamanager.parser.DataPackage;
122
import org.ecoinformatics.datamanager.parser.Entity;
123
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
124
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
119 125

  
120 126
import edu.ucsb.nceas.ezid.EZIDException;
121 127
import edu.ucsb.nceas.metacat.DBQuery;
122 128
import edu.ucsb.nceas.metacat.DBTransform;
129
import edu.ucsb.nceas.metacat.DocumentImpl;
123 130
import edu.ucsb.nceas.metacat.EventLog;
124 131
import edu.ucsb.nceas.metacat.IdentifierManager;
125 132
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
133
import edu.ucsb.nceas.metacat.McdbException;
126 134
import edu.ucsb.nceas.metacat.MetaCatServlet;
127 135
import edu.ucsb.nceas.metacat.MetacatHandler;
128

  
129 136
import edu.ucsb.nceas.metacat.common.query.EnabledQueryEngines;
130 137
import edu.ucsb.nceas.metacat.common.query.stream.ContentTypeByteArrayInputStream;
131 138
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
......
1827 1834
		
1828 1835
		// catch non-D1 service errors and throw as ServiceFailures
1829 1836
		try {
1837
			//Create a map of dataone ids and file names
1838
			Map<Identifier, String> fileNames = new HashMap<Identifier, String>();
1830 1839
			
1831 1840
			// find the package contents
1832 1841
			SystemMetadata sysMeta = this.getSystemMetadata(session, pid);
1833 1842
			if (ObjectFormatCache.getInstance().getFormat(sysMeta.getFormatId()).getFormatType().equals("RESOURCE")) {
1843
				//Get the resource map as a map of Identifiers
1834 1844
				InputStream oreInputStream = this.get(session, pid);
1835 1845
				Map<Identifier, Map<Identifier, List<Identifier>>> resourceMapStructure = ResourceMapFactory.getInstance().parseResourceMap(oreInputStream);
1836 1846
				packagePids.addAll(resourceMapStructure.keySet());
1847
				//Loop through each object in this resource map
1837 1848
				for (Map<Identifier, List<Identifier>> entries: resourceMapStructure.values()) {
1849
					//Loop through each metadata object in this entry
1850
					Set<Identifier> metadataIdentifiers = entries.keySet();
1851
					for(Identifier metadataID: metadataIdentifiers){
1852
						try{
1853
							//Get the system metadata for this metadata object
1854
							SystemMetadata metadataSysMeta = this.getSystemMetadata(session, metadataID);
1855
							
1856
							//If this is in eml format, extract the filename and GUID from each entity in its package
1857
							if (metadataSysMeta.getFormatId().getValue().startsWith("eml://")) {
1858
								//Get the package
1859
								DataPackageParserInterface parser = new Eml200DataPackageParser();
1860
								InputStream emlStream = this.get(session, metadataID);
1861
								parser.parse(emlStream);
1862
								DataPackage dataPackage = parser.getDataPackage();
1863
								
1864
								//Get all the entities in this package and loop through each to extract its ID and file name
1865
								Entity[] entities = dataPackage.getEntityList();
1866
								for(Entity entity: entities){
1867
									try{
1868
										//Get the file name from the metadata
1869
										String fileNameFromMetadata = entity.getName();
1870
										
1871
										//Get the ecogrid URL from the metadata
1872
										String ecogridIdentifier = entity.getEntityIdentifier();
1873
										//Parse the ecogrid URL to get the local id
1874
										String idFromMetadata = DocumentUtil.getAccessionNumberFromEcogridIdentifier(ecogridIdentifier);
1875
										
1876
										//Get the docid and rev pair
1877
										String docid = DocumentUtil.getDocIdFromString(idFromMetadata);
1878
										String rev = DocumentUtil.getRevisionStringFromString(idFromMetadata);
1879
										
1880
										//Get the GUID
1881
										String guid = IdentifierManager.getInstance().getGUID(docid, Integer.valueOf(rev));
1882
										Identifier dataIdentifier = new Identifier();
1883
										dataIdentifier.setValue(guid);
1884
										
1885
										//Add the GUID to our GUID & file name map
1886
										fileNames.put(dataIdentifier, fileNameFromMetadata);
1887
									}
1888
									catch(Exception e){
1889
										//Don't let an error in a single entity prevent the remaining entities from being processed
1890
										e.printStackTrace();
1891
										logMetacat.debug(e.getMessage(), e);
1892
									}
1893
								}
1894
							}
1895
						}
1896
						catch(Exception e){
1897
							//Catch errors that would prevent package download
1898
							logMetacat.debug(e.toString());
1899
						}
1900
					}
1838 1901
					packagePids.addAll(entries.keySet());
1839 1902
					for (List<Identifier> dataPids: entries.values()) {
1840 1903
						packagePids.addAll(dataPids);
......
1845 1908
				packagePids.add(pid);
1846 1909
			}
1847 1910
			
1848
			//Create a temp directory in the default temp directory
1849
			String defaultTempDir = System.getProperty("java.io.tmpdir");
1850
			File tempDir = new File(defaultTempDir + "/" + System.nanoTime());
1911
			//Create a temp file, then delete it and make a directory with that name
1912
			File tempDir = File.createTempFile("temp", Long.toString(System.nanoTime()));
1913
			tempDir.delete();
1914
			tempDir = new File(tempDir.getPath() + "_dir");
1915
			tempDir.mkdir();			
1851 1916
			tempFiles.add(tempDir);
1852
			tempDir.mkdir();
1853 1917
			
1854 1918
			// track the pid-to-file mapping
1855 1919
			StringBuffer pidMapping = new StringBuffer();
......
1857 1921
			// loop through the package contents
1858 1922
			for (Identifier entryPid: packagePids) {
1859 1923
				//Get the system metadata for each item
1860
				SystemMetadata entrySysMeta = this.getSystemMetadata(session, entryPid);
1924
				SystemMetadata entrySysMeta = this.getSystemMetadata(session, entryPid);					
1861 1925
				
1862
				//Create the temp file extension and prefix
1863
				String extension = ObjectFormatInfo.instance().getExtension(entrySysMeta.getFormatId().getValue());
1864 1926
				String objectFormatType = ObjectFormatCache.getInstance().getFormat(entrySysMeta.getFormatId()).getFormatType();
1865
				String fileName = entryPid.getValue().replaceAll("\\W+", "_") + "-" + objectFormatType;			
1927
				String fileName = null;
1866 1928
				
1929
				//TODO: Be more specific about which characters to replace. Make sure periods aren't replaced in the file name from the metadata
1930
				//Our default file name is just the ID + format type (e.g. walker.1.1-DATA)
1931
				fileName = entryPid.getValue().replaceAll("[^a-zA-Z0-9\\-\\.]", "_") + "-" + objectFormatType;
1932

  
1933
				if(fileNames.containsKey(entryPid)){
1934
					//Let's use the file name and extension from the metadata if we have it
1935
					fileName = entryPid.getValue().replaceAll("[^a-zA-Z0-9\\-\\.]", "_") + "-" + fileNames.get(entryPid).replaceAll("[^a-zA-Z0-9\\-\\.]", "_");
1936
				}
1937
				else{
1938
					//If we couldn't find a given file name, use the system metadata extension
1939
					String extension = ObjectFormatInfo.instance().getExtension(entrySysMeta.getFormatId().getValue());
1940
					fileName += extension;
1941
				}
1942
				
1867 1943
		        //Create a new file for this item and add to the list
1868
				File tempFile = new File(tempDir, fileName+extension);
1944
				File tempFile = new File(tempDir, fileName);
1869 1945
				tempFiles.add(tempFile);
1870 1946
				
1871 1947
				InputStream entryInputStream = this.get(session, entryPid);			
......
1883 1959
			bag = bag.makeComplete();
1884 1960
			
1885 1961
			///Now create the zip file
1886
			//Use the pid as the file name prefix, replacing illegal characters with a hyphen
1887
			String zipName = pid.getValue().replaceAll("\\W+", "_");
1962
			//Use the pid as the file name prefix, replacing each non-word character with an underscore
1963
			String zipName = pid.getValue().replaceAll("\\W", "_");
1888 1964
			
1889 1965
			File bagFile = new File(tempDir, zipName+".zip");
1890 1966
			

Also available in: Unified diff