Project

General

Profile

« Previous | Next » 

Revision 8437

Retrieve the given file names (entityName) of the data files from the EML and use them as the file names in the BagIt content.

View differences:

src/edu/ucsb/nceas/metacat/dataone/MNodeService.java
40 40
import java.util.ArrayList;
41 41
import java.util.Calendar;
42 42
import java.util.Date;
43
import java.util.HashMap;
43 44
import java.util.HashSet;
44 45
import java.util.Hashtable;
45 46
import java.util.List;
......
54 55
import org.apache.commons.beanutils.BeanUtils;
55 56
import org.apache.commons.io.IOUtils;
56 57
import org.apache.log4j.Logger;
58
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
57 59
import org.dataone.client.CNode;
58 60
import org.dataone.client.D1Client;
59 61
import org.dataone.client.MNode;
......
116 118
import org.dspace.foresite.OREParserException;
117 119
import org.dspace.foresite.ORESerialiserException;
118 120
import org.dspace.foresite.ResourceMap;
121
import org.ecoinformatics.datamanager.parser.DataPackage;
122
import org.ecoinformatics.datamanager.parser.Entity;
123
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
124
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
119 125

  
120 126
import edu.ucsb.nceas.ezid.EZIDException;
121 127
import edu.ucsb.nceas.metacat.DBQuery;
122 128
import edu.ucsb.nceas.metacat.DBTransform;
129
import edu.ucsb.nceas.metacat.DocumentImpl;
123 130
import edu.ucsb.nceas.metacat.EventLog;
124 131
import edu.ucsb.nceas.metacat.IdentifierManager;
125 132
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
133
import edu.ucsb.nceas.metacat.McdbException;
126 134
import edu.ucsb.nceas.metacat.MetaCatServlet;
127 135
import edu.ucsb.nceas.metacat.MetacatHandler;
128

  
129 136
import edu.ucsb.nceas.metacat.common.query.EnabledQueryEngines;
130 137
import edu.ucsb.nceas.metacat.common.query.stream.ContentTypeByteArrayInputStream;
131 138
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
......
1827 1834
		
1828 1835
		// catch non-D1 service errors and throw as ServiceFailures
1829 1836
		try {
1837
			//Create a map of dataone ids and file names
1838
			Map<Identifier, String> fileNames = new HashMap<Identifier, String>();
1830 1839
			
1831 1840
			// find the package contents
1832 1841
			SystemMetadata sysMeta = this.getSystemMetadata(session, pid);
1833 1842
			if (ObjectFormatCache.getInstance().getFormat(sysMeta.getFormatId()).getFormatType().equals("RESOURCE")) {
1843
				//Get the resource map as a map of Identifiers
1834 1844
				InputStream oreInputStream = this.get(session, pid);
1835 1845
				Map<Identifier, Map<Identifier, List<Identifier>>> resourceMapStructure = ResourceMapFactory.getInstance().parseResourceMap(oreInputStream);
1836 1846
				packagePids.addAll(resourceMapStructure.keySet());
1847
				//Loop through each object in this resource map
1837 1848
				for (Map<Identifier, List<Identifier>> entries: resourceMapStructure.values()) {
1849
					//Loop through each metadata object in this entry
1850
					Set<Identifier> metadataIdentifiers = entries.keySet();
1851
					for(Identifier metadataID: metadataIdentifiers){
1852
						try{
1853
							//Get the system metadata for this metadata object
1854
							SystemMetadata metadataSysMeta = this.getSystemMetadata(session, metadataID);
1855
							
1856
							//If this is in eml format, extract the filename and GUID from each entity in its package
1857
							if (metadataSysMeta.getFormatId().getValue().startsWith("eml://")) {
1858
								//Get the package
1859
								DataPackageParserInterface parser = new Eml200DataPackageParser();
1860
								InputStream emlStream = this.get(session, metadataID);
1861
								parser.parse(emlStream);
1862
								DataPackage dataPackage = parser.getDataPackage();
1863
								
1864
								//Get all the entities in this package and loop through each to extract its ID and file name
1865
								Entity[] entities = dataPackage.getEntityList();
1866
								for(Entity entity: entities){
1867
									try{
1868
										//Get the file name from the metadata
1869
										String fileNameFromMetadata = entity.getName();
1870
										
1871
										//Get the ecogrid URL from the metadata
1872
										String ecogridIdentifier = entity.getEntityIdentifier();
1873
										//Parse the ecogrid URL to get the local id
1874
										String idFromMetadata = DocumentUtil.getAccessionNumberFromEcogridIdentifier(ecogridIdentifier);
1875
										
1876
										//Get the docid and rev pair
1877
										String docid = DocumentUtil.getDocIdFromString(idFromMetadata);
1878
										String rev = DocumentUtil.getRevisionStringFromString(idFromMetadata);
1879
										
1880
										//Get the GUID
1881
										String guid = IdentifierManager.getInstance().getGUID(docid, Integer.valueOf(rev));
1882
										Identifier dataIdentifier = new Identifier();
1883
										dataIdentifier.setValue(guid);
1884
										
1885
										//Add the GUID to our GUID & file name map
1886
										fileNames.put(dataIdentifier, fileNameFromMetadata);
1887
									}
1888
									catch(Exception e){
1889
										//Prevent an error in a single entity from stopping the processing of the remaining entities
1890
										e.printStackTrace();
1891
										logMetacat.debug(e.getMessage(), e);
1892
									}
1893
								}
1894
							}
1895
						}
1896
						catch(Exception e){
1897
							//Catch errors that would prevent package download
1898
							logMetacat.debug(e.toString());
1899
						}
1900
					}
1838 1901
					packagePids.addAll(entries.keySet());
1839 1902
					for (List<Identifier> dataPids: entries.values()) {
1840 1903
						packagePids.addAll(dataPids);
......
1845 1908
				packagePids.add(pid);
1846 1909
			}
1847 1910
			
1848
			//Create a temp directory in the default temp directory
1849
			String defaultTempDir = System.getProperty("java.io.tmpdir");
1850
			File tempDir = new File(defaultTempDir + "/" + System.nanoTime());
1911
			//Create a temp file, then delete it and make a directory with that name
1912
			File tempDir = File.createTempFile("temp", Long.toString(System.nanoTime()));
1913
			tempDir.delete();
1914
			tempDir = new File(tempDir.getPath() + "_dir");
1915
			tempDir.mkdir();			
1851 1916
			tempFiles.add(tempDir);
1852
			tempDir.mkdir();
1853 1917
			
1854 1918
			// track the pid-to-file mapping
1855 1919
			StringBuffer pidMapping = new StringBuffer();
......
1857 1921
			// loop through the package contents
1858 1922
			for (Identifier entryPid: packagePids) {
1859 1923
				//Get the system metadata for each item
1860
				SystemMetadata entrySysMeta = this.getSystemMetadata(session, entryPid);
1924
				SystemMetadata entrySysMeta = this.getSystemMetadata(session, entryPid);					
1861 1925
				
1862
				//Create the temp file extension and prefix
1863
				String extension = ObjectFormatInfo.instance().getExtension(entrySysMeta.getFormatId().getValue());
1864 1926
				String objectFormatType = ObjectFormatCache.getInstance().getFormat(entrySysMeta.getFormatId()).getFormatType();
1865
				String fileName = entryPid.getValue().replaceAll("\\W+", "_") + "-" + objectFormatType;			
1927
				String fileName = null;
1866 1928
				
1929
				//TODO: Be more specific about which characters to replace. Make sure periods aren't replaced in the file name from the metadata
1930
				//Our default file name is just the ID + format type (e.g. walker.1.1-DATA)
1931
				fileName = entryPid.getValue().replaceAll("[^a-zA-Z0-9\\-\\.]", "_") + "-" + objectFormatType;
1932

  
1933
				if(fileNames.containsKey(entryPid)){
1934
					//Let's use the file name and extension from the metadata if we have it
1935
					fileName = entryPid.getValue().replaceAll("[^a-zA-Z0-9\\-\\.]", "_") + "-" + fileNames.get(entryPid).replaceAll("[^a-zA-Z0-9\\-\\.]", "_");
1936
				}
1937
				else{
1938
					//If we couldn't find a given file name, use the system metadata extension
1939
					String extension = ObjectFormatInfo.instance().getExtension(entrySysMeta.getFormatId().getValue());
1940
					fileName += extension;
1941
				}
1942
				
1867 1943
		        //Create a new file for this item and add to the list
1868
				File tempFile = new File(tempDir, fileName+extension);
1944
				File tempFile = new File(tempDir, fileName);
1869 1945
				tempFiles.add(tempFile);
1870 1946
				
1871 1947
				InputStream entryInputStream = this.get(session, entryPid);			
......
1883 1959
			bag = bag.makeComplete();
1884 1960
			
1885 1961
			///Now create the zip file
1886
			//Use the pid as the file name prefix, replacing illegal characters with a hyphen
1887
			String zipName = pid.getValue().replaceAll("\\W+", "_");
1962
			//Use the pid as the file name prefix, replacing each non-word character with an underscore
1963
			String zipName = pid.getValue().replaceAll("\\W", "_");
1888 1964
			
1889 1965
			File bagFile = new File(tempDir, zipName+".zip");
1890 1966
			

Also available in: Unified diff