Project

General

Profile

« Previous | Next » 

Revision 7322

Utility methods to update/reserialize existing ORE maps that were generated with an older version of the foresite library (and therefore included bad dateTime strings).
https://redmine.dataone.org/issues/3046

View differences:

src/edu/ucsb/nceas/metacat/dataone/SystemMetadataFactory.java
676 676
	 * 
677 677
	 * @throws IOException
678 678
	 */
679
	private static long sizeOfStream(InputStream is) throws IOException {
679
	public static long sizeOfStream(InputStream is) throws IOException {
680 680

  
681 681
		long size = 0;
682 682
		byte[] b = new byte[1024];
src/edu/ucsb/nceas/metacat/admin/upgrade/dataone/GenerateORE.java
26 26
 */
27 27

  
28 28

  
29
import java.io.InputStream;
30
import java.math.BigInteger;
31
import java.util.ArrayList;
32
import java.util.Calendar;
29 33
import java.util.Collections;
34
import java.util.Date;
30 35
import java.util.List;
36
import java.util.Map;
31 37

  
38
import org.apache.commons.beanutils.BeanUtils;
39
import org.apache.commons.io.IOUtils;
32 40
import org.apache.commons.logging.Log;
33 41
import org.apache.commons.logging.LogFactory;
42
import org.dataone.client.D1Client;
43
import org.dataone.client.MNode;
44
import org.dataone.client.ObjectFormatCache;
45
import org.dataone.ore.ResourceMapFactory;
46
import org.dataone.service.types.v1.Checksum;
47
import org.dataone.service.types.v1.Identifier;
48
import org.dataone.service.types.v1.ObjectFormatIdentifier;
49
import org.dataone.service.types.v1.ObjectInfo;
50
import org.dataone.service.types.v1.ObjectList;
51
import org.dataone.service.types.v1.SystemMetadata;
52
import org.dataone.service.types.v1.util.ChecksumUtil;
53
import org.dspace.foresite.ResourceMap;
34 54

  
35 55
import edu.ucsb.nceas.metacat.DBUtil;
36 56
import edu.ucsb.nceas.metacat.DocumentImpl;
57
import edu.ucsb.nceas.metacat.MetaCatServlet;
37 58
import edu.ucsb.nceas.metacat.admin.AdminException;
38 59
import edu.ucsb.nceas.metacat.admin.upgrade.UpgradeUtilityInterface;
39 60
import edu.ucsb.nceas.metacat.dataone.SystemMetadataFactory;
......
97 118
	public void setServerLocation(int serverLocation) {
98 119
		this.serverLocation = serverLocation;
99 120
	}
121
	
122
	/**
123
	 * Need to update the existing ORE maps to have correct dateTime serializations
124
	 * see: https://redmine.dataone.org/issues/3046
125
	 * @param mnBaseUrl
126
	 */
127
	public static void updateOREdateFormat(String mnBaseUrl) {
128
		
129
		List<Identifier> orePids = getAllOREpids(mnBaseUrl);
130
		updateOREs(orePids, "b", mnBaseUrl);
131
	}
132
	
133
	/**
134
	 * Retrieves a list of all ORE objects on the given MN
135
	 * @param mnBaseUrl
136
	 * @return
137
	 */
138
	public static List<Identifier> getAllOREpids(String mnBaseUrl) {
139
		
140
		MNode mn = null;
141
		ObjectFormatIdentifier formatId = null;
142
		List<Identifier> pids = null;
143
		try {
144
			
145
			// get the MN
146
			mn = D1Client.getMN(mnBaseUrl);
147
			
148
			// get the ORE format id
149
	        formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
150
	        
151
	        // get the objects that match
152
			ObjectList objectList = mn.listObjects(null, null, null, formatId , null, 0, Integer.MAX_VALUE);
153
			pids = new ArrayList<Identifier>();
154
			for (ObjectInfo o: objectList.getObjectInfoList()) {
155
				pids.add(o.getIdentifier());
156
			}
157
			
158
		} catch (Exception e) {
159
			log.error("Could not get MN list of ORE pids", e);
160
		}
161
		
162
		return pids;
163
				
164
	}
165
	
166
	/**
167
	 * Updates the given OREs by regenerating and reserializing the RDF using the updated foresite library
168
	 * Only non-obsolete, non-archived ORE objects are updated and their SystemMetadata is based on the original version.
169
	 * see: https://redmine.dataone.org/issues/3046
170
	 * @param orePids
171
	 * @param pidSuffix
172
	 * @param mnBaseUrl
173
	 */
174
	public static void updateOREs(List<Identifier> orePids, String pidSuffix, String mnBaseUrl) {
175
		
176
		// get a MN client for this local node, or use the given baseUrl
177
		MNode mn = null;
178
		try {
179
			mn = D1Client.getMN(mnBaseUrl);
180
		} catch (Exception e) {
181
			log.error("Could not get MN client", e);
182
			// nothing more we can do here
183
			return;
184
		}
185
		
186
		// make sure we have something for the suffix
187
		if (pidSuffix == null) {
188
			pidSuffix = "b";
189
		}
190
		
191
		for (Identifier orePid: orePids) {
192
			try {
193
				
194
				log.debug("processing ORE pid: " + orePid.getValue());
100 195

  
196
				// get original SystemMetadata
197
				SystemMetadata originalOreSysMeta = mn.getSystemMetadata(orePid);
198
				
199
				// only update the CURRENT revision of the ORE
200
				if (originalOreSysMeta.getObsoletedBy() != null || originalOreSysMeta.getArchived()) {
201
					log.debug("ORE pid is obsolete or archived, skipping: " + orePid.getValue());
202
					continue;
203
				}
204
				
205
				// get the original ORE map
206
				InputStream originalOreStream = mn.get(orePid);
207
				Map<Identifier, Map<Identifier, List<Identifier>>> originalOre = ResourceMapFactory.getInstance().parseResourceMap(originalOreStream);
208

  
209
				// generate the updated ORE map, in this case we aren't changing any values, just altering the serialization using a newer foresite library
210
				Identifier updatedOrePid = new Identifier();
211
				updatedOrePid.setValue(orePid.getValue() + pidSuffix);
212
				ResourceMap updatedOre = ResourceMapFactory.getInstance().createResourceMap(updatedOrePid , originalOre.entrySet().iterator().next().getValue());
213
				String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(updatedOre);
214
	            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
215
	            Date today = Calendar.getInstance().getTime();
216
	            
217
				// copy the existing SystemMetada into the new SystemMetadata
218
				SystemMetadata updatedOreSysMeta = new SystemMetadata();
219
				BeanUtils.copyProperties(updatedOreSysMeta, originalOreSysMeta);
220
				
221
				// set the new SystemMetadata values
222
				updatedOreSysMeta.setIdentifier(updatedOrePid);
223
				updatedOreSysMeta.setObsoletes(orePid);
224
				updatedOreSysMeta.setObsoletedBy(null);
225
				updatedOreSysMeta.setArchived(false);
226
	            updatedOreSysMeta.setChecksum(oreChecksum);
227
	            updatedOreSysMeta.setSize(BigInteger.valueOf(SystemMetadataFactory.sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
228
	            updatedOreSysMeta.setDateSysMetadataModified(today);
229
	            updatedOreSysMeta.setDateUploaded(today);
230
	            updatedOreSysMeta.setReplicaList(null);
231

  
232
	            // save the updated ORE to the MN
233
				InputStream updatedOreStream = IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING);
234
				mn.update(null, orePid, updatedOreStream, updatedOrePid, updatedOreSysMeta);
235
				
236
				
237
			} catch (Exception e) {
238
				log.error("Could not update ORE map: " + orePid, e);
239
				
240
				// go to the next record, there's nothing else to do here
241
				continue;
242
			}
243
			
244
		}
245
	}
246

  
101 247
	public static void main(String [] ags){
102 248

  
103 249
        try {

Also available in: Unified diff