Project

General

Profile

1
package edu.ucsb.nceas.metacat.admin.upgrade.dataone;
2
/**
 *  '$RCSfile$'
 *    Purpose: A Class for generating missing ORE resource maps (and the
 *             accompanying system metadata) for EML documents on this server
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2013-09-13 15:53:44 -0700 (Fri, 13 Sep 2013) $'
 * '$Revision: 8200 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
27

    
28

    
29
import java.io.InputStream;
30
import java.math.BigInteger;
31
import java.util.ArrayList;
32
import java.util.Calendar;
33
import java.util.Collections;
34
import java.util.Date;
35
import java.util.List;
36
import java.util.Map;
37

    
38
import org.apache.commons.beanutils.BeanUtils;
39
import org.apache.commons.io.IOUtils;
40
import org.apache.commons.logging.Log;
41
import org.apache.commons.logging.LogFactory;
42
import org.dataone.client.D1Client;
43
import org.dataone.client.MNode;
44
import org.dataone.client.ObjectFormatCache;
45
import org.dataone.ore.ResourceMapFactory;
46
import org.dataone.service.types.v1.Checksum;
47
import org.dataone.service.types.v1.Identifier;
48
import org.dataone.service.types.v1.ObjectFormatIdentifier;
49
import org.dataone.service.types.v1.ObjectInfo;
50
import org.dataone.service.types.v1.ObjectList;
51
import org.dataone.service.types.v1.SystemMetadata;
52
import org.dataone.service.types.v1.util.ChecksumUtil;
53
import org.dspace.foresite.ResourceMap;
54

    
55
import edu.ucsb.nceas.metacat.DBUtil;
56
import edu.ucsb.nceas.metacat.DocumentImpl;
57
import edu.ucsb.nceas.metacat.MetaCatServlet;
58
import edu.ucsb.nceas.metacat.admin.AdminException;
59
import edu.ucsb.nceas.metacat.admin.upgrade.UpgradeUtilityInterface;
60
import edu.ucsb.nceas.metacat.dataone.SystemMetadataFactory;
61
import edu.ucsb.nceas.metacat.properties.PropertyService;
62
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
63
import edu.ucsb.nceas.utilities.SortedProperties;
64

    
65
public class GenerateORE implements UpgradeUtilityInterface {
66

    
67
	private static Log log = LogFactory.getLog(GenerateORE.class);
68
	
69
	private int serverLocation = 1;
70

    
71
    public boolean upgrade() throws AdminException {
72
        boolean success = true;
73
        
74
        // include ORE, data, for this server only
75
        boolean includeOre = true;
76
        boolean downloadData = false;
77
        try {
78
			downloadData = Boolean.parseBoolean(PropertyService.getProperty("dataone.ore.downloaddata"));
79
		} catch (PropertyNotFoundException e) {
80
			// ignore, default to false
81
			log.warn("Could not find ORE 'dataone.ore.downloaddata' property, defaulting to false", e);
82
		}
83

    
84

    
85
        try {
86
        	// get only local ids for this server
87
            List<String> idList = null;
88
            
89
            idList = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, true, serverLocation);
90
            filterOutExisting(idList);
91
            Collections.sort(idList);
92
            SystemMetadataFactory.generateSystemMetadata(idList, includeOre, downloadData);
93
            
94
            idList = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, true, serverLocation);
95
            filterOutExisting(idList);
96
            Collections.sort(idList);
97
            SystemMetadataFactory.generateSystemMetadata(idList, includeOre, downloadData);
98
            
99
            idList = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, true, serverLocation);
100
            filterOutExisting(idList);
101
            Collections.sort(idList);
102
            SystemMetadataFactory.generateSystemMetadata(idList, includeOre, downloadData);
103
            
104
            idList = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, true, serverLocation);
105
            filterOutExisting(idList);
106
            Collections.sort(idList);
107
            SystemMetadataFactory.generateSystemMetadata(idList, includeOre, downloadData);
108
            
109
		} catch (Exception e) {
110
			String msg = "Problem generating missing system metadata: " + e.getMessage();
111
			log.error(msg, e);
112
			success = false;
113
			throw new AdminException(msg);
114
		}
115
    	return success;
116
    }
117
    
118
    private List<String> filterOutExisting(List<String> idList) {
119
    	List<String> toRemove = new ArrayList<String>();
120
    	for (String id: idList) {
121
    		Identifier identifier = new Identifier();
122
    		identifier.setValue(id);
123
			boolean exists = SystemMetadataFactory.oreExistsFor(identifier);
124
			if (exists) {
125
				toRemove.add(id);
126
			}
127
    	}
128
    	for (String id: toRemove) {
129
    		idList.remove(id);
130
    	}
131
    	return idList;
132
    }
133
    
134
    public int getServerLocation() {
135
		return serverLocation;
136
	}
137

    
138
	public void setServerLocation(int serverLocation) {
139
		this.serverLocation = serverLocation;
140
	}
141
	
142
	/**
143
	 * Need to update the existing ORE maps to have correct dateTime serializations
144
	 * see: https://redmine.dataone.org/issues/3046
145
	 * @param mnBaseUrl
146
	 */
147
	public static void updateOREdateFormat(String mnBaseUrl) {
148
		
149
		List<Identifier> orePids = getAllOREpids(mnBaseUrl);
150
		updateOREs(orePids, "b", mnBaseUrl);
151
	}
152
	
153
	/**
154
	 * Retrieves a list of all ORE objects on the given MN
155
	 * @param mnBaseUrl
156
	 * @return
157
	 */
158
	public static List<Identifier> getAllOREpids(String mnBaseUrl) {
159
		
160
		MNode mn = null;
161
		ObjectFormatIdentifier formatId = null;
162
		List<Identifier> pids = null;
163
		try {
164
			
165
			// get the MN
166
			mn = D1Client.getMN(mnBaseUrl);
167
			
168
			// get the ORE format id
169
	        formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
170
	        
171
	        // get the objects that match
172
			ObjectList objectList = mn.listObjects(null, null, null, formatId , null, 0, Integer.MAX_VALUE);
173
			pids = new ArrayList<Identifier>();
174
			for (ObjectInfo o: objectList.getObjectInfoList()) {
175
				pids.add(o.getIdentifier());
176
			}
177
			
178
		} catch (Exception e) {
179
			log.error("Could not get MN list of ORE pids", e);
180
		}
181
		
182
		return pids;
183
				
184
	}
185
	
186
	/**
187
	 * Updates the given OREs by regenerating and reserializing the RDF using the updated foresite library
188
	 * Only non-obsolete, non-archived ORE objects are updated and their SystemMetadata is based on the original version.
189
	 * see: https://redmine.dataone.org/issues/3046
190
	 * @param orePids
191
	 * @param pidSuffix
192
	 * @param mnBaseUrl
193
	 */
194
	public static void updateOREs(List<Identifier> orePids, String pidSuffix, String mnBaseUrl) {
195
		
196
		// get a MN client for this local node, or use the given baseUrl
197
		MNode mn = null;
198
		try {
199
			mn = D1Client.getMN(mnBaseUrl);
200
		} catch (Exception e) {
201
			log.error("Could not get MN client", e);
202
			// nothing more we can do here
203
			return;
204
		}
205
		
206
		// make sure we have something for the suffix
207
		if (pidSuffix == null) {
208
			pidSuffix = "b";
209
		}
210
		
211
		for (Identifier orePid: orePids) {
212
			try {
213
				
214
				log.debug("processing ORE pid: " + orePid.getValue());
215

    
216
				// get original SystemMetadata
217
				SystemMetadata originalOreSysMeta = mn.getSystemMetadata(orePid);
218
				
219
				// only update the CURRENT revision of the ORE
220
				if (originalOreSysMeta.getObsoletedBy() != null || (originalOreSysMeta.getArchived() != null && originalOreSysMeta.getArchived())) {
221
					log.debug("ORE pid is obsolete or archived, skipping: " + orePid.getValue());
222
					continue;
223
				}
224
				
225
				// get the original ORE map
226
				InputStream originalOreStream = mn.get(orePid);
227
				Map<Identifier, Map<Identifier, List<Identifier>>> originalOre = ResourceMapFactory.getInstance().parseResourceMap(originalOreStream);
228

    
229
				// generate the updated ORE map, in this case we aren't changing any values, just altering the serialization using a newer foresite library
230
				Identifier updatedOrePid = new Identifier();
231
				updatedOrePid.setValue(orePid.getValue() + pidSuffix);
232
				ResourceMap updatedOre = ResourceMapFactory.getInstance().createResourceMap(updatedOrePid , originalOre.entrySet().iterator().next().getValue());
233
				String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(updatedOre);
234
	            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
235
	            Date today = Calendar.getInstance().getTime();
236
	            
237
				// copy the existing SystemMetada into the new SystemMetadata
238
				SystemMetadata updatedOreSysMeta = new SystemMetadata();
239
				BeanUtils.copyProperties(updatedOreSysMeta, originalOreSysMeta);
240
				
241
				// set the new SystemMetadata values
242
				updatedOreSysMeta.setIdentifier(updatedOrePid);
243
				updatedOreSysMeta.setObsoletes(orePid);
244
				updatedOreSysMeta.setObsoletedBy(null);
245
				updatedOreSysMeta.setArchived(false);
246
	            updatedOreSysMeta.setChecksum(oreChecksum);
247
	            updatedOreSysMeta.setSize(BigInteger.valueOf(SystemMetadataFactory.sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
248
	            updatedOreSysMeta.setDateSysMetadataModified(today);
249
	            updatedOreSysMeta.setDateUploaded(today);
250
	            updatedOreSysMeta.setReplicaList(null);
251

    
252
	            // save the updated ORE to the MN
253
				InputStream updatedOreStream = IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING);
254
				mn.update(null, orePid, updatedOreStream, updatedOrePid, updatedOreSysMeta);
255
				
256
				
257
			} catch (Exception e) {
258
				log.error("Could not update ORE map: " + orePid, e);
259
				
260
				// go to the next record, there's nothing else to do here
261
				continue;
262
			}
263
			
264
		}
265
	}
266

    
267
	public static void main(String [] ags){
268

    
269
        try {
270
        	// set up the properties based on the test/deployed configuration of the workspace
271
        	SortedProperties testProperties = 
272
				new SortedProperties("test/test.properties");
273
			testProperties.load();
274
			String metacatContextDir = testProperties.getProperty("metacat.contextDir");
275
			PropertyService.getInstance(metacatContextDir + "/WEB-INF");
276
			// now run it
277
            GenerateORE upgrader = new GenerateORE();
278
	        upgrader.upgrade();
279
	        
280
        } catch (Exception ex) {
281
            System.out.println("Exception:" + ex.getMessage());
282
            ex.printStackTrace();
283
        }
284
    }
285
}
(1-1/2)