26 |
26 |
*/
|
27 |
27 |
|
28 |
28 |
|
|
29 |
import java.io.InputStream;
|
|
30 |
import java.math.BigInteger;
|
|
31 |
import java.util.ArrayList;
|
|
32 |
import java.util.Calendar;
|
29 |
33 |
import java.util.Collections;
|
|
34 |
import java.util.Date;
|
30 |
35 |
import java.util.List;
|
|
36 |
import java.util.Map;
|
31 |
37 |
|
|
38 |
import org.apache.commons.beanutils.BeanUtils;
|
|
39 |
import org.apache.commons.io.IOUtils;
|
32 |
40 |
import org.apache.commons.logging.Log;
|
33 |
41 |
import org.apache.commons.logging.LogFactory;
|
|
42 |
import org.dataone.client.D1Client;
|
|
43 |
import org.dataone.client.MNode;
|
|
44 |
import org.dataone.client.ObjectFormatCache;
|
|
45 |
import org.dataone.ore.ResourceMapFactory;
|
|
46 |
import org.dataone.service.types.v1.Checksum;
|
|
47 |
import org.dataone.service.types.v1.Identifier;
|
|
48 |
import org.dataone.service.types.v1.ObjectFormatIdentifier;
|
|
49 |
import org.dataone.service.types.v1.ObjectInfo;
|
|
50 |
import org.dataone.service.types.v1.ObjectList;
|
|
51 |
import org.dataone.service.types.v1.SystemMetadata;
|
|
52 |
import org.dataone.service.types.v1.util.ChecksumUtil;
|
|
53 |
import org.dspace.foresite.ResourceMap;
|
34 |
54 |
|
35 |
55 |
import edu.ucsb.nceas.metacat.DBUtil;
|
36 |
56 |
import edu.ucsb.nceas.metacat.DocumentImpl;
|
|
57 |
import edu.ucsb.nceas.metacat.MetaCatServlet;
|
37 |
58 |
import edu.ucsb.nceas.metacat.admin.AdminException;
|
38 |
59 |
import edu.ucsb.nceas.metacat.admin.upgrade.UpgradeUtilityInterface;
|
39 |
60 |
import edu.ucsb.nceas.metacat.dataone.SystemMetadataFactory;
|
... | ... | |
97 |
118 |
public void setServerLocation(int serverLocation) {
|
98 |
119 |
this.serverLocation = serverLocation;
|
99 |
120 |
}
|
|
121 |
|
|
122 |
/**
|
|
123 |
* Need to update the existing ORE maps to have correct dateTime serializations
|
|
124 |
* see: https://redmine.dataone.org/issues/3046
|
|
125 |
* @param mnBaseUrl
|
|
126 |
*/
|
|
127 |
public static void updateOREdateFormat(String mnBaseUrl) {
|
|
128 |
|
|
129 |
List<Identifier> orePids = getAllOREpids(mnBaseUrl);
|
|
130 |
updateOREs(orePids, "b", mnBaseUrl);
|
|
131 |
}
|
|
132 |
|
|
133 |
/**
|
|
134 |
* Retrieves a list of all ORE objects on the given MN
|
|
135 |
* @param mnBaseUrl
|
|
136 |
* @return
|
|
137 |
*/
|
|
138 |
public static List<Identifier> getAllOREpids(String mnBaseUrl) {
|
|
139 |
|
|
140 |
MNode mn = null;
|
|
141 |
ObjectFormatIdentifier formatId = null;
|
|
142 |
List<Identifier> pids = null;
|
|
143 |
try {
|
|
144 |
|
|
145 |
// get the MN
|
|
146 |
mn = D1Client.getMN(mnBaseUrl);
|
|
147 |
|
|
148 |
// get the ORE format id
|
|
149 |
formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
|
|
150 |
|
|
151 |
// get the objects that match
|
|
152 |
ObjectList objectList = mn.listObjects(null, null, null, formatId , null, 0, Integer.MAX_VALUE);
|
|
153 |
pids = new ArrayList<Identifier>();
|
|
154 |
for (ObjectInfo o: objectList.getObjectInfoList()) {
|
|
155 |
pids.add(o.getIdentifier());
|
|
156 |
}
|
|
157 |
|
|
158 |
} catch (Exception e) {
|
|
159 |
log.error("Could not get MN list of ORE pids", e);
|
|
160 |
}
|
|
161 |
|
|
162 |
return pids;
|
|
163 |
|
|
164 |
}
|
|
165 |
|
|
166 |
/**
|
|
167 |
* Updates the given OREs by regenerating and reserializing the RDF using the updated foresite library
|
|
168 |
* Only non-obsolete, non-archived ORE objects are updated and their SystemMetadata is based on the original version.
|
|
169 |
* see: https://redmine.dataone.org/issues/3046
|
|
170 |
* @param orePids
|
|
171 |
* @param pidSuffix
|
|
172 |
* @param mnBaseUrl
|
|
173 |
*/
|
|
174 |
public static void updateOREs(List<Identifier> orePids, String pidSuffix, String mnBaseUrl) {
|
|
175 |
|
|
176 |
// get a MN client for this local node, or use the given baseUrl
|
|
177 |
MNode mn = null;
|
|
178 |
try {
|
|
179 |
mn = D1Client.getMN(mnBaseUrl);
|
|
180 |
} catch (Exception e) {
|
|
181 |
log.error("Could not get MN client", e);
|
|
182 |
// nothing more we can do here
|
|
183 |
return;
|
|
184 |
}
|
|
185 |
|
|
186 |
// make sure we have something for the suffix
|
|
187 |
if (pidSuffix == null) {
|
|
188 |
pidSuffix = "b";
|
|
189 |
}
|
|
190 |
|
|
191 |
for (Identifier orePid: orePids) {
|
|
192 |
try {
|
|
193 |
|
|
194 |
log.debug("processing ORE pid: " + orePid.getValue());
|
100 |
195 |
|
|
196 |
// get original SystemMetadata
|
|
197 |
SystemMetadata originalOreSysMeta = mn.getSystemMetadata(orePid);
|
|
198 |
|
|
199 |
// only update the CURRENT revision of the ORE
|
|
200 |
if (originalOreSysMeta.getObsoletedBy() != null || originalOreSysMeta.getArchived()) {
|
|
201 |
log.debug("ORE pid is obsolete or archived, skipping: " + orePid.getValue());
|
|
202 |
continue;
|
|
203 |
}
|
|
204 |
|
|
205 |
// get the original ORE map
|
|
206 |
InputStream originalOreStream = mn.get(orePid);
|
|
207 |
Map<Identifier, Map<Identifier, List<Identifier>>> originalOre = ResourceMapFactory.getInstance().parseResourceMap(originalOreStream);
|
|
208 |
|
|
209 |
// generate the updated ORE map, in this case we aren't changing any values, just altering the serialization using a newer foresite library
|
|
210 |
Identifier updatedOrePid = new Identifier();
|
|
211 |
updatedOrePid.setValue(orePid.getValue() + pidSuffix);
|
|
212 |
ResourceMap updatedOre = ResourceMapFactory.getInstance().createResourceMap(updatedOrePid , originalOre.entrySet().iterator().next().getValue());
|
|
213 |
String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(updatedOre);
|
|
214 |
Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
|
|
215 |
Date today = Calendar.getInstance().getTime();
|
|
216 |
|
|
217 |
// copy the existing SystemMetada into the new SystemMetadata
|
|
218 |
SystemMetadata updatedOreSysMeta = new SystemMetadata();
|
|
219 |
BeanUtils.copyProperties(updatedOreSysMeta, originalOreSysMeta);
|
|
220 |
|
|
221 |
// set the new SystemMetadata values
|
|
222 |
updatedOreSysMeta.setIdentifier(updatedOrePid);
|
|
223 |
updatedOreSysMeta.setObsoletes(orePid);
|
|
224 |
updatedOreSysMeta.setObsoletedBy(null);
|
|
225 |
updatedOreSysMeta.setArchived(false);
|
|
226 |
updatedOreSysMeta.setChecksum(oreChecksum);
|
|
227 |
updatedOreSysMeta.setSize(BigInteger.valueOf(SystemMetadataFactory.sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
|
|
228 |
updatedOreSysMeta.setDateSysMetadataModified(today);
|
|
229 |
updatedOreSysMeta.setDateUploaded(today);
|
|
230 |
updatedOreSysMeta.setReplicaList(null);
|
|
231 |
|
|
232 |
// save the updated ORE to the MN
|
|
233 |
InputStream updatedOreStream = IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING);
|
|
234 |
mn.update(null, orePid, updatedOreStream, updatedOrePid, updatedOreSysMeta);
|
|
235 |
|
|
236 |
|
|
237 |
} catch (Exception e) {
|
|
238 |
log.error("Could not update ORE map: " + orePid, e);
|
|
239 |
|
|
240 |
// go to the next record, there's nothing else to do here
|
|
241 |
continue;
|
|
242 |
}
|
|
243 |
|
|
244 |
}
|
|
245 |
}
|
|
246 |
|
101 |
247 |
public static void main(String [] ags){
|
102 |
248 |
|
103 |
249 |
try {
|
utility methods to update/reserialize existing ORE maps that were generated with older foresite (and included bad dateTime strings).
https://redmine.dataone.org/issues/3046