Revision 7322
Added by ben leinfelder almost 12 years ago
GenerateORE.java | ||
---|---|---|
26 | 26 |
*/ |
27 | 27 |
|
28 | 28 |
|
29 |
import java.io.InputStream; |
|
30 |
import java.math.BigInteger; |
|
31 |
import java.util.ArrayList; |
|
32 |
import java.util.Calendar; |
|
29 | 33 |
import java.util.Collections; |
34 |
import java.util.Date; |
|
30 | 35 |
import java.util.List; |
36 |
import java.util.Map; |
|
31 | 37 |
|
38 |
import org.apache.commons.beanutils.BeanUtils; |
|
39 |
import org.apache.commons.io.IOUtils; |
|
32 | 40 |
import org.apache.commons.logging.Log; |
33 | 41 |
import org.apache.commons.logging.LogFactory; |
42 |
import org.dataone.client.D1Client; |
|
43 |
import org.dataone.client.MNode; |
|
44 |
import org.dataone.client.ObjectFormatCache; |
|
45 |
import org.dataone.ore.ResourceMapFactory; |
|
46 |
import org.dataone.service.types.v1.Checksum; |
|
47 |
import org.dataone.service.types.v1.Identifier; |
|
48 |
import org.dataone.service.types.v1.ObjectFormatIdentifier; |
|
49 |
import org.dataone.service.types.v1.ObjectInfo; |
|
50 |
import org.dataone.service.types.v1.ObjectList; |
|
51 |
import org.dataone.service.types.v1.SystemMetadata; |
|
52 |
import org.dataone.service.types.v1.util.ChecksumUtil; |
|
53 |
import org.dspace.foresite.ResourceMap; |
|
34 | 54 |
|
35 | 55 |
import edu.ucsb.nceas.metacat.DBUtil; |
36 | 56 |
import edu.ucsb.nceas.metacat.DocumentImpl; |
57 |
import edu.ucsb.nceas.metacat.MetaCatServlet; |
|
37 | 58 |
import edu.ucsb.nceas.metacat.admin.AdminException; |
38 | 59 |
import edu.ucsb.nceas.metacat.admin.upgrade.UpgradeUtilityInterface; |
39 | 60 |
import edu.ucsb.nceas.metacat.dataone.SystemMetadataFactory; |
... | ... | |
97 | 118 |
public void setServerLocation(int serverLocation) { |
98 | 119 |
this.serverLocation = serverLocation; |
99 | 120 |
} |
121 |
|
|
122 |
/** |
|
123 |
* Need to update the existing ORE maps to have correct dateTime serializations |
|
124 |
* see: https://redmine.dataone.org/issues/3046 |
|
125 |
* @param mnBaseUrl |
|
126 |
*/ |
|
127 |
public static void updateOREdateFormat(String mnBaseUrl) { |
|
128 |
|
|
129 |
List<Identifier> orePids = getAllOREpids(mnBaseUrl); |
|
130 |
updateOREs(orePids, "b", mnBaseUrl); |
|
131 |
} |
|
132 |
|
|
133 |
/** |
|
134 |
* Retrieves a list of all ORE objects on the given MN |
|
135 |
* @param mnBaseUrl |
|
136 |
* @return |
|
137 |
*/ |
|
138 |
public static List<Identifier> getAllOREpids(String mnBaseUrl) { |
|
139 |
|
|
140 |
MNode mn = null; |
|
141 |
ObjectFormatIdentifier formatId = null; |
|
142 |
List<Identifier> pids = null; |
|
143 |
try { |
|
144 |
|
|
145 |
// get the MN |
|
146 |
mn = D1Client.getMN(mnBaseUrl); |
|
147 |
|
|
148 |
// get the ORE format id |
|
149 |
formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId(); |
|
150 |
|
|
151 |
// get the objects that match |
|
152 |
ObjectList objectList = mn.listObjects(null, null, null, formatId , null, 0, Integer.MAX_VALUE); |
|
153 |
pids = new ArrayList<Identifier>(); |
|
154 |
for (ObjectInfo o: objectList.getObjectInfoList()) { |
|
155 |
pids.add(o.getIdentifier()); |
|
156 |
} |
|
157 |
|
|
158 |
} catch (Exception e) { |
|
159 |
log.error("Could not get MN list of ORE pids", e); |
|
160 |
} |
|
161 |
|
|
162 |
return pids; |
|
163 |
|
|
164 |
} |
|
165 |
|
|
166 |
/** |
|
167 |
* Updates the given OREs by regenerating and reserializing the RDF using the updated foresite library |
|
168 |
* Only non-obsolete, non-archived ORE objects are updated and their SystemMetadata is based on the original version. |
|
169 |
* see: https://redmine.dataone.org/issues/3046 |
|
170 |
* @param orePids |
|
171 |
* @param pidSuffix |
|
172 |
* @param mnBaseUrl |
|
173 |
*/ |
|
174 |
public static void updateOREs(List<Identifier> orePids, String pidSuffix, String mnBaseUrl) { |
|
175 |
|
|
176 |
// get a MN client for this local node, or use the given baseUrl |
|
177 |
MNode mn = null; |
|
178 |
try { |
|
179 |
mn = D1Client.getMN(mnBaseUrl); |
|
180 |
} catch (Exception e) { |
|
181 |
log.error("Could not get MN client", e); |
|
182 |
// nothing more we can do here |
|
183 |
return; |
|
184 |
} |
|
185 |
|
|
186 |
// make sure we have something for the suffix |
|
187 |
if (pidSuffix == null) { |
|
188 |
pidSuffix = "b"; |
|
189 |
} |
|
190 |
|
|
191 |
for (Identifier orePid: orePids) { |
|
192 |
try { |
|
193 |
|
|
194 |
log.debug("processing ORE pid: " + orePid.getValue()); |
|
100 | 195 |
|
196 |
// get original SystemMetadata |
|
197 |
SystemMetadata originalOreSysMeta = mn.getSystemMetadata(orePid); |
|
198 |
|
|
199 |
// only update the CURRENT revision of the ORE |
|
200 |
if (originalOreSysMeta.getObsoletedBy() != null || originalOreSysMeta.getArchived()) { |
|
201 |
log.debug("ORE pid is obsolete or archived, skipping: " + orePid.getValue()); |
|
202 |
continue; |
|
203 |
} |
|
204 |
|
|
205 |
// get the original ORE map |
|
206 |
InputStream originalOreStream = mn.get(orePid); |
|
207 |
Map<Identifier, Map<Identifier, List<Identifier>>> originalOre = ResourceMapFactory.getInstance().parseResourceMap(originalOreStream); |
|
208 |
|
|
209 |
// generate the updated ORE map, in this case we aren't changing any values, just altering the serialization using a newer foresite library |
|
210 |
Identifier updatedOrePid = new Identifier(); |
|
211 |
updatedOrePid.setValue(orePid.getValue() + pidSuffix); |
|
212 |
ResourceMap updatedOre = ResourceMapFactory.getInstance().createResourceMap(updatedOrePid , originalOre.entrySet().iterator().next().getValue()); |
|
213 |
String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(updatedOre); |
|
214 |
Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5"); |
|
215 |
Date today = Calendar.getInstance().getTime(); |
|
216 |
|
|
217 |
// copy the existing SystemMetada into the new SystemMetadata |
|
218 |
SystemMetadata updatedOreSysMeta = new SystemMetadata(); |
|
219 |
BeanUtils.copyProperties(updatedOreSysMeta, originalOreSysMeta); |
|
220 |
|
|
221 |
// set the new SystemMetadata values |
|
222 |
updatedOreSysMeta.setIdentifier(updatedOrePid); |
|
223 |
updatedOreSysMeta.setObsoletes(orePid); |
|
224 |
updatedOreSysMeta.setObsoletedBy(null); |
|
225 |
updatedOreSysMeta.setArchived(false); |
|
226 |
updatedOreSysMeta.setChecksum(oreChecksum); |
|
227 |
updatedOreSysMeta.setSize(BigInteger.valueOf(SystemMetadataFactory.sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING)))); |
|
228 |
updatedOreSysMeta.setDateSysMetadataModified(today); |
|
229 |
updatedOreSysMeta.setDateUploaded(today); |
|
230 |
updatedOreSysMeta.setReplicaList(null); |
|
231 |
|
|
232 |
// save the updated ORE to the MN |
|
233 |
InputStream updatedOreStream = IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING); |
|
234 |
mn.update(null, orePid, updatedOreStream, updatedOrePid, updatedOreSysMeta); |
|
235 |
|
|
236 |
|
|
237 |
} catch (Exception e) { |
|
238 |
log.error("Could not update ORE map: " + orePid, e); |
|
239 |
|
|
240 |
// go to the next record, there's nothing else to do here |
|
241 |
continue; |
|
242 |
} |
|
243 |
|
|
244 |
} |
|
245 |
} |
|
246 |
|
|
101 | 247 |
public static void main(String [] ags){ |
102 | 248 |
|
103 | 249 |
try { |
Also available in: Unified diff
Utility methods to update/reserialize existing ORE maps that were generated with an older foresite library (and included bad dateTime strings).
https://redmine.dataone.org/issues/3046