Revision 8725
Added by ben leinfelder over 10 years ago
test/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizerTest.java | ||
---|---|---|
20 | 20 |
|
21 | 21 |
import java.io.ByteArrayInputStream; |
22 | 22 |
import java.io.InputStream; |
23 |
import java.math.BigInteger; |
|
24 |
import java.util.ArrayList; |
|
25 |
import java.util.Arrays; |
|
26 |
import java.util.Iterator; |
|
27 |
import java.util.List; |
|
28 |
import java.util.Map; |
|
29 |
import java.util.Vector; |
|
23 | 30 |
|
24 | 31 |
import junit.framework.Test; |
25 | 32 |
import junit.framework.TestSuite; |
26 | 33 |
|
34 |
import org.apache.commons.io.IOUtils; |
|
35 |
import org.dataone.service.exceptions.NotFound; |
|
27 | 36 |
import org.dataone.service.types.v1.Identifier; |
28 | 37 |
import org.dataone.service.types.v1.ObjectFormatIdentifier; |
29 | 38 |
import org.dataone.service.types.v1.Session; |
30 | 39 |
import org.dataone.service.types.v1.SystemMetadata; |
31 | 40 |
|
41 |
import edu.ucsb.nceas.metacat.DBUtil; |
|
42 |
import edu.ucsb.nceas.metacat.DocumentImpl; |
|
43 |
import edu.ucsb.nceas.metacat.IdentifierManager; |
|
44 |
import edu.ucsb.nceas.metacat.McdbDocNotFoundException; |
|
32 | 45 |
import edu.ucsb.nceas.metacat.dataone.D1NodeServiceTest; |
33 | 46 |
import edu.ucsb.nceas.metacat.dataone.MNodeService; |
47 |
import edu.ucsb.nceas.metacat.replication.ReplicationService; |
|
48 |
import edu.ucsb.nceas.metacat.util.DocumentUtil; |
|
34 | 49 |
|
35 | 50 |
public class DatapackageSummarizerTest extends D1NodeServiceTest { |
36 | 51 |
|
... | ... | |
62 | 77 |
*/ |
63 | 78 |
public static Test suite() { |
64 | 79 |
TestSuite suite = new TestSuite(); |
65 |
suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation")); |
|
80 |
suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotations")); |
|
81 |
// suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation")); |
|
66 | 82 |
// suite.addTest(new DatapackageSummarizerTest("testGenerateRandomAnnotation")); |
67 | 83 |
return suite; |
68 | 84 |
} |
... | ... | |
125 | 141 |
|
126 | 142 |
// check that it was parsed? |
127 | 143 |
} |
144 |
|
|
145 |
public void testGenerateAnnotations() throws Exception { |
|
146 |
|
|
147 |
// summarize the packages |
|
148 |
DatapackageSummarizer ds = new DatapackageSummarizer(); |
|
149 |
List<Identifier> identifiers = new ArrayList<Identifier>(); |
|
150 |
Map<Integer, String> serverCodes = ReplicationService.getServerCodes(); |
|
128 | 151 |
|
152 |
// select the metadata ids we want to summarize |
|
153 |
boolean includeReplicas = false; |
|
154 |
Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator(); |
|
155 |
if (includeReplicas ) { |
|
156 |
codeIter = serverCodes.keySet().iterator(); |
|
157 |
} |
|
158 |
|
|
159 |
Vector<String> idList = new Vector<String>(); |
|
160 |
while (codeIter.hasNext()) { |
|
161 |
int serverLocation = codeIter.next(); |
|
162 |
Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation); |
|
163 |
idList.addAll(idList0); |
|
164 |
Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation); |
|
165 |
idList.addAll(idList1); |
|
166 |
Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation); |
|
167 |
idList.addAll(idList2); |
|
168 |
Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation); |
|
169 |
idList.addAll(idList3); |
|
170 |
|
|
171 |
} |
|
172 |
|
|
173 |
// go through all the identifiers now |
|
174 |
for (String localId : idList) { |
|
175 |
try { |
|
176 |
String guid = IdentifierManager.getInstance().getGUID( |
|
177 |
DocumentUtil.getDocIdFromAccessionNumber(localId), |
|
178 |
DocumentUtil.getRevisionFromAccessionNumber(localId)); |
|
179 |
Identifier pid = new Identifier(); |
|
180 |
pid.setValue(guid); |
|
181 |
identifiers.add(pid); |
|
182 |
|
|
183 |
String annotation = ds.generateAnnotation(pid); |
|
184 |
Identifier annotationPid = new Identifier(); |
|
185 |
annotationPid.setValue("http://annotation/" + guid); |
|
186 |
Session session = getTestSession(); |
|
187 |
|
|
188 |
SystemMetadata sysmeta = null; |
|
189 |
// look for the latest version of the annotation, if there is one |
|
190 |
do { |
|
191 |
try { |
|
192 |
sysmeta = MNodeService.getInstance(request).getSystemMetadata(annotationPid); |
|
193 |
if (sysmeta.getObsoletedBy() != null) { |
|
194 |
annotationPid.setValue(sysmeta.getObsoletedBy().getValue()); |
|
195 |
} |
|
196 |
} catch (NotFound nf) { |
|
197 |
break; |
|
198 |
} |
|
199 |
} while (sysmeta != null && sysmeta.getObsoletedBy() != null); |
|
200 |
|
|
201 |
boolean exists = (sysmeta != null); |
|
202 |
|
|
203 |
InputStream object = null; |
|
204 |
object = IOUtils.toInputStream(annotation, "UTF-8"); |
|
205 |
sysmeta = createSystemMetadata(annotationPid, session.getSubject(), object); |
|
206 |
ObjectFormatIdentifier formatId = new ObjectFormatIdentifier(); |
|
207 |
formatId.setValue("http://www.w3.org/TR/rdf-syntax-grammar"); |
|
208 |
sysmeta.setFormatId(formatId); |
|
209 |
sysmeta.setSize(BigInteger.valueOf(annotation.getBytes("UTF-8").length)); |
|
210 |
|
|
211 |
// get the stream fresh for inserting/updating |
|
212 |
object = IOUtils.toInputStream(annotation, "UTF-8"); |
|
213 |
|
|
214 |
if (!exists) { |
|
215 |
MNodeService.getInstance(request).create(session, annotationPid, object, sysmeta); |
|
216 |
} else { |
|
217 |
Identifier newAnnotationPid = new Identifier(); |
|
218 |
// use an old-style revision scheme for updating the annotation identifier |
|
219 |
String value = annotationPid.getValue(); |
|
220 |
int rev = DocumentUtil.getRevisionFromAccessionNumber(value); |
|
221 |
String partialId = DocumentUtil.getSmartDocId(value); |
|
222 |
rev++; |
|
223 |
newAnnotationPid.setValue(partialId + "." + rev); |
|
224 |
sysmeta.setIdentifier(newAnnotationPid); |
|
225 |
sysmeta.setObsoletes(annotationPid); |
|
226 |
MNodeService.getInstance(request).update(session, annotationPid, object, newAnnotationPid, sysmeta); |
|
227 |
} |
|
228 |
|
|
229 |
System.out.println("Generated annotation for pid: " + guid); |
|
230 |
|
|
231 |
} catch (McdbDocNotFoundException nfe) { |
|
232 |
// just skip it |
|
233 |
continue; |
|
234 |
} |
|
235 |
} |
|
236 |
//System.exit(0); |
|
237 |
} |
|
238 |
|
|
129 | 239 |
} |
Also available in: Unified diff
Add a "test" for generating annotations based on the entity/attribute details of a datapackage. It iterates through all current EML revisions and either creates or updates an annotation for each one it finds. It does add content (RDF files) to your Metacat deployment, but it can be safely re-run each time we change our annotation algorithm.