/** '$RCSfile$'
 * Copyright: 2010 Regents of the University of California and the
 * National Center for Ecological Analysis and Synthesis
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
19
|
package edu.ucsb.nceas.metacat.annotation;
|
20
|
|
21
|
import java.io.ByteArrayInputStream;
|
22
|
import java.io.InputStream;
|
23
|
import java.math.BigInteger;
|
24
|
import java.util.ArrayList;
|
25
|
import java.util.Arrays;
|
26
|
import java.util.Iterator;
|
27
|
import java.util.List;
|
28
|
import java.util.Map;
|
29
|
import java.util.Vector;
|
30
|
|
31
|
import junit.framework.Test;
|
32
|
import junit.framework.TestSuite;
|
33
|
|
34
|
import org.apache.commons.io.IOUtils;
|
35
|
import org.dataone.service.exceptions.NotFound;
|
36
|
import org.dataone.service.types.v1.Identifier;
|
37
|
import org.dataone.service.types.v1.ObjectFormatIdentifier;
|
38
|
import org.dataone.service.types.v1.Session;
|
39
|
import org.dataone.service.types.v2.SystemMetadata;
|
40
|
|
41
|
import edu.ucsb.nceas.metacat.DBUtil;
|
42
|
import edu.ucsb.nceas.metacat.DocumentImpl;
|
43
|
import edu.ucsb.nceas.metacat.IdentifierManager;
|
44
|
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
|
45
|
import edu.ucsb.nceas.metacat.dataone.D1NodeServiceTest;
|
46
|
import edu.ucsb.nceas.metacat.dataone.MNodeService;
|
47
|
import edu.ucsb.nceas.metacat.replication.ReplicationService;
|
48
|
import edu.ucsb.nceas.metacat.util.DocumentUtil;
|
49
|
|
50
|
public class DatapackageSummarizerTest extends D1NodeServiceTest {
|
51
|
|
52
|
|
53
|
private static final String ANNOTATION_TEST_DOC = "test/eml-sample-annotation.xml";
|
54
|
|
55
|
/**
|
56
|
* constructor for the test
|
57
|
*/
|
58
|
public DatapackageSummarizerTest(String name) {
|
59
|
super(name);
|
60
|
}
|
61
|
|
62
|
/**
|
63
|
* Establish a testing framework by initializing appropriate objects
|
64
|
*/
|
65
|
public void setUp() throws Exception {
|
66
|
super.setUp();
|
67
|
}
|
68
|
|
69
|
/**
|
70
|
* Release any objects after tests are complete
|
71
|
*/
|
72
|
public void tearDown() {
|
73
|
}
|
74
|
|
75
|
/**
|
76
|
* Create a suite of tests to be run together
|
77
|
*/
|
78
|
public static Test suite() {
|
79
|
TestSuite suite = new TestSuite();
|
80
|
// suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotations"));
|
81
|
// suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation"));
|
82
|
// suite.addTest(new DatapackageSummarizerTest("testStandaloneAnnotation"));
|
83
|
suite.addTest(new DatapackageSummarizerTest("testIndexAnnotations"));
|
84
|
return suite;
|
85
|
}
|
86
|
|
87
|
public void testStandaloneAnnotation() throws Exception {
|
88
|
// insert the test document to sem-index
|
89
|
Identifier metadataPid = new Identifier();
|
90
|
metadataPid.setValue("testAnnotation.eml." + System.currentTimeMillis());
|
91
|
Session session = getTestSession();
|
92
|
try {
|
93
|
InputStream object = new ByteArrayInputStream(this.getTestDocFromFile(ANNOTATION_TEST_DOC).getBytes("UTF-8"));
|
94
|
SystemMetadata sysmeta = createSystemMetadata(metadataPid, session.getSubject(), object);
|
95
|
ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
|
96
|
formatId.setValue("eml://ecoinformatics.org/eml-2.0.0");
|
97
|
sysmeta.setFormatId(formatId);
|
98
|
Identifier pid = MNodeService.getInstance(request).create(session, metadataPid, object, sysmeta);
|
99
|
assertEquals(metadataPid.getValue(), pid.getValue());
|
100
|
} catch (Exception e) {
|
101
|
e.printStackTrace();
|
102
|
fail("Could not add metadata test file: " + e.getMessage());
|
103
|
}
|
104
|
|
105
|
// index it
|
106
|
DatapackageSummarizer ds = new DatapackageSummarizer();
|
107
|
ds.indexEphemeralAnnotation(metadataPid);
|
108
|
|
109
|
// check it
|
110
|
|
111
|
}
|
112
|
|
113
|
/**
|
114
|
* Generate a single annotation based exclusively on the metadata
|
115
|
* @throws Exception
|
116
|
*/
|
117
|
public void testGenerateAnnotation() throws Exception {
|
118
|
Identifier metadataPid = new Identifier();
|
119
|
metadataPid.setValue("testAnnotation.eml." + System.currentTimeMillis());
|
120
|
Session session = getTestSession();
|
121
|
try {
|
122
|
InputStream object = new ByteArrayInputStream(this.getTestDocFromFile(ANNOTATION_TEST_DOC).getBytes("UTF-8"));
|
123
|
SystemMetadata sysmeta = createSystemMetadata(metadataPid, session.getSubject(), object);
|
124
|
ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
|
125
|
formatId.setValue("eml://ecoinformatics.org/eml-2.0.0");
|
126
|
sysmeta.setFormatId(formatId);
|
127
|
Identifier pid = MNodeService.getInstance(request).create(session, metadataPid, object, sysmeta);
|
128
|
assertEquals(metadataPid.getValue(), pid.getValue());
|
129
|
} catch (Exception e) {
|
130
|
e.printStackTrace();
|
131
|
fail("Could not add metadata test file: " + e.getMessage());
|
132
|
}
|
133
|
|
134
|
// generate the annotation for the metadata
|
135
|
DatapackageSummarizer ds = new DatapackageSummarizer();
|
136
|
String rdfContent = ds.generateAnnotation(metadataPid);
|
137
|
|
138
|
// save the annotation
|
139
|
Identifier annotationPid = new Identifier();
|
140
|
annotationPid.setValue("http://annotation/" + metadataPid.getValue());
|
141
|
try {
|
142
|
InputStream object = new ByteArrayInputStream(rdfContent.getBytes("UTF-8"));
|
143
|
SystemMetadata sysmeta = createSystemMetadata(annotationPid, session.getSubject(), object);
|
144
|
ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
|
145
|
formatId.setValue("http://www.w3.org/TR/rdf-syntax-grammar");
|
146
|
sysmeta.setFormatId(formatId);
|
147
|
Identifier pid = MNodeService.getInstance(request).create(session, annotationPid, object, sysmeta);
|
148
|
assertEquals(annotationPid.getValue(), pid.getValue());
|
149
|
} catch (Exception e) {
|
150
|
e.printStackTrace();
|
151
|
fail("Could not add annotation test file: " + e.getMessage());
|
152
|
}
|
153
|
|
154
|
// check that it was parsed?
|
155
|
}
|
156
|
|
157
|
public void testIndexAnnotations() throws Exception {
|
158
|
testGenerateAnnotations(true);
|
159
|
}
|
160
|
|
161
|
public void testGenerateAnnotations() throws Exception {
|
162
|
testGenerateAnnotations(false);
|
163
|
}
|
164
|
|
165
|
private void testGenerateAnnotations(boolean indexOnly) throws Exception {
|
166
|
|
167
|
// summarize the packages
|
168
|
DatapackageSummarizer ds = new DatapackageSummarizer();
|
169
|
List<Identifier> identifiers = new ArrayList<Identifier>();
|
170
|
Map<Integer, String> serverCodes = ReplicationService.getServerCodes();
|
171
|
|
172
|
// select the metadata ids we want to summarize
|
173
|
boolean includeReplicas = false;
|
174
|
Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator();
|
175
|
if (includeReplicas ) {
|
176
|
codeIter = serverCodes.keySet().iterator();
|
177
|
}
|
178
|
|
179
|
Vector<String> idList = new Vector<String>();
|
180
|
while (codeIter.hasNext()) {
|
181
|
int serverLocation = codeIter.next();
|
182
|
Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation);
|
183
|
idList.addAll(idList0);
|
184
|
Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation);
|
185
|
idList.addAll(idList1);
|
186
|
Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation);
|
187
|
idList.addAll(idList2);
|
188
|
Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation);
|
189
|
idList.addAll(idList3);
|
190
|
|
191
|
}
|
192
|
|
193
|
// go through all the identifiers now
|
194
|
for (String localId : idList) {
|
195
|
try {
|
196
|
String guid = IdentifierManager.getInstance().getGUID(
|
197
|
DocumentUtil.getDocIdFromAccessionNumber(localId),
|
198
|
DocumentUtil.getRevisionFromAccessionNumber(localId));
|
199
|
Identifier pid = new Identifier();
|
200
|
pid.setValue(guid);
|
201
|
identifiers.add(pid);
|
202
|
|
203
|
// just index the annotation, don't save it
|
204
|
if (indexOnly) {
|
205
|
ds.indexEphemeralAnnotation(pid);
|
206
|
continue;
|
207
|
}
|
208
|
|
209
|
String annotation = ds.generateAnnotation(pid);
|
210
|
Identifier annotationPid = new Identifier();
|
211
|
annotationPid.setValue("http://annotation/" + guid);
|
212
|
Session session = getTestSession();
|
213
|
|
214
|
SystemMetadata sysmeta = null;
|
215
|
// look for the latest version of the annotation, if there is one
|
216
|
do {
|
217
|
try {
|
218
|
sysmeta = MNodeService.getInstance(request).getSystemMetadata(null, annotationPid);
|
219
|
if (sysmeta.getObsoletedBy() != null) {
|
220
|
annotationPid.setValue(sysmeta.getObsoletedBy().getValue());
|
221
|
}
|
222
|
} catch (NotFound nf) {
|
223
|
break;
|
224
|
}
|
225
|
} while (sysmeta != null && sysmeta.getObsoletedBy() != null);
|
226
|
|
227
|
boolean exists = (sysmeta != null);
|
228
|
|
229
|
InputStream object = null;
|
230
|
object = IOUtils.toInputStream(annotation, "UTF-8");
|
231
|
sysmeta = createSystemMetadata(annotationPid, session.getSubject(), object);
|
232
|
ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
|
233
|
formatId.setValue("http://www.w3.org/TR/rdf-syntax-grammar");
|
234
|
sysmeta.setFormatId(formatId);
|
235
|
sysmeta.setSize(BigInteger.valueOf(annotation.getBytes("UTF-8").length));
|
236
|
|
237
|
// get the stream fresh for inserting/updating
|
238
|
object = IOUtils.toInputStream(annotation, "UTF-8");
|
239
|
|
240
|
if (!exists) {
|
241
|
MNodeService.getInstance(request).create(session, annotationPid, object, sysmeta);
|
242
|
} else {
|
243
|
Identifier newAnnotationPid = new Identifier();
|
244
|
// use an old-style revision scheme for updating the annotation identifier
|
245
|
String value = annotationPid.getValue();
|
246
|
int rev = DocumentUtil.getRevisionFromAccessionNumber(value);
|
247
|
String partialId = DocumentUtil.getSmartDocId(value);
|
248
|
rev++;
|
249
|
newAnnotationPid.setValue(partialId + "." + rev);
|
250
|
sysmeta.setIdentifier(newAnnotationPid);
|
251
|
sysmeta.setObsoletes(annotationPid);
|
252
|
MNodeService.getInstance(request).update(session, annotationPid, object, newAnnotationPid, sysmeta);
|
253
|
}
|
254
|
|
255
|
System.out.println("Generated annotation for pid: " + guid);
|
256
|
|
257
|
} catch (McdbDocNotFoundException nfe) {
|
258
|
// just skip it
|
259
|
continue;
|
260
|
}
|
261
|
}
|
262
|
//System.exit(0);
|
263
|
}
|
264
|
|
265
|
}
|