Project

General

Profile

« Previous | Next » 

Revision 8725

Add a "test" for generating annotations based on the entity/attribute details of a datapackage. It iterates through all current EML revisions and either creates or updates annotations based on what it finds. It does add content (RDF files) to your Metacat deployment, but it can safely be re-run each time we change our annotation algorithm.

View differences:

test/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizerTest.java
20 20

  
21 21
import java.io.ByteArrayInputStream;
22 22
import java.io.InputStream;
23
import java.math.BigInteger;
24
import java.util.ArrayList;
25
import java.util.Arrays;
26
import java.util.Iterator;
27
import java.util.List;
28
import java.util.Map;
29
import java.util.Vector;
23 30

  
24 31
import junit.framework.Test;
25 32
import junit.framework.TestSuite;
26 33

  
34
import org.apache.commons.io.IOUtils;
35
import org.dataone.service.exceptions.NotFound;
27 36
import org.dataone.service.types.v1.Identifier;
28 37
import org.dataone.service.types.v1.ObjectFormatIdentifier;
29 38
import org.dataone.service.types.v1.Session;
30 39
import org.dataone.service.types.v1.SystemMetadata;
31 40

  
41
import edu.ucsb.nceas.metacat.DBUtil;
42
import edu.ucsb.nceas.metacat.DocumentImpl;
43
import edu.ucsb.nceas.metacat.IdentifierManager;
44
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
32 45
import edu.ucsb.nceas.metacat.dataone.D1NodeServiceTest;
33 46
import edu.ucsb.nceas.metacat.dataone.MNodeService;
47
import edu.ucsb.nceas.metacat.replication.ReplicationService;
48
import edu.ucsb.nceas.metacat.util.DocumentUtil;
34 49

  
35 50
public class DatapackageSummarizerTest extends D1NodeServiceTest {
36 51

  
......
62 77
	 */
63 78
	public static Test suite() {
64 79
		TestSuite suite = new TestSuite();
65
		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation"));
80
		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotations"));
81
//		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation"));
66 82
//		suite.addTest(new DatapackageSummarizerTest("testGenerateRandomAnnotation"));
67 83
		return suite;
68 84
	}
......
125 141
		
126 142
		// check that it was parsed?
127 143
	}
144
	
145
	public void testGenerateAnnotations() throws Exception {
146
		
147
		// summarize the packages
148
		DatapackageSummarizer ds = new DatapackageSummarizer();
149
		List<Identifier> identifiers = new ArrayList<Identifier>();
150
		Map<Integer, String> serverCodes = ReplicationService.getServerCodes();
128 151

  
152
		// select the metadata ids we want to summarize
153
		boolean includeReplicas = false;
154
		Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator();
155
		if (includeReplicas ) {
156
			codeIter = serverCodes.keySet().iterator();
157
		}
158
		
159
		Vector<String> idList = new Vector<String>();
160
		while (codeIter.hasNext()) {
161
			int serverLocation = codeIter.next();
162
			Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation);
163
			idList.addAll(idList0);
164
			Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation);
165
			idList.addAll(idList1);
166
			Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation);
167
			idList.addAll(idList2);
168
			Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation);
169
			idList.addAll(idList3);
170
		
171
		}
172
		
173
		// go through all the identifiers now
174
		for (String localId : idList) {
175
			try {
176
				String guid = IdentifierManager.getInstance().getGUID(
177
						DocumentUtil.getDocIdFromAccessionNumber(localId), 
178
						DocumentUtil.getRevisionFromAccessionNumber(localId));
179
				Identifier pid = new Identifier();
180
				pid.setValue(guid);
181
				identifiers.add(pid);
182
				
183
				String annotation = ds.generateAnnotation(pid);
184
				Identifier annotationPid = new Identifier();
185
				annotationPid.setValue("http://annotation/" + guid);
186
				Session session = getTestSession();
187
				
188
				SystemMetadata sysmeta = null;
189
				// look for the latest version of the annotation, if there is one
190
				do {
191
					try {
192
						sysmeta = MNodeService.getInstance(request).getSystemMetadata(annotationPid);
193
						if (sysmeta.getObsoletedBy() != null) {
194
							annotationPid.setValue(sysmeta.getObsoletedBy().getValue());
195
						}
196
					} catch (NotFound nf) {
197
						break;
198
					}
199
				} while (sysmeta != null && sysmeta.getObsoletedBy() != null);
200

  
201
				boolean exists = (sysmeta != null);
202
				
203
				InputStream object = null;
204
				object = IOUtils.toInputStream(annotation, "UTF-8");
205
				sysmeta = createSystemMetadata(annotationPid, session.getSubject(), object);
206
				ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
207
				formatId.setValue("http://www.w3.org/TR/rdf-syntax-grammar");
208
				sysmeta.setFormatId(formatId);
209
				sysmeta.setSize(BigInteger.valueOf(annotation.getBytes("UTF-8").length));
210
				
211
				// get the stream fresh for inserting/updating
212
				object = IOUtils.toInputStream(annotation, "UTF-8");
213

  
214
				if (!exists) {
215
					MNodeService.getInstance(request).create(session, annotationPid, object, sysmeta);
216
				} else {
217
					Identifier newAnnotationPid = new Identifier();
218
					// use an old-style revision scheme for updating the annotation identifier
219
					String value = annotationPid.getValue();
220
					int rev = DocumentUtil.getRevisionFromAccessionNumber(value);
221
					String partialId = DocumentUtil.getSmartDocId(value);
222
					rev++;
223
					newAnnotationPid.setValue(partialId + "." + rev);
224
					sysmeta.setIdentifier(newAnnotationPid);
225
					sysmeta.setObsoletes(annotationPid);
226
					MNodeService.getInstance(request).update(session, annotationPid, object, newAnnotationPid, sysmeta);
227
				}
228
				
229
				System.out.println("Generated annotation for pid: " + guid);
230
				
231
			} catch (McdbDocNotFoundException nfe) {
232
				// just skip it
233
				continue;
234
			}
235
		}
236
		//System.exit(0);
237
	}
238

  
129 239
}

Also available in: Unified diff