Project

General

Profile

/**  '$RCSfile$'
 *  Copyright: 2010 Regents of the University of California and the
 *              National Center for Ecological Analysis and Synthesis
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
19
package edu.ucsb.nceas.metacat.annotation;
20

    
21
import java.io.ByteArrayInputStream;
22
import java.io.InputStream;
23
import java.math.BigInteger;
24
import java.util.ArrayList;
25
import java.util.Arrays;
26
import java.util.Iterator;
27
import java.util.List;
28
import java.util.Map;
29
import java.util.Vector;
30

    
31
import junit.framework.Test;
32
import junit.framework.TestSuite;
33

    
34
import org.apache.commons.io.IOUtils;
35
import org.dataone.service.exceptions.NotFound;
36
import org.dataone.service.types.v1.Identifier;
37
import org.dataone.service.types.v1.ObjectFormatIdentifier;
38
import org.dataone.service.types.v1.Session;
39
import org.dataone.service.types.v1.SystemMetadata;
40

    
41
import edu.ucsb.nceas.metacat.DBUtil;
42
import edu.ucsb.nceas.metacat.DocumentImpl;
43
import edu.ucsb.nceas.metacat.IdentifierManager;
44
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
45
import edu.ucsb.nceas.metacat.dataone.D1NodeServiceTest;
46
import edu.ucsb.nceas.metacat.dataone.MNodeService;
47
import edu.ucsb.nceas.metacat.replication.ReplicationService;
48
import edu.ucsb.nceas.metacat.util.DocumentUtil;
49

    
50
public class DatapackageSummarizerTest extends D1NodeServiceTest {
51

    
52
	
53
    private static final String ANNOTATION_TEST_DOC = "test/eml-sample-annotation.xml";
54

    
55
	/**
56
	 * constructor for the test
57
	 */
58
	public DatapackageSummarizerTest(String name) {
59
		super(name);
60
	}
61

    
62
	/**
63
	 * Establish a testing framework by initializing appropriate objects
64
	 */
65
	public void setUp() throws Exception {
66
		super.setUp();
67
	}
68

    
69
	/**
70
	 * Release any objects after tests are complete
71
	 */
72
	public void tearDown() {
73
	}
74

    
75
	/**
76
	 * Create a suite of tests to be run together
77
	 */
78
	public static Test suite() {
79
		TestSuite suite = new TestSuite();
80
//		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotations"));
81
//		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation"));
82
		suite.addTest(new DatapackageSummarizerTest("testStandaloneAnnotation"));
83
//		suite.addTest(new DatapackageSummarizerTest("testGenerateRandomAnnotation"));
84
		return suite;
85
	}
86
	
87
	public void testStandaloneAnnotation() throws Exception {
88
		// insert the test document to sem-index
89
		Identifier metadataPid = new Identifier();
90
		metadataPid.setValue("testAnnotation.eml." + System.currentTimeMillis());
91
		Session session = getTestSession();
92
		try {
93
			InputStream object = new ByteArrayInputStream(this.getTestDocFromFile(ANNOTATION_TEST_DOC).getBytes("UTF-8"));
94
			SystemMetadata sysmeta = createSystemMetadata(metadataPid, session.getSubject(), object);
95
			ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
96
			formatId.setValue("eml://ecoinformatics.org/eml-2.0.0");
97
			sysmeta.setFormatId(formatId);
98
			Identifier pid = MNodeService.getInstance(request).create(session, metadataPid, object, sysmeta);
99
			assertEquals(metadataPid.getValue(), pid.getValue());
100
		} catch (Exception e) {
101
			e.printStackTrace();
102
			fail("Could not add metadata test file: " + e.getMessage());
103
		}
104
		
105
		// index it
106
		DatapackageSummarizer ds = new DatapackageSummarizer();
107
		ds.indexEphemeralAnnotation(metadataPid);
108
		
109
		// check it
110
		
111
	}
112
	
113
	/**
114
	 * Generate a single annotation based exclusively on the metadata
115
	 * @throws Exception
116
	 */
117
	public void testGenerateAnnotation() throws Exception {
118
		this.testGenerateAnnotation_base(false);
119
	}
120
	
121
	/**
122
	 * Generate a bunch of random annotations
123
	 * @throws Exception
124
	 */
125
	public void testGenerateRandomAnnotation() throws Exception {
126
		for (int i = 0; i < 5; i++) {
127
			this.testGenerateAnnotation_base(true);
128
		}
129
	}
130

    
131
	private void testGenerateAnnotation_base(boolean randomize) throws Exception {
132
		Identifier metadataPid = new Identifier();
133
		metadataPid.setValue("testAnnotation.eml." + System.currentTimeMillis());
134
		Session session = getTestSession();
135
		try {
136
			InputStream object = new ByteArrayInputStream(this.getTestDocFromFile(ANNOTATION_TEST_DOC).getBytes("UTF-8"));
137
			SystemMetadata sysmeta = createSystemMetadata(metadataPid, session.getSubject(), object);
138
			ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
139
			formatId.setValue("eml://ecoinformatics.org/eml-2.0.0");
140
			sysmeta.setFormatId(formatId);
141
			Identifier pid = MNodeService.getInstance(request).create(session, metadataPid, object, sysmeta);
142
			assertEquals(metadataPid.getValue(), pid.getValue());
143
		} catch (Exception e) {
144
			e.printStackTrace();
145
			fail("Could not add metadata test file: " + e.getMessage());
146
		}
147

    
148
		// generate the annotation for the metadata
149
		DatapackageSummarizer ds = new DatapackageSummarizer();
150
		ds.randomize = randomize;
151
		String rdfContent = ds.generateAnnotation(metadataPid);
152
		
153
		// save the annotation
154
		Identifier annotationPid = new Identifier();
155
		annotationPid.setValue("http://annotation/" + metadataPid.getValue());
156
		try {
157
			InputStream object = new ByteArrayInputStream(rdfContent.getBytes("UTF-8"));
158
			SystemMetadata sysmeta = createSystemMetadata(annotationPid, session.getSubject(), object);
159
			ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
160
			formatId.setValue("http://www.w3.org/TR/rdf-syntax-grammar");
161
			sysmeta.setFormatId(formatId);
162
			Identifier pid = MNodeService.getInstance(request).create(session, annotationPid, object, sysmeta);
163
			assertEquals(annotationPid.getValue(), pid.getValue());
164
		} catch (Exception e) {
165
			e.printStackTrace();
166
			fail("Could not add annotation test file: " + e.getMessage());
167
		}
168
		
169
		// check that it was parsed?
170
	}
171
	
172
	public void testGenerateAnnotations() throws Exception {
173
		
174
		// summarize the packages
175
		DatapackageSummarizer ds = new DatapackageSummarizer();
176
		List<Identifier> identifiers = new ArrayList<Identifier>();
177
		Map<Integer, String> serverCodes = ReplicationService.getServerCodes();
178

    
179
		// select the metadata ids we want to summarize
180
		boolean includeReplicas = false;
181
		Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator();
182
		if (includeReplicas ) {
183
			codeIter = serverCodes.keySet().iterator();
184
		}
185
		
186
		Vector<String> idList = new Vector<String>();
187
		while (codeIter.hasNext()) {
188
			int serverLocation = codeIter.next();
189
			Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation);
190
			idList.addAll(idList0);
191
			Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation);
192
			idList.addAll(idList1);
193
			Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation);
194
			idList.addAll(idList2);
195
			Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation);
196
			idList.addAll(idList3);
197
		
198
		}
199
		
200
		// go through all the identifiers now
201
		for (String localId : idList) {
202
			try {
203
				String guid = IdentifierManager.getInstance().getGUID(
204
						DocumentUtil.getDocIdFromAccessionNumber(localId), 
205
						DocumentUtil.getRevisionFromAccessionNumber(localId));
206
				Identifier pid = new Identifier();
207
				pid.setValue(guid);
208
				identifiers.add(pid);
209
				
210
				String annotation = ds.generateAnnotation(pid);
211
				Identifier annotationPid = new Identifier();
212
				annotationPid.setValue("http://annotation/" + guid);
213
				Session session = getTestSession();
214
				
215
				SystemMetadata sysmeta = null;
216
				// look for the latest version of the annotation, if there is one
217
				do {
218
					try {
219
						sysmeta = MNodeService.getInstance(request).getSystemMetadata(annotationPid);
220
						if (sysmeta.getObsoletedBy() != null) {
221
							annotationPid.setValue(sysmeta.getObsoletedBy().getValue());
222
						}
223
					} catch (NotFound nf) {
224
						break;
225
					}
226
				} while (sysmeta != null && sysmeta.getObsoletedBy() != null);
227

    
228
				boolean exists = (sysmeta != null);
229
				
230
				InputStream object = null;
231
				object = IOUtils.toInputStream(annotation, "UTF-8");
232
				sysmeta = createSystemMetadata(annotationPid, session.getSubject(), object);
233
				ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
234
				formatId.setValue("http://www.w3.org/TR/rdf-syntax-grammar");
235
				sysmeta.setFormatId(formatId);
236
				sysmeta.setSize(BigInteger.valueOf(annotation.getBytes("UTF-8").length));
237
				
238
				// get the stream fresh for inserting/updating
239
				object = IOUtils.toInputStream(annotation, "UTF-8");
240

    
241
				if (!exists) {
242
					MNodeService.getInstance(request).create(session, annotationPid, object, sysmeta);
243
				} else {
244
					Identifier newAnnotationPid = new Identifier();
245
					// use an old-style revision scheme for updating the annotation identifier
246
					String value = annotationPid.getValue();
247
					int rev = DocumentUtil.getRevisionFromAccessionNumber(value);
248
					String partialId = DocumentUtil.getSmartDocId(value);
249
					rev++;
250
					newAnnotationPid.setValue(partialId + "." + rev);
251
					sysmeta.setIdentifier(newAnnotationPid);
252
					sysmeta.setObsoletes(annotationPid);
253
					MNodeService.getInstance(request).update(session, annotationPid, object, newAnnotationPid, sysmeta);
254
				}
255
				
256
				System.out.println("Generated annotation for pid: " + guid);
257
				
258
			} catch (McdbDocNotFoundException nfe) {
259
				// just skip it
260
				continue;
261
			}
262
		}
263
		//System.exit(0);
264
	}
265

    
266
}
(2-2/3)