Project

General

Profile

1
/**  '$RCSfile$'
 *  Copyright: 2010 Regents of the University of California and the
 *              National Center for Ecological Analysis and Synthesis
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
19
package edu.ucsb.nceas.metacat.annotation;
20

    
21
import java.io.ByteArrayInputStream;
22
import java.io.InputStream;
23
import java.math.BigInteger;
24
import java.util.ArrayList;
25
import java.util.Arrays;
26
import java.util.Iterator;
27
import java.util.List;
28
import java.util.Map;
29
import java.util.Vector;
30

    
31
import junit.framework.Test;
32
import junit.framework.TestSuite;
33

    
34
import org.apache.commons.io.IOUtils;
35
import org.dataone.service.exceptions.NotFound;
36
import org.dataone.service.types.v1.Identifier;
37
import org.dataone.service.types.v1.ObjectFormatIdentifier;
38
import org.dataone.service.types.v1.Session;
39
import org.dataone.service.types.v2.SystemMetadata;
40

    
41
import edu.ucsb.nceas.metacat.DBUtil;
42
import edu.ucsb.nceas.metacat.DocumentImpl;
43
import edu.ucsb.nceas.metacat.IdentifierManager;
44
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
45
import edu.ucsb.nceas.metacat.dataone.D1NodeServiceTest;
46
import edu.ucsb.nceas.metacat.dataone.MNodeService;
47
import edu.ucsb.nceas.metacat.replication.ReplicationService;
48
import edu.ucsb.nceas.metacat.util.DocumentUtil;
49

    
50
public class DatapackageSummarizerTest extends D1NodeServiceTest {
51

    
52
	
53
    private static final String ANNOTATION_TEST_DOC = "test/eml-sample-annotation.xml";
54

    
55
	/**
56
	 * constructor for the test
57
	 */
58
	public DatapackageSummarizerTest(String name) {
59
		super(name);
60
	}
61

    
62
	/**
63
	 * Establish a testing framework by initializing appropriate objects
64
	 */
65
	public void setUp() throws Exception {
66
		super.setUp();
67
	}
68

    
69
	/**
70
	 * Release any objects after tests are complete
71
	 */
72
	public void tearDown() {
73
	}
74

    
75
	/**
76
	 * Create a suite of tests to be run together
77
	 */
78
	public static Test suite() {
79
		TestSuite suite = new TestSuite();
80
//		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotations"));
81
//		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation"));
82
//		suite.addTest(new DatapackageSummarizerTest("testStandaloneAnnotation"));
83
		suite.addTest(new DatapackageSummarizerTest("testIndexAnnotations"));
84
		return suite;
85
	}
86
	
87
	public void testStandaloneAnnotation() throws Exception {
88
		// insert the test document to sem-index
89
		Identifier metadataPid = new Identifier();
90
		metadataPid.setValue("testAnnotation.eml." + System.currentTimeMillis());
91
		Session session = getTestSession();
92
		try {
93
			InputStream object = new ByteArrayInputStream(this.getTestDocFromFile(ANNOTATION_TEST_DOC).getBytes("UTF-8"));
94
			SystemMetadata sysmeta = createSystemMetadata(metadataPid, session.getSubject(), object);
95
			ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
96
			formatId.setValue("eml://ecoinformatics.org/eml-2.0.0");
97
			sysmeta.setFormatId(formatId);
98
			Identifier pid = MNodeService.getInstance(request).create(session, metadataPid, object, sysmeta);
99
			assertEquals(metadataPid.getValue(), pid.getValue());
100
		} catch (Exception e) {
101
			e.printStackTrace();
102
			fail("Could not add metadata test file: " + e.getMessage());
103
		}
104
		
105
		// index it
106
		DatapackageSummarizer ds = new DatapackageSummarizer();
107
		ds.indexEphemeralAnnotation(metadataPid);
108
		
109
		// check it
110
		
111
	}
112
	
113
	/**
114
	 * Generate a single annotation based exclusively on the metadata
115
	 * @throws Exception
116
	 */
117
	public void testGenerateAnnotation() throws Exception {
118
		Identifier metadataPid = new Identifier();
119
		metadataPid.setValue("testAnnotation.eml." + System.currentTimeMillis());
120
		Session session = getTestSession();
121
		try {
122
			InputStream object = new ByteArrayInputStream(this.getTestDocFromFile(ANNOTATION_TEST_DOC).getBytes("UTF-8"));
123
			SystemMetadata sysmeta = createSystemMetadata(metadataPid, session.getSubject(), object);
124
			ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
125
			formatId.setValue("eml://ecoinformatics.org/eml-2.0.0");
126
			sysmeta.setFormatId(formatId);
127
			Identifier pid = MNodeService.getInstance(request).create(session, metadataPid, object, sysmeta);
128
			assertEquals(metadataPid.getValue(), pid.getValue());
129
		} catch (Exception e) {
130
			e.printStackTrace();
131
			fail("Could not add metadata test file: " + e.getMessage());
132
		}
133

    
134
		// generate the annotation for the metadata
135
		DatapackageSummarizer ds = new DatapackageSummarizer();
136
		String rdfContent = ds.generateAnnotation(metadataPid);
137
		
138
		// save the annotation
139
		Identifier annotationPid = new Identifier();
140
		annotationPid.setValue("http://annotation/" + metadataPid.getValue());
141
		try {
142
			InputStream object = new ByteArrayInputStream(rdfContent.getBytes("UTF-8"));
143
			SystemMetadata sysmeta = createSystemMetadata(annotationPid, session.getSubject(), object);
144
			ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
145
			formatId.setValue("http://www.w3.org/TR/rdf-syntax-grammar");
146
			sysmeta.setFormatId(formatId);
147
			Identifier pid = MNodeService.getInstance(request).create(session, annotationPid, object, sysmeta);
148
			assertEquals(annotationPid.getValue(), pid.getValue());
149
		} catch (Exception e) {
150
			e.printStackTrace();
151
			fail("Could not add annotation test file: " + e.getMessage());
152
		}
153
		
154
		// check that it was parsed?
155
	}
156
	
157
	public void testIndexAnnotations() throws Exception {
158
		testGenerateAnnotations(true);
159
	}
160
	
161
	public void testGenerateAnnotations() throws Exception {
162
		testGenerateAnnotations(false);
163
	}
164
	
165
	private void testGenerateAnnotations(boolean indexOnly) throws Exception {
166
		
167
		// summarize the packages
168
		DatapackageSummarizer ds = new DatapackageSummarizer();
169
		List<Identifier> identifiers = new ArrayList<Identifier>();
170
		Map<Integer, String> serverCodes = ReplicationService.getServerCodes();
171

    
172
		// select the metadata ids we want to summarize
173
		boolean includeReplicas = false;
174
		Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator();
175
		if (includeReplicas ) {
176
			codeIter = serverCodes.keySet().iterator();
177
		}
178
		
179
		Vector<String> idList = new Vector<String>();
180
		while (codeIter.hasNext()) {
181
			int serverLocation = codeIter.next();
182
			Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation);
183
			idList.addAll(idList0);
184
			Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation);
185
			idList.addAll(idList1);
186
			Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation);
187
			idList.addAll(idList2);
188
			Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation);
189
			idList.addAll(idList3);
190
		
191
		}
192
		
193
		// go through all the identifiers now
194
		for (String localId : idList) {
195
			try {
196
				String guid = IdentifierManager.getInstance().getGUID(
197
						DocumentUtil.getDocIdFromAccessionNumber(localId), 
198
						DocumentUtil.getRevisionFromAccessionNumber(localId));
199
				Identifier pid = new Identifier();
200
				pid.setValue(guid);
201
				identifiers.add(pid);
202
				
203
				// just index the annotation, don't save it
204
				if (indexOnly) {
205
					ds.indexEphemeralAnnotation(pid);
206
					continue;
207
				}
208
				
209
				String annotation = ds.generateAnnotation(pid);
210
				Identifier annotationPid = new Identifier();
211
				annotationPid.setValue("http://annotation/" + guid);
212
				Session session = getTestSession();
213
				
214
				SystemMetadata sysmeta = null;
215
				// look for the latest version of the annotation, if there is one
216
				do {
217
					try {
218
						sysmeta = MNodeService.getInstance(request).getSystemMetadata(null, annotationPid);
219
						if (sysmeta.getObsoletedBy() != null) {
220
							annotationPid.setValue(sysmeta.getObsoletedBy().getValue());
221
						}
222
					} catch (NotFound nf) {
223
						break;
224
					}
225
				} while (sysmeta != null && sysmeta.getObsoletedBy() != null);
226

    
227
				boolean exists = (sysmeta != null);
228
				
229
				InputStream object = null;
230
				object = IOUtils.toInputStream(annotation, "UTF-8");
231
				sysmeta = createSystemMetadata(annotationPid, session.getSubject(), object);
232
				ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
233
				formatId.setValue("http://www.w3.org/TR/rdf-syntax-grammar");
234
				sysmeta.setFormatId(formatId);
235
				sysmeta.setSize(BigInteger.valueOf(annotation.getBytes("UTF-8").length));
236
				
237
				// get the stream fresh for inserting/updating
238
				object = IOUtils.toInputStream(annotation, "UTF-8");
239

    
240
				if (!exists) {
241
					MNodeService.getInstance(request).create(session, annotationPid, object, sysmeta);
242
				} else {
243
					Identifier newAnnotationPid = new Identifier();
244
					// use an old-style revision scheme for updating the annotation identifier
245
					String value = annotationPid.getValue();
246
					int rev = DocumentUtil.getRevisionFromAccessionNumber(value);
247
					String partialId = DocumentUtil.getSmartDocId(value);
248
					rev++;
249
					newAnnotationPid.setValue(partialId + "." + rev);
250
					sysmeta.setIdentifier(newAnnotationPid);
251
					sysmeta.setObsoletes(annotationPid);
252
					MNodeService.getInstance(request).update(session, annotationPid, object, newAnnotationPid, sysmeta);
253
				}
254
				
255
				System.out.println("Generated annotation for pid: " + guid);
256
				
257
			} catch (McdbDocNotFoundException nfe) {
258
				// just skip it
259
				continue;
260
			}
261
		}
262
		//System.exit(0);
263
	}
264

    
265
}
(2-2/4)