Project

General

Profile

« Previous | Next » 

Revision 6538

Include the newer d1 libclient, which uses Foresite (and Jena) to construct and parse ORE resource maps for DataONE.

View differences:

test/edu/ucsb/nceas/metacat/util/MetacatPopulatorTest.java
77 77
            MetacatPopulator mp = new MetacatPopulator(
78 78
                    "http://knb.ecoinformatics.org/knb",  
79 79
                    /*"http://localhost:8080/knb"*/
80
                    "http://knb-mn.ecoinformatics.org/knb/d1", 
81
                    /*"msucci"*//*"connolly.301"*/ "frog", 
80
                    "https://demo3.test.dataone.org/knb/d1/mn/v1", 
81
                    //"https://knb-test-1.test.dataone.org/knb/d1/mn/v1", 
82
                    /*"msucci"*//*"connolly.301"*/ 
83
                    "tao.1.1", //"frog", 
82 84
                    "uid=kepler,o=unaffiliated,dc=ecoinformatics,dc=org", 
83 85
                    "kepler");
84 86
            mp.populate();
src/edu/ucsb/nceas/metacat/util/MetacatPopulator.java
31 31
import java.math.BigInteger;
32 32
import java.net.HttpURLConnection;
33 33
import java.net.URL;
34
import java.util.ArrayList;
34 35
import java.util.Calendar;
35 36
import java.util.Date;
37
import java.util.HashMap;
38
import java.util.List;
39
import java.util.Map;
36 40
import java.util.Vector;
37 41

  
38 42
import javax.activation.DataHandler;
......
45 49
import org.dataone.client.MNode;
46 50
import org.dataone.client.ObjectFormatCache;
47 51
import org.dataone.client.auth.CertificateManager;
52
import org.dataone.ore.ResourceMapFactory;
48 53
import org.dataone.service.exceptions.NotFound;
49 54
import org.dataone.service.types.v1.AccessPolicy;
50 55
import org.dataone.service.types.v1.AccessRule;
......
58 63
import org.dataone.service.types.v1.SystemMetadata;
59 64
import org.dataone.service.types.v1.util.ChecksumUtil;
60 65
import org.dataone.service.util.Constants;
66
import org.dspace.foresite.ResourceMap;
61 67
import org.ecoinformatics.datamanager.DataManager;
62 68
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
63 69
import org.ecoinformatics.datamanager.parser.DataPackage;
......
79 85
    private String username = null;
80 86
    private String password = null;
81 87
    private Session session = null;
88
    private String subjectDN = null;
82 89
    
83 90
    /**
84 91
     * create a new MetacatPopulator with given source and destination urls.  
......
100 107
        this.destinationUrl = destUrl;
101 108
        // TODO: use specific certificate?
102 109
        this.session = null; //new Session();
110
        this.subjectDN = CertificateManager.getInstance().getSubjectDN(CertificateManager.getInstance().loadCertificate());
103 111
    }
104 112
    
105 113
    /**
......
130 138
        MNode mn = D1Client.getMN(destinationUrl + "/");
131 139
        
132 140
        printHeader("Processing " + docs.size() + " results.");
133
        for(int i=0; i<docs.size(); i++)
134
        {
141
        for (int i=0; i<docs.size(); i++) {
142
        	
143
        	// for generating the ORE map
144
            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
145
            List<Identifier> dataIds = new ArrayList<Identifier>();
146
            
135 147
            //for each document in the query
136 148
            Document doc = docs.get(i);
137 149
            String docid = doc.docid;
......
147 159
        	DataManager dataManager = DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
148 160
        	DataPackage dataPackage = dataManager.parseMetadata(is);
149 161
        	
150
            if (dataPackage == null)
151
            {
162
            if (dataPackage == null) {
152 163
                continue;
153 164
            }
165
            
154 166
            //go through the DistributionMetadata and download any described data
155
            
156 167
            is = stringToStream(doctext);
157 168
            doc.doctext = doctext;
158 169

  
159 170
            printHeader("creating document on destination " + destinationUrl);            
160 171
            SystemMetadata sysmeta = generateSystemMetadata(doc);
172
            
173
            // iterate through the data objects
161 174
            if (dataPackage.getEntityList() != null) {
162
	            for(int j=0; j < dataPackage.getEntityList().length; j++)
163
	            {
175
	            for (int j=0; j < dataPackage.getEntityList().length; j++) {
164 176
	                String dataDocUrl = dataPackage.getEntityList()[j].getURL();
165
	                String dataDocMimeType = 
166
	                	dataPackage.getEntityList()[j].getDataFormat();
177
	                String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
167 178
	                if (dataDocMimeType == null) {
168 179
		                dataDocMimeType = 
169 180
		                	ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFmtid().getValue();
170 181
	                }
171 182
	                String dataDocLocalId = "";
172
	                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
173
	                { //we only handle ecogrid urls right now
183
	                if (dataDocUrl.trim().startsWith("ecogrid://knb/")) { //we only handle ecogrid urls right now
174 184
	                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
175 185
	                            "ecogrid://knb/".length(), dataDocUrl.length());
176 186
	                    //get the file
......
182 192
	                    Identifier did = new Identifier();
183 193
	                    did.setValue(dataDocLocalId);
184 194
	                    
185
	                    //add the desribeby to the eml's sysmeta
186
	                    // TODO Use ORE
187
//	                    System.out.println("adding describe for doc " + 
188
//	                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
189
//	                    sysmeta.addDescribe(did);
195
	                    // add the data identifier for ORE map 
196
	                    dataIds.add(did);
190 197
	                    
191 198
	                    //create sysmeta for the data doc                    
192 199
	                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
......
205 212
	                    String sizeStr = 
206 213
	                    	Long.toString(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
207 214
	                    dataDocSysMeta.setSize(new BigInteger(sizeStr));
208
	                    // TODO use ORE map
209
	                    //dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
215

  
210 216
	                    boolean error = false;
211 217
	                    
212 218
	                    //create the data doc on d1
213
	                    try
214
	                    {
219
	                    try {
215 220
	                        mn.create(session, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
216 221
	                    }
217
	                    catch(Exception e)
218
	                    {
222
	                    catch(Exception e) {
219 223
	                        error = true;
220 224
	                        System.out.println("ERROR: Could not create data document with id " + 
221 225
	                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
222 226
	                    }
223
	                    finally
224
	                    {
225
	                        if (error)
226
	                        {
227
	                    finally {
228
	                        if (error) {
227 229
	                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
228 230
	                                    "FAILED.");
229 231
	                        }
230
	                        else
231
	                        {
232
	                        else {
232 233
	                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
233 234
	                                " which is described by " + sysmeta.getIdentifier().getValue());
234 235
	                        }
235 236
	                    }
236 237
	                }
237
	                else
238
	                {
238
	                else {
239 239
	                    System.out.println("WARNING: Could not process describes url " +
240 240
	                            dataDocUrl + " for document " + doc.docid + 
241 241
	                    ".  Only ecogrid://knb/ urls are currently supported.");
......
243 243
	            }
244 244
            }
245 245
            
246
            try
247
            {
248
              Identifier id = mn.create(session, sysmeta.getIdentifier(), 
249
                    IOUtils.toInputStream(doc.doctext), sysmeta);
246
            try {
247
              Identifier id = 
248
            	  mn.create(session, sysmeta.getIdentifier(), IOUtils.toInputStream(doc.doctext), sysmeta);
250 249
              System.out.println("Success inserting document " + id.getValue());
251 250
              
251
              // no need for an ORE map if there's no data
252
              if (!dataIds.isEmpty()) {
253
	              // generate the ORE map for this datapackage
254
	              Identifier resourceMapId = new Identifier();
255
	              resourceMapId.setValue("resourceMap_" + sysmeta.getIdentifier().getValue());
256
	              idMap.put(sysmeta.getIdentifier(), dataIds);
257
	              ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
258
	              String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
259
	              Document rmDoc = new Document(resourceMapId.getValue(), "http://www.openarchives.org/ore/terms", "", "");
260
	              rmDoc.doctext = resourceMapXML;
261
	              SystemMetadata resourceMapSysMeta = generateSystemMetadata(rmDoc);
262
	              mn.create(session, resourceMapId, IOUtils.toInputStream(resourceMapXML), resourceMapSysMeta);
263
	              
264
	              // clean up the permissions (FORCE public read)
265
	              for (Identifier dataId: dataIds) {
266
	            	  mn.setAccessPolicy(session, dataId, sysmeta.getAccessPolicy());
267
	            	  System.out.println("Set public access policy for: " + dataId.getValue());
268
	              }
252 269
            }
253
            catch(Exception e)
254
            {
270
              
271
            }
272
            catch(Exception e) {
255 273
                e.printStackTrace();
256 274
                System.out.println("Could not create document with id " + 
257 275
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
258
                
259 276
            }
260
            finally
261
            {
262
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
277
            finally {
278
                printHeader("Done processing document " + sysmeta.getIdentifier().getValue());
263 279
            }
264 280
        }
265 281
        
......
273 289
     * @return
274 290
     */
275 291
    private SystemMetadata generateSystemMetadata(Document doc)
276
      throws Exception
277
    {
292
      throws Exception {
278 293
        SystemMetadata sm = new SystemMetadata();
279 294
        //set the id
280 295
        Identifier id = new Identifier();
......
283 298
        
284 299
        //set the object format
285 300
        ObjectFormat format = ObjectFormatCache.getInstance().getFormat(doc.doctype);
286
        if(format == null)
287
        {
288
            if(doc.doctype.trim().equals("BIN"))
289
            {
301
        if (format == null) {
302
            if (doc.doctype.trim().equals("BIN")) {
290 303
                format = ObjectFormatCache.getInstance().getFormat("application/octet-stream");
291 304
            }
292
            else
293
            {
305
            else {
294 306
                format = ObjectFormatCache.getInstance().getFormat("text/plain");
295 307
            }
296 308
        }
......
305 317
        String sizeStr = Long.toString(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
306 318
        sm.setSize(new BigInteger(sizeStr));
307 319
        
308
        //submitter
320
        //submitter, rights holder
309 321
        Subject p = new Subject();
310
        p.setValue("unknown");
322
        p.setValue(subjectDN);
311 323
        sm.setSubmitter(p);
312 324
        sm.setRightsHolder(p);
313
        try
314
        {
325
        try {
315 326
            Date dateCreated = parseMetacatDate(doc.createDate);
316 327
            sm.setDateUploaded(dateCreated);
317 328
            Date dateUpdated = parseMetacatDate(doc.updateDate);
318 329
            sm.setDateSysMetadataModified(dateUpdated);
319 330
        }
320
        catch(Exception e)
321
        {
331
        catch(Exception e) {
322 332
            System.out.println("couldn't parse a date: " + e.getMessage());
323 333
            Date dateCreated = new Date();
324 334
            sm.setDateUploaded(dateCreated);
......
338 348
        subject.setValue(Constants.SUBJECT_PUBLIC);
339 349
		accessRule.addSubject(subject);
340 350
		accessPolicy.addAllow(accessRule);
351
		
341 352
		sm.setAccessPolicy(accessPolicy);
342 353
        
343 354
        return sm;
344 355
    }
345 356
    
346
    private void printHeader(String s)
347
    {
357
    private void printHeader(String s) {
348 358
        System.out.println("****** " + s + " *******");
349 359
    }
350 360
    
351
    
352
    
353 361
    /**
354 362
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
355 363
     * a proper date object

Also available in: Unified diff