Project

General

Profile

/**
 *  '$RCSfile$'
 *  Copyright: 2000 Regents of the University of California and the
 *              National Center for Ecological Analysis and Synthesis
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2017-03-06 16:22:32 -0800 (Mon, 06 Mar 2017) $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
23
package edu.ucsb.nceas.metacat.dataone;
24

    
25
import java.io.InputStream;
26
import java.text.SimpleDateFormat;
27
import java.util.ArrayList;
28
import java.util.Arrays;
29
import java.util.Calendar;
30
import java.util.Date;
31
import java.util.HashMap;
32
import java.util.List;
33
import java.util.Map;
34

    
35
import org.apache.commons.lang.StringUtils;
36
import org.apache.log4j.Logger;
37
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
38
import org.dataone.client.v2.itk.D1Client;
39
import org.dataone.service.exceptions.BaseException;
40
import org.dataone.service.exceptions.InvalidRequest;
41
import org.dataone.service.exceptions.InvalidToken;
42
import org.dataone.service.exceptions.NotAuthorized;
43
import org.dataone.service.exceptions.NotFound;
44
import org.dataone.service.exceptions.NotImplemented;
45
import org.dataone.service.exceptions.ServiceFailure;
46
import org.dataone.service.types.v1.Identifier;
47
import org.dataone.service.types.v2.Node;
48
import org.dataone.service.types.v2.ObjectFormat;
49
import org.dataone.service.types.v1.Permission;
50
import org.dataone.service.types.v1.Person;
51
import org.dataone.service.types.v1.Session;
52
import org.dataone.service.types.v1.Subject;
53
import org.dataone.service.types.v1.SubjectInfo;
54
import org.dataone.service.types.v2.SystemMetadata;
55
import org.dataone.service.types.v1.util.AuthUtils;
56
import org.dataone.service.util.Constants;
57
import org.ecoinformatics.datamanager.parser.DataPackage;
58
import org.ecoinformatics.datamanager.parser.Party;
59
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
60
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
61

    
62
import edu.ucsb.nceas.ezid.EZIDClient;
63
import edu.ucsb.nceas.ezid.EZIDException;
64
import edu.ucsb.nceas.ezid.profile.DataCiteProfile;
65
import edu.ucsb.nceas.ezid.profile.DataCiteProfileResourceTypeValues;
66
import edu.ucsb.nceas.ezid.profile.ErcMissingValueCode;
67
import edu.ucsb.nceas.ezid.profile.InternalProfile;
68
import edu.ucsb.nceas.ezid.profile.InternalProfileValues;
69
import edu.ucsb.nceas.metacat.properties.PropertyService;
70
import edu.ucsb.nceas.metacat.util.SystemUtil;
71
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
72

    
73
/**
 * Singleton for interacting with the EZID DOI library.
 * Supports minting (initial registration) of DOIs as well as creating
 * and updating existing DOI registrations.
 *
 * @author leinfelder
 */
81
public class DOIService {
82

    
83
	private Logger logMetacat = Logger.getLogger(DOIService.class);
84

    
85
	private boolean doiEnabled = false;
86
	
87
	private String shoulder = null;
88
	
89
	private String ezidUsername = null;
90
	
91
	private String ezidPassword = null;
92
	
93
	private EZIDClient ezid = null;
94
	
95
	private Date lastLogin = null;
96
	
97
	private long loginPeriod = 1 * 24 * 60 * 60 * 1000;
98

    
99
	private static DOIService instance = null;
100
	
101
	public static DOIService getInstance() {
102
		if (instance == null) {
103
			instance = new DOIService();
104
		}
105
		return instance;
106
	}
107
	
108
	/**
	 * Constructor, private for singleton access.
	 * Reads the EZID settings ("guid.ezid.enabled", "guid.ezid.doishoulder.1",
	 * "guid.ezid.baseurl", "guid.ezid.username", "guid.ezid.password") from the
	 * PropertyService and creates the EZID client. If any of these properties is
	 * missing, a warning is logged, no client is created and DOI support stays disabled.
	 */
	private DOIService() {

		// for DOIs
		String ezidServiceBaseUrl = null;

		try {
			// hold the flag in a local until every required property was found, so that
			// a partially configured node never ends up "enabled" without an EZID client
			boolean enabled = Boolean.parseBoolean(PropertyService.getProperty("guid.ezid.enabled"));
			shoulder = PropertyService.getProperty("guid.ezid.doishoulder.1");
			ezidServiceBaseUrl = PropertyService.getProperty("guid.ezid.baseurl");
			ezidUsername = PropertyService.getProperty("guid.ezid.username");
			ezidPassword = PropertyService.getProperty("guid.ezid.password");
			doiEnabled = enabled;
		} catch (PropertyNotFoundException e) {
			doiEnabled = false;
			logMetacat.warn("DOI support is not configured at this node.", e);
			return;
		}

		ezid = new EZIDClient(ezidServiceBaseUrl);

	}
132
	
133
	/**
134
	 * Make sure we have a current login before making any calls
135
	 * @throws EZIDException
136
	 */
137
	private void refreshLogin() throws EZIDException {
138
		Date now = Calendar.getInstance().getTime();
139
		if (lastLogin == null || now.getTime() - lastLogin.getTime() > loginPeriod) {
140
			ezid.login(ezidUsername, ezidPassword);
141
			lastLogin = now;	
142
		}
143
	}
144
	
145
	/**
146
	 * submits DOI metadata information about the object to EZID
147
	 * @param sysMeta
148
	 * @return
149
	 * @throws EZIDException 
150
	 * @throws ServiceFailure 
151
	 * @throws NotImplemented 
152
	 * @throws InterruptedException 
153
	 */
154
	public boolean registerDOI(SystemMetadata sysMeta) throws EZIDException, NotImplemented, ServiceFailure, InterruptedException {
155
				
156
		// only continue if we have the feature turned on
157
		if (doiEnabled) {
158
			
159
			String identifier = sysMeta.getIdentifier().getValue();
160
			
161
			// only continue if this DOI is in our configured shoulder
162
			if (identifier.startsWith(shoulder)) {
163
				
164
				// enter metadata about this identifier
165
				HashMap<String, String> metadata = new HashMap<String, String>();
166
				
167
				// default values first
168
				metadata.put(DataCiteProfile.TITLE.toString(), ErcMissingValueCode.UNKNOWN.toString());
169
				metadata.put(DataCiteProfile.CREATOR.toString(), ErcMissingValueCode.UNKNOWN.toString());
170

    
171
				// now look up title and creators from EML content 
172
				Map<String, String> emlMetadata = null;
173
				try {
174
					emlMetadata = this.lookupEMLMetadata(sysMeta);
175
					metadata.putAll(emlMetadata);
176

    
177
				} catch (Exception e) {
178
					// TODO Auto-generated catch block
179
					e.printStackTrace();
180
				}
181
				
182
				// publisher
183
				String publisher = ErcMissingValueCode.UNKNOWN.toString();
184
				Node node = MNodeService.getInstance(null).getCapabilities();
185
				publisher = node.getName();
186
				
187
				// publication year
188
				SimpleDateFormat sdf = new SimpleDateFormat("yyyy");
189
				String year = sdf.format(sysMeta.getDateUploaded());
190
				
191
				// type
192
				String resourceType = lookupResourceType(sysMeta);
193
				
194
				// format
195
				String format = sysMeta.getFormatId().getValue();
196
				
197
				//size
198
				String size = sysMeta.getSize().toString();
199
				
200
				// target (URL)
201
				String target = node.getBaseURL() + "/v1/object/" + identifier;
202
				String uriTemplate = null;
203
				String uriTemplateKey = "guid.ezid.uritemplate.data";
204
				ObjectFormat objectFormat = null;
205
				try {
206
					objectFormat = D1Client.getCN().getFormat(sysMeta.getFormatId());
207
				} catch (BaseException e1) {
208
					logMetacat.warn("Could not check format type for: " + sysMeta.getFormatId());
209
				}
210
				if (objectFormat != null && objectFormat.getFormatType().equals("METADATA")) {
211
					uriTemplateKey = "guid.ezid.uritemplate.metadata";
212
				}
213
				try {
214
					uriTemplate = PropertyService.getProperty(uriTemplateKey);
215
					target =  SystemUtil.getSecureServerURL() + uriTemplate.replaceAll("<IDENTIFIER>", identifier);
216
				} catch (PropertyNotFoundException e) {
217
					logMetacat.warn("No target URI template found in the configuration for: " + uriTemplateKey);
218
				}
219
				
220
				// status and export fields for public/protected data
221
				String status = InternalProfileValues.UNAVAILABLE.toString();
222
				String export = InternalProfileValues.NO.toString();
223
				Subject publicSubject = new Subject();
224
				publicSubject.setValue(Constants.SUBJECT_PUBLIC);
225
				if (AuthUtils.isAuthorized(Arrays.asList(new Subject[] {publicSubject}), Permission.READ, sysMeta)) {
226
					status = InternalProfileValues.PUBLIC.toString();
227
					export = InternalProfileValues.YES.toString();
228
				}
229
				
230
				// set the datacite metadata fields
231
				metadata.put(DataCiteProfile.PUBLISHER.toString(), publisher);
232
				metadata.put(DataCiteProfile.PUBLICATION_YEAR.toString(), year);
233
				metadata.put(DataCiteProfile.RESOURCE_TYPE.toString(), resourceType);
234
				metadata.put(DataCiteProfile.FORMAT.toString(), format);
235
				metadata.put(DataCiteProfile.SIZE.toString(), size);
236
				metadata.put(InternalProfile.TARGET.toString(), target);
237
				metadata.put(InternalProfile.STATUS.toString(), status);
238
				metadata.put(InternalProfile.EXPORT.toString(), export);
239
	
240
				// make sure we have a current login
241
				this.refreshLogin();
242
				
243
				// set using the API
244
				ezid.createOrUpdate(identifier, metadata);
245
				
246
			}
247
			
248
		}
249
		
250
		return true;
251
	}
252

    
253
	/**
254
	 * Generate a DOI using the EZID service as configured
255
	 * @return
256
	 * @throws EZIDException 
257
	 * @throws InvalidRequest 
258
	 */
259
	public Identifier generateDOI() throws EZIDException, InvalidRequest {
260

    
261
		
262
		// only continue if we have the feature turned on
263
		if (!doiEnabled) {
264
			throw new InvalidRequest("2193", "DOI scheme is not enabled at this node.");
265
		}
266
		
267
		// add only the minimal metadata required for this DOI
268
		HashMap<String, String> metadata = new HashMap<String, String>();
269
		metadata.put(DataCiteProfile.TITLE.toString(), ErcMissingValueCode.UNKNOWN.toString());
270
		metadata.put(DataCiteProfile.CREATOR.toString(), ErcMissingValueCode.UNKNOWN.toString());
271
		metadata.put(DataCiteProfile.PUBLISHER.toString(), ErcMissingValueCode.UNKNOWN.toString());
272
		metadata.put(DataCiteProfile.PUBLICATION_YEAR.toString(), ErcMissingValueCode.UNKNOWN.toString());
273
		metadata.put(InternalProfile.STATUS.toString(), InternalProfileValues.RESERVED.toString());
274
		metadata.put(InternalProfile.EXPORT.toString(), InternalProfileValues.NO.toString());
275

    
276
		// make sure we have a current login
277
		this.refreshLogin();
278

    
279
		// call the EZID service
280
		String doi = ezid.mintIdentifier(shoulder, metadata);
281
		Identifier identifier = new Identifier();
282
		identifier.setValue(doi);
283
		
284
		return identifier;
285
	}
286
	
287
	/**
288
	 * Locates an appropriate title for the object identified by the given SystemMetadata.
289
	 * Different types of objects will be handled differently for titles:
290
	 * 1. EML formats - parsed by the Datamanager library to find dataset title 
291
	 * 2. Data objects - TODO: use title from EML file that describes that data
292
	 * 3. ORE objects - TODO: use title from EML file contained in that package
293
	 * @param sysMeta
294
	 * @return appropriate title if known, or the missing value code
295
	 * @throws Exception
296
	 */
297
	private Map<String, String> lookupEMLMetadata(SystemMetadata sysMeta) throws Exception {
298
		
299
		Map<String, String> emlMetadata = new HashMap<String, String>();
300
		
301
		String title = ErcMissingValueCode.UNKNOWN.toString();
302
		List<String> people = new ArrayList<String>();
303

    
304
		if (sysMeta.getFormatId().getValue().startsWith("eml://")) {
305
			DataPackageParserInterface parser = new Eml200DataPackageParser();
306
			// for using the MN API as the MN itself
307
			MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
308
			Session session = new Session();
309
	        Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
310
	        session.setSubject(subject);
311
			InputStream emlStream = MNodeService.getInstance(request).get(session, sysMeta.getIdentifier());
312
			parser.parse(emlStream);
313
			DataPackage dataPackage = parser.getDataPackage();
314
			title = dataPackage.getTitle();
315
			
316
			emlMetadata.put(DataCiteProfile.TITLE.toString(), title);
317

    
318
			// extract the creator[s]
319
			String creator = sysMeta.getRightsHolder().getValue();
320
			List<Party> creators = dataPackage.getCreators();
321
			if (creators != null) {
322
				for (Party party: creators) {
323
					String name = "";
324
					if (party.getSurName() != null) {
325
						name = party.getSurName();
326
						if (party.getGivenNames() != null && party.getGivenNames().size() > 0) {
327
							String givenNames = "";
328
							for (String givenName: party.getGivenNames()) {
329
								givenNames = givenName + " ";
330
							}
331
							name = givenNames + name;
332
						}
333
					} else {
334
						name = party.getOrganization();
335
					}
336
					
337
					people.add(name);
338
				}
339
				creator = StringUtils.join(people, ";");
340
			} else {
341
				try {
342
					// from SM
343
					creator = lookupCreator(sysMeta.getRightsHolder());
344
				} catch (Exception e) {
345
					// ignore and use default
346
				}
347
			}
348
			
349
			emlMetadata.put(DataCiteProfile.CREATOR.toString(), creator);
350
		}
351
		return emlMetadata;
352
	}
353
	
354
	private String lookupResourceType(SystemMetadata sysMeta) {
355
		String resourceType = DataCiteProfileResourceTypeValues.DATASET.toString();
356
		try {
357
			ObjectFormat objectFormat = D1Client.getCN().getFormat(sysMeta.getFormatId());
358
			resourceType += "/" + objectFormat.getFormatType().toLowerCase();
359
		} catch (Exception e) {
360
			// ignore
361
			logMetacat.warn("Could not lookup resource type for formatId" + e.getMessage());
362
		}
363
		
364
		return resourceType;
365
	}
366

    
367
	/**
368
	 * Lookup the citable name for the given Subject
369
	 * Calls the configured CN to determine this information.
370
	 * If the person is not registered with the CN identity service, 
371
	 * a NotFound exception will be raised as expected from the service.
372
	 * @param subject
373
	 * @return fullName if found
374
	 * @throws ServiceFailure
375
	 * @throws NotAuthorized
376
	 * @throws NotImplemented
377
	 * @throws NotFound
378
	 * @throws InvalidToken
379
	 */
380
	private String lookupCreator(Subject subject) throws ServiceFailure, NotAuthorized, NotImplemented, NotFound, InvalidToken {
381
		// default to given DN
382
		String fullName = subject.getValue();
383
		
384
		SubjectInfo subjectInfo = D1Client.getCN().getSubjectInfo(null, subject);
385
		if (subjectInfo != null && subjectInfo.getPersonList() != null) {
386
			for (Person p: subjectInfo.getPersonList()) {
387
				if (p.getSubject().equals(subject)) {
388
					fullName = p.getFamilyName();
389
					if (p.getGivenNameList() != null && p.getGivenNameList().size() > 0) {
390
						fullName = fullName + ", " + p.getGivenName(0);
391
					}
392
					break;
393
				}
394
			}
395
		}
396
		
397
		return fullName;
398
		
399
	}
400
	
401
}
(4-4/8)