Project

General

Profile

1
package edu.ucsb.nceas.metacat.mdq;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.io.ByteArrayOutputStream;
5
import java.io.InputStream;
6
import java.math.BigInteger;
7
import java.util.Calendar;
8
import java.util.Date;
9
import java.util.concurrent.ExecutorService;
10
import java.util.concurrent.Executors;
11

    
12
import org.apache.commons.io.IOUtils;
13
import org.apache.http.HttpEntity;
14
import org.apache.http.client.methods.CloseableHttpResponse;
15
import org.apache.http.client.methods.HttpPost;
16
import org.apache.http.impl.client.CloseableHttpClient;
17
import org.apache.http.impl.client.HttpClients;
18
import org.apache.log4j.Logger;
19
import org.dataone.client.types.AccessPolicyEditor;
20
import org.dataone.client.v2.formats.ObjectFormatCache;
21
import org.dataone.configuration.Settings;
22
import org.dataone.mimemultipart.SimpleMultipartEntity;
23
import org.dataone.service.types.v1.Identifier;
24
import org.dataone.service.types.v1.ObjectFormatIdentifier;
25
import org.dataone.service.types.v1.Session;
26
import org.dataone.service.types.v1.util.ChecksumUtil;
27
import org.dataone.service.types.v2.ObjectFormat;
28
import org.dataone.service.types.v2.SystemMetadata;
29
import org.dataone.service.util.TypeMarshaller;
30

    
31
import edu.ucsb.nceas.metacat.IdentifierManager;
32
import edu.ucsb.nceas.metacat.MetacatHandler;
33
import edu.ucsb.nceas.metacat.dataone.MNodeService;
34

    
35
public class MDQClient {
36
	
37
	private static boolean mdqEnabled =  Settings.getConfiguration().getBoolean("mdq.service.enabled", false);
38

    
39
	private static String mdqURL =  Settings.getConfiguration().getString("mdq.service.url", "https://quality.nceas.ucsb.edu/quality/suites/arctic.data.center.suite.1/run");
40
	
41
	private static String mdqRunNamespace = Settings.getConfiguration().getString("mdq.run.namespace", "https://nceas.ucsb.edu/mdqe/v1#run");
42

    
43
	private static Logger logMetacat = Logger.getLogger(MDQClient.class);
44
	
45
	private static ExecutorService executor = Executors.newSingleThreadExecutor();
46

    
47
	public static void submit(final SystemMetadata sysMeta) {
48
				
49
		if (!mdqEnabled) {
50
			logMetacat.info("MDQ not enabled, skipping quality check for " + sysMeta.getIdentifier().getValue());
51
			return;
52
		}
53
		
54
		// can we even run QC on this object?
55
		try {
56
			// check that it is a ME
57
			ObjectFormat objFormat = ObjectFormatCache.getInstance().getFormat(sysMeta.getFormatId());
58
			// must know what we are dealing with
59
			if (objFormat == null) {
60
				logMetacat.info("Object format not found for formatId: " + sysMeta.getFormatId());
61
				return;
62
			}
63
			// only METADATA types
64
			if (!objFormat.getFormatType().equals("METADATA")) {
65
				logMetacat.info("MDQ not applicable to non METADATA object of: " + objFormat.getFormatType());
66
				return;
67
			}
68
			// don't run QC on a QC document
69
			if (objFormat.getFormatId().getValue().equals(mdqRunNamespace)) {
70
				logMetacat.info("Cannot run MDQ on a run document");
71
				return;
72
			}
73
		} catch (Exception e) {
74
			logMetacat.error("Could not inspect object format: " +  e.getMessage(), e);
75
			return;
76
		}
77
		
78
		// run the MDQ routine in a new thread
79
		Runnable task = new Runnable() {
80
			@Override
81
			public void run() {
82
				try {
83
					InputStream run = MDQClient.run(sysMeta);
84
					logMetacat.debug("Generated MDQ run for pid: " + sysMeta.getIdentifier().getValue());
85
					Identifier id = MDQClient.saveRun(run, sysMeta);
86
					logMetacat.info("Saved MDQ run " + id.getValue());
87
				} catch (Exception e) {
88
					logMetacat.error(e.getMessage(), e);
89
				}
90
			}
91
		};
92
		executor.submit(task);	
93
	}
94
	
95
	/**
96
	 * Runs MDQ suite for object identified in SystemMetadata param
97
	 * @param sysMeta
98
	 * @return InputStream for run result (XML)
99
	 * @throws Exception
100
	 */
101
	private static InputStream run(SystemMetadata sysMeta) throws Exception {
102
		
103
		InputStream runResult = null;
104
		
105
		// get the metadata content
106
		String docid = IdentifierManager.getInstance().getLocalId(sysMeta.getIdentifier().getValue());
107
		InputStream docStream = MetacatHandler.read(docid);
108
		
109
		// Construct the REST call
110
		HttpPost post = new HttpPost(mdqURL);
111
		
112
		// add document
113
		SimpleMultipartEntity entity = new SimpleMultipartEntity();
114
		entity.addFilePart("document", docStream);
115
		
116
		// add sysMeta 
117
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
118
		TypeMarshaller.marshalTypeToOutputStream(sysMeta, baos);
119
		entity.addFilePart("systemMetadata", new ByteArrayInputStream(baos.toByteArray()));
120
		
121
		// send to service
122
		post.setEntity(entity);
123
		CloseableHttpClient client = HttpClients.createDefault();
124
		CloseableHttpResponse response = client.execute(post);
125
		
126
		// retrieve results
127
		HttpEntity reponseEntity = response.getEntity();
128
		if (reponseEntity != null) {
129
			runResult  = reponseEntity.getContent(); 
130
		}
131

    
132
		return runResult;
133
	}
134
	
135
	private static Identifier saveRun(InputStream runStream, SystemMetadata metadataSysMeta) throws Exception {
136
		MNodeService mn = MNodeService.getInstance(null);
137
		
138
		// copy the properties from the metadata sysmeta to the run sysmeta
139
		byte[] bytes = IOUtils.toByteArray(runStream);
140
		SystemMetadata sysmeta = generateSystemMetadata(bytes, metadataSysMeta);
141
		
142
		// generate an identifier for the run result
143
		Session session = new Session();
144
		session.setSubject(sysmeta.getRightsHolder());
145
		Identifier pid = mn.generateIdentifier(session, "UUID", null);
146
		sysmeta.setIdentifier(pid);
147
		
148
		// save to this repo
149
		Identifier id = mn.create(session, pid, new ByteArrayInputStream(bytes), sysmeta);
150
		
151
		return id;
152
	}
153
	
154
	private static SystemMetadata generateSystemMetadata(byte[] bytes, SystemMetadata origSysMeta) 
155
			throws Exception {
156
		
157
		SystemMetadata sysMeta = new SystemMetadata();	
158
		
159
		// format id for the run
160
		ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
161
		formatId.setValue(mdqRunNamespace);
162
		sysMeta.setFormatId(formatId);
163
		
164
		// roles
165
		sysMeta.setRightsHolder(origSysMeta.getRightsHolder());	
166
		sysMeta.setSubmitter(origSysMeta.getRightsHolder());
167
		sysMeta.setAuthoritativeMemberNode(origSysMeta.getAuthoritativeMemberNode());
168
		sysMeta.setOriginMemberNode(origSysMeta.getOriginMemberNode());
169
		
170
		// for now, make them all public for easier debugging
171
		AccessPolicyEditor accessPolicyEditor = new AccessPolicyEditor(null);
172
		accessPolicyEditor.setPublicAccess();
173
		sysMeta.setAccessPolicy(accessPolicyEditor.getAccessPolicy());
174
				
175
		// size
176
		sysMeta.setSize(BigInteger.valueOf(bytes.length));
177
		sysMeta.setChecksum(ChecksumUtil.checksum(bytes, "MD5"));
178
		sysMeta.setFileName("run.xml");
179

    
180
		// timestamps
181
		Date now = Calendar.getInstance().getTime();
182
		sysMeta.setDateSysMetadataModified(now);
183
		sysMeta.setDateUploaded(now);
184
		
185
		return sysMeta;
186
	}
187
}
    (1-1/1)