Project

General

Profile

1
package edu.ucsb.nceas.metacat.mdq;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.io.ByteArrayOutputStream;
5
import java.io.InputStream;
6
import java.math.BigInteger;
7
import java.util.Calendar;
8
import java.util.Date;
9
import java.util.concurrent.ExecutorService;
10
import java.util.concurrent.Executors;
11

    
12
import javax.servlet.http.HttpServletRequest;
13

    
14
import org.apache.commons.io.IOUtils;
15
import org.apache.http.HttpEntity;
16
import org.apache.http.client.methods.CloseableHttpResponse;
17
import org.apache.http.client.methods.HttpPost;
18
import org.apache.http.impl.client.CloseableHttpClient;
19
import org.apache.http.impl.client.HttpClients;
20
import org.apache.log4j.Logger;
21
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
22
import org.dataone.client.types.AccessPolicyEditor;
23
import org.dataone.client.v2.formats.ObjectFormatCache;
24
import org.dataone.configuration.Settings;
25
import org.dataone.mimemultipart.SimpleMultipartEntity;
26
import org.dataone.service.types.v1.Identifier;
27
import org.dataone.service.types.v1.ObjectFormatIdentifier;
28
import org.dataone.service.types.v1.Session;
29
import org.dataone.service.types.v1.util.ChecksumUtil;
30
import org.dataone.service.types.v2.ObjectFormat;
31
import org.dataone.service.types.v2.SystemMetadata;
32
import org.dataone.service.util.TypeMarshaller;
33

    
34
import edu.ucsb.nceas.metacat.IdentifierManager;
35
import edu.ucsb.nceas.metacat.MetacatHandler;
36
import edu.ucsb.nceas.metacat.dataone.MNodeService;
37

    
38
public class MDQClient {
39
	
40
	private static boolean mdqEnabled =  Settings.getConfiguration().getBoolean("mdq.service.enabled", false);
41

    
42
	private static String mdqURL =  Settings.getConfiguration().getString("mdq.service.url", "https://quality.nceas.ucsb.edu/quality/suites/arctic.data.center.suite.1/run");
43
	
44
	private static String mdqRunNamespace = Settings.getConfiguration().getString("mdq.run.namespace", "https://nceas.ucsb.edu/mdqe/v1#run");
45

    
46
	private static Logger logMetacat = Logger.getLogger(MDQClient.class);
47
	
48
	private static ExecutorService executor = Executors.newSingleThreadExecutor();
49

    
50
	public static void submit(final SystemMetadata sysMeta) {
51
				
52
		if (!mdqEnabled) {
53
			logMetacat.info("MDQ not enabled, skipping quality check for " + sysMeta.getIdentifier().getValue());
54
			return;
55
		}
56
		
57
		// can we even run QC on this object?
58
		try {
59
			// check that it is a ME
60
			ObjectFormat objFormat = ObjectFormatCache.getInstance().getFormat(sysMeta.getFormatId());
61
			// must know what we are dealing with
62
			if (objFormat == null) {
63
				logMetacat.info("Object format not found for formatId: " + sysMeta.getFormatId());
64
				return;
65
			}
66
			// only METADATA types
67
			if (!objFormat.getFormatType().equals("METADATA")) {
68
				logMetacat.info("MDQ not applicable to non METADATA object of: " + objFormat.getFormatType());
69
				return;
70
			}
71
			// don't run QC on a QC document
72
			if (objFormat.getFormatId().getValue().equals(mdqRunNamespace)) {
73
				logMetacat.info("Cannot run MDQ on a run document");
74
				return;
75
			}
76
		} catch (Exception e) {
77
			logMetacat.error("Could not inspect object format: " +  e.getMessage(), e);
78
			return;
79
		}
80
		
81
		// run the MDQ routine in a new thread
82
		Runnable task = new Runnable() {
83
			@Override
84
			public void run() {
85
				try {
86
					InputStream run = MDQClient.run(sysMeta);
87
					logMetacat.debug("Generated MDQ run for pid: " + sysMeta.getIdentifier().getValue());
88
					Identifier id = MDQClient.saveRun(run, sysMeta);
89
					logMetacat.info("Saved MDQ run " + id.getValue());
90
				} catch (Exception e) {
91
					logMetacat.error(e.getMessage(), e);
92
				}
93
			}
94
		};
95
		executor.submit(task);	
96
	}
97
	
98
	/**
99
	 * Runs MDQ suite for object identified in SystemMetadata param
100
	 * @param sysMeta
101
	 * @return InputStream for run result (XML)
102
	 * @throws Exception
103
	 */
104
	private static InputStream run(SystemMetadata sysMeta) throws Exception {
105
		
106
		InputStream runResult = null;
107
		
108
		// get the metadata content
109
		String docid = IdentifierManager.getInstance().getLocalId(sysMeta.getIdentifier().getValue());
110
		InputStream docStream = MetacatHandler.read(docid);
111
		
112
		// Construct the REST call
113
		HttpPost post = new HttpPost(mdqURL);
114
		
115
		// add document
116
		SimpleMultipartEntity entity = new SimpleMultipartEntity();
117
		entity.addFilePart("document", docStream);
118
		
119
		// add sysMeta 
120
		logMetacat.debug("marshalling sysMeta for: " + sysMeta.getIdentifier().getValue());
121
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
122
		TypeMarshaller.marshalTypeToOutputStream(sysMeta, baos);
123
		entity.addFilePart("systemMetadata", new ByteArrayInputStream(baos.toByteArray()));
124
		
125
		// make sure we get XML back
126
		post.addHeader("Accept", "application/xml");
127
		
128
		// send to service
129
		post.setEntity(entity);
130
		CloseableHttpClient client = HttpClients.createDefault();
131
		CloseableHttpResponse response = client.execute(post);
132
		
133
		// retrieve results
134
		HttpEntity reponseEntity = response.getEntity();
135
		if (reponseEntity != null) {
136
			runResult  = reponseEntity.getContent(); 
137
		}
138

    
139
		return runResult;
140
	}
141
	
142
	private static Identifier saveRun(InputStream runStream, SystemMetadata metadataSysMeta) throws Exception {
143
		
144
		HttpServletRequest request = new MockHttpServletRequest(null, null, null);
145
		MNodeService mn = MNodeService.getInstance(request);
146
		
147
		// copy the properties from the metadata sysmeta to the run sysmeta
148
		byte[] bytes = IOUtils.toByteArray(runStream);
149
		SystemMetadata sysmeta = generateSystemMetadata(bytes, metadataSysMeta);
150
		
151
		// generate an identifier for the run result
152
		Session session = new Session();
153
		session.setSubject(sysmeta.getRightsHolder());
154
		Identifier pid = mn.generateIdentifier(session, "UUID", null);
155
		sysmeta.setIdentifier(pid);
156
		
157
		// save to this repo
158
		Identifier id = mn.create(session, pid, new ByteArrayInputStream(bytes), sysmeta);
159
		
160
		return id;
161
	}
162
	
163
	private static SystemMetadata generateSystemMetadata(byte[] bytes, SystemMetadata origSysMeta) 
164
			throws Exception {
165
		
166
		SystemMetadata sysMeta = new SystemMetadata();	
167
		
168
		// format id for the run
169
		ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
170
		formatId.setValue(mdqRunNamespace);
171
		sysMeta.setFormatId(formatId);
172
		
173
		// roles
174
		sysMeta.setRightsHolder(origSysMeta.getRightsHolder());	
175
		sysMeta.setSubmitter(origSysMeta.getRightsHolder());
176
		sysMeta.setAuthoritativeMemberNode(origSysMeta.getAuthoritativeMemberNode());
177
		sysMeta.setOriginMemberNode(origSysMeta.getOriginMemberNode());
178
		
179
		// for now, make them all public for easier debugging
180
		AccessPolicyEditor accessPolicyEditor = new AccessPolicyEditor(null);
181
		accessPolicyEditor.setPublicAccess();
182
		sysMeta.setAccessPolicy(accessPolicyEditor.getAccessPolicy());
183
				
184
		// size
185
		sysMeta.setSize(BigInteger.valueOf(bytes.length));
186
		sysMeta.setChecksum(ChecksumUtil.checksum(bytes, "MD5"));
187
		sysMeta.setFileName("run.xml");
188

    
189
		// timestamps
190
		Date now = Calendar.getInstance().getTime();
191
		sysMeta.setDateSysMetadataModified(now);
192
		sysMeta.setDateUploaded(now);
193
		
194
		return sysMeta;
195
	}
196
}
    (1-1/1)