1 |
10172
|
leinfelder
|
package edu.ucsb.nceas.metacat.mdq;
|
2 |
|
|
|
3 |
|
|
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.util.Calendar;
import java.util.Date;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import javax.servlet.http.HttpServletRequest;

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.log4j.Logger;
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
import org.dataone.client.types.AccessPolicyEditor;
import org.dataone.client.v2.formats.ObjectFormatCache;
import org.dataone.configuration.Settings;
import org.dataone.mimemultipart.SimpleMultipartEntity;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.ObjectFormatIdentifier;
import org.dataone.service.types.v1.Session;
import org.dataone.service.types.v1.util.ChecksumUtil;
import org.dataone.service.types.v2.ObjectFormat;
import org.dataone.service.types.v2.SystemMetadata;
import org.dataone.service.util.TypeMarshaller;

import edu.ucsb.nceas.metacat.IdentifierManager;
import edu.ucsb.nceas.metacat.MetacatHandler;
import edu.ucsb.nceas.metacat.dataone.MNodeService;
|
37 |
|
|
|
38 |
|
|
public class MDQClient {
|
39 |
|
|
|
40 |
|
|
private static boolean mdqEnabled = Settings.getConfiguration().getBoolean("mdq.service.enabled", false);
|
41 |
|
|
|
42 |
|
|
private static String mdqURL = Settings.getConfiguration().getString("mdq.service.url", "https://quality.nceas.ucsb.edu/quality/suites/arctic.data.center.suite.1/run");
|
43 |
|
|
|
44 |
|
|
private static String mdqRunNamespace = Settings.getConfiguration().getString("mdq.run.namespace", "https://nceas.ucsb.edu/mdqe/v1#run");
|
45 |
|
|
|
46 |
|
|
private static Logger logMetacat = Logger.getLogger(MDQClient.class);
|
47 |
|
|
|
48 |
|
|
private static ExecutorService executor = Executors.newSingleThreadExecutor();
|
49 |
|
|
|
50 |
|
|
public static void submit(final SystemMetadata sysMeta) {
|
51 |
|
|
|
52 |
|
|
if (!mdqEnabled) {
|
53 |
|
|
logMetacat.info("MDQ not enabled, skipping quality check for " + sysMeta.getIdentifier().getValue());
|
54 |
|
|
return;
|
55 |
|
|
}
|
56 |
|
|
|
57 |
|
|
// can we even run QC on this object?
|
58 |
|
|
try {
|
59 |
|
|
// check that it is a ME
|
60 |
|
|
ObjectFormat objFormat = ObjectFormatCache.getInstance().getFormat(sysMeta.getFormatId());
|
61 |
|
|
// must know what we are dealing with
|
62 |
|
|
if (objFormat == null) {
|
63 |
|
|
logMetacat.info("Object format not found for formatId: " + sysMeta.getFormatId());
|
64 |
|
|
return;
|
65 |
|
|
}
|
66 |
|
|
// only METADATA types
|
67 |
|
|
if (!objFormat.getFormatType().equals("METADATA")) {
|
68 |
|
|
logMetacat.info("MDQ not applicable to non METADATA object of: " + objFormat.getFormatType());
|
69 |
|
|
return;
|
70 |
|
|
}
|
71 |
|
|
// don't run QC on a QC document
|
72 |
|
|
if (objFormat.getFormatId().getValue().equals(mdqRunNamespace)) {
|
73 |
|
|
logMetacat.info("Cannot run MDQ on a run document");
|
74 |
|
|
return;
|
75 |
|
|
}
|
76 |
|
|
} catch (Exception e) {
|
77 |
|
|
logMetacat.error("Could not inspect object format: " + e.getMessage(), e);
|
78 |
|
|
return;
|
79 |
|
|
}
|
80 |
|
|
|
81 |
|
|
// run the MDQ routine in a new thread
|
82 |
|
|
Runnable task = new Runnable() {
|
83 |
|
|
@Override
|
84 |
|
|
public void run() {
|
85 |
|
|
try {
|
86 |
|
|
InputStream run = MDQClient.run(sysMeta);
|
87 |
|
|
logMetacat.debug("Generated MDQ run for pid: " + sysMeta.getIdentifier().getValue());
|
88 |
|
|
Identifier id = MDQClient.saveRun(run, sysMeta);
|
89 |
|
|
logMetacat.info("Saved MDQ run " + id.getValue());
|
90 |
|
|
} catch (Exception e) {
|
91 |
|
|
logMetacat.error(e.getMessage(), e);
|
92 |
|
|
}
|
93 |
|
|
}
|
94 |
|
|
};
|
95 |
|
|
executor.submit(task);
|
96 |
|
|
}
|
97 |
|
|
|
98 |
|
|
/**
|
99 |
|
|
* Runs MDQ suite for object identified in SystemMetadata param
|
100 |
|
|
* @param sysMeta
|
101 |
|
|
* @return InputStream for run result (XML)
|
102 |
|
|
* @throws Exception
|
103 |
|
|
*/
|
104 |
|
|
private static InputStream run(SystemMetadata sysMeta) throws Exception {
|
105 |
|
|
|
106 |
|
|
InputStream runResult = null;
|
107 |
|
|
|
108 |
|
|
// get the metadata content
|
109 |
|
|
String docid = IdentifierManager.getInstance().getLocalId(sysMeta.getIdentifier().getValue());
|
110 |
|
|
InputStream docStream = MetacatHandler.read(docid);
|
111 |
|
|
|
112 |
|
|
// Construct the REST call
|
113 |
|
|
HttpPost post = new HttpPost(mdqURL);
|
114 |
|
|
|
115 |
|
|
// add document
|
116 |
|
|
SimpleMultipartEntity entity = new SimpleMultipartEntity();
|
117 |
|
|
entity.addFilePart("document", docStream);
|
118 |
|
|
|
119 |
|
|
// add sysMeta
|
120 |
10173
|
leinfelder
|
logMetacat.debug("marshalling sysMeta for: " + sysMeta.getIdentifier().getValue());
|
121 |
10172
|
leinfelder
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
122 |
|
|
TypeMarshaller.marshalTypeToOutputStream(sysMeta, baos);
|
123 |
|
|
entity.addFilePart("systemMetadata", new ByteArrayInputStream(baos.toByteArray()));
|
124 |
|
|
|
125 |
10173
|
leinfelder
|
// make sure we get XML back
|
126 |
|
|
post.addHeader("Accept", "application/xml");
|
127 |
|
|
|
128 |
10172
|
leinfelder
|
// send to service
|
129 |
|
|
post.setEntity(entity);
|
130 |
|
|
CloseableHttpClient client = HttpClients.createDefault();
|
131 |
|
|
CloseableHttpResponse response = client.execute(post);
|
132 |
|
|
|
133 |
|
|
// retrieve results
|
134 |
|
|
HttpEntity reponseEntity = response.getEntity();
|
135 |
|
|
if (reponseEntity != null) {
|
136 |
|
|
runResult = reponseEntity.getContent();
|
137 |
|
|
}
|
138 |
|
|
|
139 |
|
|
return runResult;
|
140 |
|
|
}
|
141 |
|
|
|
142 |
|
|
private static Identifier saveRun(InputStream runStream, SystemMetadata metadataSysMeta) throws Exception {
|
143 |
|
|
|
144 |
10173
|
leinfelder
|
HttpServletRequest request = new MockHttpServletRequest(null, null, null);
|
145 |
|
|
MNodeService mn = MNodeService.getInstance(request);
|
146 |
|
|
|
147 |
10172
|
leinfelder
|
// copy the properties from the metadata sysmeta to the run sysmeta
|
148 |
|
|
byte[] bytes = IOUtils.toByteArray(runStream);
|
149 |
|
|
SystemMetadata sysmeta = generateSystemMetadata(bytes, metadataSysMeta);
|
150 |
|
|
|
151 |
|
|
// generate an identifier for the run result
|
152 |
|
|
Session session = new Session();
|
153 |
|
|
session.setSubject(sysmeta.getRightsHolder());
|
154 |
|
|
Identifier pid = mn.generateIdentifier(session, "UUID", null);
|
155 |
|
|
sysmeta.setIdentifier(pid);
|
156 |
|
|
|
157 |
|
|
// save to this repo
|
158 |
|
|
Identifier id = mn.create(session, pid, new ByteArrayInputStream(bytes), sysmeta);
|
159 |
|
|
|
160 |
|
|
return id;
|
161 |
|
|
}
|
162 |
|
|
|
163 |
|
|
private static SystemMetadata generateSystemMetadata(byte[] bytes, SystemMetadata origSysMeta)
|
164 |
|
|
throws Exception {
|
165 |
|
|
|
166 |
|
|
SystemMetadata sysMeta = new SystemMetadata();
|
167 |
|
|
|
168 |
|
|
// format id for the run
|
169 |
|
|
ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
|
170 |
|
|
formatId.setValue(mdqRunNamespace);
|
171 |
|
|
sysMeta.setFormatId(formatId);
|
172 |
|
|
|
173 |
|
|
// roles
|
174 |
|
|
sysMeta.setRightsHolder(origSysMeta.getRightsHolder());
|
175 |
|
|
sysMeta.setSubmitter(origSysMeta.getRightsHolder());
|
176 |
|
|
sysMeta.setAuthoritativeMemberNode(origSysMeta.getAuthoritativeMemberNode());
|
177 |
|
|
sysMeta.setOriginMemberNode(origSysMeta.getOriginMemberNode());
|
178 |
|
|
|
179 |
|
|
// for now, make them all public for easier debugging
|
180 |
|
|
AccessPolicyEditor accessPolicyEditor = new AccessPolicyEditor(null);
|
181 |
|
|
accessPolicyEditor.setPublicAccess();
|
182 |
|
|
sysMeta.setAccessPolicy(accessPolicyEditor.getAccessPolicy());
|
183 |
|
|
|
184 |
|
|
// size
|
185 |
|
|
sysMeta.setSize(BigInteger.valueOf(bytes.length));
|
186 |
|
|
sysMeta.setChecksum(ChecksumUtil.checksum(bytes, "MD5"));
|
187 |
|
|
sysMeta.setFileName("run.xml");
|
188 |
|
|
|
189 |
|
|
// timestamps
|
190 |
|
|
Date now = Calendar.getInstance().getTime();
|
191 |
|
|
sysMeta.setDateSysMetadataModified(now);
|
192 |
|
|
sysMeta.setDateUploaded(now);
|
193 |
|
|
|
194 |
|
|
return sysMeta;
|
195 |
|
|
}
|
196 |
|
|
}
|