|
1 |
package edu.ucsb.nceas.metacat.mdq;
|
|
2 |
|
|
3 |
import java.io.ByteArrayInputStream;
|
|
4 |
import java.io.ByteArrayOutputStream;
|
|
5 |
import java.io.InputStream;
|
|
6 |
import java.math.BigInteger;
|
|
7 |
import java.util.Calendar;
|
|
8 |
import java.util.Date;
|
|
9 |
import java.util.concurrent.ExecutorService;
|
|
10 |
import java.util.concurrent.Executors;
|
|
11 |
|
|
12 |
import org.apache.commons.io.IOUtils;
|
|
13 |
import org.apache.http.HttpEntity;
|
|
14 |
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
15 |
import org.apache.http.client.methods.HttpPost;
|
|
16 |
import org.apache.http.impl.client.CloseableHttpClient;
|
|
17 |
import org.apache.http.impl.client.HttpClients;
|
|
18 |
import org.apache.log4j.Logger;
|
|
19 |
import org.dataone.client.types.AccessPolicyEditor;
|
|
20 |
import org.dataone.client.v2.formats.ObjectFormatCache;
|
|
21 |
import org.dataone.configuration.Settings;
|
|
22 |
import org.dataone.mimemultipart.SimpleMultipartEntity;
|
|
23 |
import org.dataone.service.types.v1.Identifier;
|
|
24 |
import org.dataone.service.types.v1.ObjectFormatIdentifier;
|
|
25 |
import org.dataone.service.types.v1.Session;
|
|
26 |
import org.dataone.service.types.v1.util.ChecksumUtil;
|
|
27 |
import org.dataone.service.types.v2.ObjectFormat;
|
|
28 |
import org.dataone.service.types.v2.SystemMetadata;
|
|
29 |
import org.dataone.service.util.TypeMarshaller;
|
|
30 |
|
|
31 |
import edu.ucsb.nceas.metacat.IdentifierManager;
|
|
32 |
import edu.ucsb.nceas.metacat.MetacatHandler;
|
|
33 |
import edu.ucsb.nceas.metacat.dataone.MNodeService;
|
|
34 |
|
|
35 |
public class MDQClient {
|
|
36 |
|
|
37 |
private static boolean mdqEnabled = Settings.getConfiguration().getBoolean("mdq.service.enabled", false);
|
|
38 |
|
|
39 |
private static String mdqURL = Settings.getConfiguration().getString("mdq.service.url", "https://quality.nceas.ucsb.edu/quality/suites/arctic.data.center.suite.1/run");
|
|
40 |
|
|
41 |
private static String mdqRunNamespace = Settings.getConfiguration().getString("mdq.run.namespace", "https://nceas.ucsb.edu/mdqe/v1#run");
|
|
42 |
|
|
43 |
private static Logger logMetacat = Logger.getLogger(MDQClient.class);
|
|
44 |
|
|
45 |
private static ExecutorService executor = Executors.newSingleThreadExecutor();
|
|
46 |
|
|
47 |
public static void submit(final SystemMetadata sysMeta) {
|
|
48 |
|
|
49 |
if (!mdqEnabled) {
|
|
50 |
logMetacat.info("MDQ not enabled, skipping quality check for " + sysMeta.getIdentifier().getValue());
|
|
51 |
return;
|
|
52 |
}
|
|
53 |
|
|
54 |
// can we even run QC on this object?
|
|
55 |
try {
|
|
56 |
// check that it is a ME
|
|
57 |
ObjectFormat objFormat = ObjectFormatCache.getInstance().getFormat(sysMeta.getFormatId());
|
|
58 |
// must know what we are dealing with
|
|
59 |
if (objFormat == null) {
|
|
60 |
logMetacat.info("Object format not found for formatId: " + sysMeta.getFormatId());
|
|
61 |
return;
|
|
62 |
}
|
|
63 |
// only METADATA types
|
|
64 |
if (!objFormat.getFormatType().equals("METADATA")) {
|
|
65 |
logMetacat.info("MDQ not applicable to non METADATA object of: " + objFormat.getFormatType());
|
|
66 |
return;
|
|
67 |
}
|
|
68 |
// don't run QC on a QC document
|
|
69 |
if (objFormat.getFormatId().getValue().equals(mdqRunNamespace)) {
|
|
70 |
logMetacat.info("Cannot run MDQ on a run document");
|
|
71 |
return;
|
|
72 |
}
|
|
73 |
} catch (Exception e) {
|
|
74 |
logMetacat.error("Could not inspect object format: " + e.getMessage(), e);
|
|
75 |
return;
|
|
76 |
}
|
|
77 |
|
|
78 |
// run the MDQ routine in a new thread
|
|
79 |
Runnable task = new Runnable() {
|
|
80 |
@Override
|
|
81 |
public void run() {
|
|
82 |
try {
|
|
83 |
InputStream run = MDQClient.run(sysMeta);
|
|
84 |
logMetacat.debug("Generated MDQ run for pid: " + sysMeta.getIdentifier().getValue());
|
|
85 |
Identifier id = MDQClient.saveRun(run, sysMeta);
|
|
86 |
logMetacat.info("Saved MDQ run " + id.getValue());
|
|
87 |
} catch (Exception e) {
|
|
88 |
logMetacat.error(e.getMessage(), e);
|
|
89 |
}
|
|
90 |
}
|
|
91 |
};
|
|
92 |
executor.submit(task);
|
|
93 |
}
|
|
94 |
|
|
95 |
/**
|
|
96 |
* Runs MDQ suite for object identified in SystemMetadata param
|
|
97 |
* @param sysMeta
|
|
98 |
* @return InputStream for run result (XML)
|
|
99 |
* @throws Exception
|
|
100 |
*/
|
|
101 |
private static InputStream run(SystemMetadata sysMeta) throws Exception {
|
|
102 |
|
|
103 |
InputStream runResult = null;
|
|
104 |
|
|
105 |
// get the metadata content
|
|
106 |
String docid = IdentifierManager.getInstance().getLocalId(sysMeta.getIdentifier().getValue());
|
|
107 |
InputStream docStream = MetacatHandler.read(docid);
|
|
108 |
|
|
109 |
// Construct the REST call
|
|
110 |
HttpPost post = new HttpPost(mdqURL);
|
|
111 |
|
|
112 |
// add document
|
|
113 |
SimpleMultipartEntity entity = new SimpleMultipartEntity();
|
|
114 |
entity.addFilePart("document", docStream);
|
|
115 |
|
|
116 |
// add sysMeta
|
|
117 |
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
118 |
TypeMarshaller.marshalTypeToOutputStream(sysMeta, baos);
|
|
119 |
entity.addFilePart("systemMetadata", new ByteArrayInputStream(baos.toByteArray()));
|
|
120 |
|
|
121 |
// send to service
|
|
122 |
post.setEntity(entity);
|
|
123 |
CloseableHttpClient client = HttpClients.createDefault();
|
|
124 |
CloseableHttpResponse response = client.execute(post);
|
|
125 |
|
|
126 |
// retrieve results
|
|
127 |
HttpEntity reponseEntity = response.getEntity();
|
|
128 |
if (reponseEntity != null) {
|
|
129 |
runResult = reponseEntity.getContent();
|
|
130 |
}
|
|
131 |
|
|
132 |
return runResult;
|
|
133 |
}
|
|
134 |
|
|
135 |
private static Identifier saveRun(InputStream runStream, SystemMetadata metadataSysMeta) throws Exception {
|
|
136 |
MNodeService mn = MNodeService.getInstance(null);
|
|
137 |
|
|
138 |
// copy the properties from the metadata sysmeta to the run sysmeta
|
|
139 |
byte[] bytes = IOUtils.toByteArray(runStream);
|
|
140 |
SystemMetadata sysmeta = generateSystemMetadata(bytes, metadataSysMeta);
|
|
141 |
|
|
142 |
// generate an identifier for the run result
|
|
143 |
Session session = new Session();
|
|
144 |
session.setSubject(sysmeta.getRightsHolder());
|
|
145 |
Identifier pid = mn.generateIdentifier(session, "UUID", null);
|
|
146 |
sysmeta.setIdentifier(pid);
|
|
147 |
|
|
148 |
// save to this repo
|
|
149 |
Identifier id = mn.create(session, pid, new ByteArrayInputStream(bytes), sysmeta);
|
|
150 |
|
|
151 |
return id;
|
|
152 |
}
|
|
153 |
|
|
154 |
private static SystemMetadata generateSystemMetadata(byte[] bytes, SystemMetadata origSysMeta)
|
|
155 |
throws Exception {
|
|
156 |
|
|
157 |
SystemMetadata sysMeta = new SystemMetadata();
|
|
158 |
|
|
159 |
// format id for the run
|
|
160 |
ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
|
|
161 |
formatId.setValue(mdqRunNamespace);
|
|
162 |
sysMeta.setFormatId(formatId);
|
|
163 |
|
|
164 |
// roles
|
|
165 |
sysMeta.setRightsHolder(origSysMeta.getRightsHolder());
|
|
166 |
sysMeta.setSubmitter(origSysMeta.getRightsHolder());
|
|
167 |
sysMeta.setAuthoritativeMemberNode(origSysMeta.getAuthoritativeMemberNode());
|
|
168 |
sysMeta.setOriginMemberNode(origSysMeta.getOriginMemberNode());
|
|
169 |
|
|
170 |
// for now, make them all public for easier debugging
|
|
171 |
AccessPolicyEditor accessPolicyEditor = new AccessPolicyEditor(null);
|
|
172 |
accessPolicyEditor.setPublicAccess();
|
|
173 |
sysMeta.setAccessPolicy(accessPolicyEditor.getAccessPolicy());
|
|
174 |
|
|
175 |
// size
|
|
176 |
sysMeta.setSize(BigInteger.valueOf(bytes.length));
|
|
177 |
sysMeta.setChecksum(ChecksumUtil.checksum(bytes, "MD5"));
|
|
178 |
sysMeta.setFileName("run.xml");
|
|
179 |
|
|
180 |
// timestamps
|
|
181 |
Date now = Calendar.getInstance().getTime();
|
|
182 |
sysMeta.setDateSysMetadataModified(now);
|
|
183 |
sysMeta.setDateUploaded(now);
|
|
184 |
|
|
185 |
return sysMeta;
|
|
186 |
}
|
|
187 |
}
|
0 |
188 |
|
Trigger MDQ when inserting or updating metadata documents. https://projects.ecoinformatics.org/ecoinfo/issues/7171 and https://github.com/NCEAS/mdqengine/issues/73