1
|
package edu.ucsb.nceas.metacat.mdq;
|
2
|
|
3
|
import java.io.ByteArrayInputStream;
|
4
|
import java.io.ByteArrayOutputStream;
|
5
|
import java.io.InputStream;
|
6
|
import java.math.BigInteger;
|
7
|
import java.util.Calendar;
|
8
|
import java.util.Date;
|
9
|
import java.util.concurrent.ExecutorService;
|
10
|
import java.util.concurrent.Executors;
|
11
|
|
12
|
import javax.servlet.http.HttpServletRequest;
|
13
|
|
14
|
import org.apache.commons.io.IOUtils;
|
15
|
import org.apache.http.HttpEntity;
|
16
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
17
|
import org.apache.http.client.methods.HttpPost;
|
18
|
import org.apache.http.impl.client.CloseableHttpClient;
|
19
|
import org.apache.http.impl.client.HttpClients;
|
20
|
import org.apache.log4j.Logger;
|
21
|
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
|
22
|
import org.dataone.client.types.AccessPolicyEditor;
|
23
|
import org.dataone.client.v2.formats.ObjectFormatCache;
|
24
|
import org.dataone.configuration.Settings;
|
25
|
import org.dataone.mimemultipart.SimpleMultipartEntity;
|
26
|
import org.dataone.service.types.v1.Identifier;
|
27
|
import org.dataone.service.types.v1.ObjectFormatIdentifier;
|
28
|
import org.dataone.service.types.v1.Session;
|
29
|
import org.dataone.service.types.v1.util.ChecksumUtil;
|
30
|
import org.dataone.service.types.v2.ObjectFormat;
|
31
|
import org.dataone.service.types.v2.SystemMetadata;
|
32
|
import org.dataone.service.util.TypeMarshaller;
|
33
|
|
34
|
import edu.ucsb.nceas.metacat.IdentifierManager;
|
35
|
import edu.ucsb.nceas.metacat.MetacatHandler;
|
36
|
import edu.ucsb.nceas.metacat.dataone.MNodeService;
|
37
|
|
38
|
public class MDQClient {
|
39
|
|
40
|
private static boolean mdqEnabled = Settings.getConfiguration().getBoolean("mdq.service.enabled", false);
|
41
|
|
42
|
private static String mdqURL = Settings.getConfiguration().getString("mdq.service.url", "https://quality.nceas.ucsb.edu/quality/suites/arctic.data.center.suite.1/run");
|
43
|
|
44
|
private static String mdqRunNamespace = Settings.getConfiguration().getString("mdq.run.namespace", "https://nceas.ucsb.edu/mdqe/v1#run");
|
45
|
|
46
|
private static Logger logMetacat = Logger.getLogger(MDQClient.class);
|
47
|
|
48
|
private static ExecutorService executor = Executors.newSingleThreadExecutor();
|
49
|
|
50
|
public static void submit(final SystemMetadata sysMeta) {
|
51
|
|
52
|
if (!mdqEnabled) {
|
53
|
logMetacat.info("MDQ not enabled, skipping quality check for " + sysMeta.getIdentifier().getValue());
|
54
|
return;
|
55
|
}
|
56
|
|
57
|
// can we even run QC on this object?
|
58
|
try {
|
59
|
// check that it is a ME
|
60
|
ObjectFormat objFormat = ObjectFormatCache.getInstance().getFormat(sysMeta.getFormatId());
|
61
|
// must know what we are dealing with
|
62
|
if (objFormat == null) {
|
63
|
logMetacat.info("Object format not found for formatId: " + sysMeta.getFormatId());
|
64
|
return;
|
65
|
}
|
66
|
// only METADATA types
|
67
|
if (!objFormat.getFormatType().equals("METADATA")) {
|
68
|
logMetacat.info("MDQ not applicable to non METADATA object of: " + objFormat.getFormatType());
|
69
|
return;
|
70
|
}
|
71
|
// don't run QC on a QC document
|
72
|
if (objFormat.getFormatId().getValue().equals(mdqRunNamespace)) {
|
73
|
logMetacat.info("Cannot run MDQ on a run document");
|
74
|
return;
|
75
|
}
|
76
|
} catch (Exception e) {
|
77
|
logMetacat.error("Could not inspect object format: " + e.getMessage(), e);
|
78
|
return;
|
79
|
}
|
80
|
|
81
|
// run the MDQ routine in a new thread
|
82
|
Runnable task = new Runnable() {
|
83
|
@Override
|
84
|
public void run() {
|
85
|
try {
|
86
|
InputStream run = MDQClient.run(sysMeta);
|
87
|
logMetacat.debug("Generated MDQ run for pid: " + sysMeta.getIdentifier().getValue());
|
88
|
Identifier id = MDQClient.saveRun(run, sysMeta);
|
89
|
logMetacat.info("Saved MDQ run " + id.getValue());
|
90
|
} catch (Exception e) {
|
91
|
logMetacat.error(e.getMessage(), e);
|
92
|
}
|
93
|
}
|
94
|
};
|
95
|
executor.submit(task);
|
96
|
}
|
97
|
|
98
|
/**
|
99
|
* Runs MDQ suite for object identified in SystemMetadata param
|
100
|
* @param sysMeta
|
101
|
* @return InputStream for run result (XML)
|
102
|
* @throws Exception
|
103
|
*/
|
104
|
private static InputStream run(SystemMetadata sysMeta) throws Exception {
|
105
|
|
106
|
InputStream runResult = null;
|
107
|
|
108
|
// get the metadata content
|
109
|
String docid = IdentifierManager.getInstance().getLocalId(sysMeta.getIdentifier().getValue());
|
110
|
InputStream docStream = MetacatHandler.read(docid);
|
111
|
|
112
|
// Construct the REST call
|
113
|
HttpPost post = new HttpPost(mdqURL);
|
114
|
|
115
|
// add document
|
116
|
SimpleMultipartEntity entity = new SimpleMultipartEntity();
|
117
|
entity.addFilePart("document", docStream);
|
118
|
|
119
|
// add sysMeta
|
120
|
logMetacat.debug("marshalling sysMeta for: " + sysMeta.getIdentifier().getValue());
|
121
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
122
|
TypeMarshaller.marshalTypeToOutputStream(sysMeta, baos);
|
123
|
entity.addFilePart("systemMetadata", new ByteArrayInputStream(baos.toByteArray()));
|
124
|
|
125
|
// make sure we get XML back
|
126
|
post.addHeader("Accept", "application/xml");
|
127
|
|
128
|
// send to service
|
129
|
post.setEntity(entity);
|
130
|
CloseableHttpClient client = HttpClients.createDefault();
|
131
|
CloseableHttpResponse response = client.execute(post);
|
132
|
|
133
|
// retrieve results
|
134
|
HttpEntity reponseEntity = response.getEntity();
|
135
|
if (reponseEntity != null) {
|
136
|
runResult = reponseEntity.getContent();
|
137
|
}
|
138
|
|
139
|
return runResult;
|
140
|
}
|
141
|
|
142
|
private static Identifier saveRun(InputStream runStream, SystemMetadata metadataSysMeta) throws Exception {
|
143
|
|
144
|
HttpServletRequest request = new MockHttpServletRequest(null, null, null);
|
145
|
MNodeService mn = MNodeService.getInstance(request);
|
146
|
|
147
|
// copy the properties from the metadata sysmeta to the run sysmeta
|
148
|
byte[] bytes = IOUtils.toByteArray(runStream);
|
149
|
SystemMetadata sysmeta = generateSystemMetadata(bytes, metadataSysMeta);
|
150
|
|
151
|
// generate an identifier for the run result
|
152
|
Session session = new Session();
|
153
|
session.setSubject(sysmeta.getRightsHolder());
|
154
|
Identifier pid = mn.generateIdentifier(session, "UUID", null);
|
155
|
sysmeta.setIdentifier(pid);
|
156
|
|
157
|
// save to this repo
|
158
|
Identifier id = mn.create(session, pid, new ByteArrayInputStream(bytes), sysmeta);
|
159
|
|
160
|
return id;
|
161
|
}
|
162
|
|
163
|
private static SystemMetadata generateSystemMetadata(byte[] bytes, SystemMetadata origSysMeta)
|
164
|
throws Exception {
|
165
|
|
166
|
SystemMetadata sysMeta = new SystemMetadata();
|
167
|
|
168
|
// format id for the run
|
169
|
ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
|
170
|
formatId.setValue(mdqRunNamespace);
|
171
|
sysMeta.setFormatId(formatId);
|
172
|
|
173
|
// roles
|
174
|
sysMeta.setRightsHolder(origSysMeta.getRightsHolder());
|
175
|
sysMeta.setSubmitter(origSysMeta.getRightsHolder());
|
176
|
sysMeta.setAuthoritativeMemberNode(origSysMeta.getAuthoritativeMemberNode());
|
177
|
sysMeta.setOriginMemberNode(origSysMeta.getOriginMemberNode());
|
178
|
|
179
|
// for now, make them all public for easier debugging
|
180
|
AccessPolicyEditor accessPolicyEditor = new AccessPolicyEditor(null);
|
181
|
accessPolicyEditor.setPublicAccess();
|
182
|
sysMeta.setAccessPolicy(accessPolicyEditor.getAccessPolicy());
|
183
|
|
184
|
// size
|
185
|
sysMeta.setSize(BigInteger.valueOf(bytes.length));
|
186
|
sysMeta.setChecksum(ChecksumUtil.checksum(bytes, "MD5"));
|
187
|
sysMeta.setFileName("run.xml");
|
188
|
|
189
|
// timestamps
|
190
|
Date now = Calendar.getInstance().getTime();
|
191
|
sysMeta.setDateSysMetadataModified(now);
|
192
|
sysMeta.setDateUploaded(now);
|
193
|
|
194
|
return sysMeta;
|
195
|
}
|
196
|
}
|