Project

General

Profile

« Previous | Next » 

Revision 10172

Trigger MDQ when inserting or updating metadata documents. See https://projects.ecoinformatics.org/ecoinfo/issues/7171 and https://github.com/NCEAS/mdqengine/issues/73

View differences:

lib/metacat.properties
719 719
#The Solr home for the HTTP Solr server is used to store some files. It does not have to be the real Solr home.
720 720
#solr.homeDir=/var/metacat/solr-home
721 721

  
722
############# MDQ Section ###########################################
723
mdq.service.enabled=false
724
mdq.service.url=https://quality.nceas.ucsb.edu/quality/suites/arctic.data.center.suite.1/run
725
mdq.run.namespace=https://nceas.ucsb.edu/mdqe/v1#run
src/edu/ucsb/nceas/metacat/MetacatHandler.java
82 82
import org.ecoinformatics.eml.EMLParser;
83 83

  
84 84
import au.com.bytecode.opencsv.CSVWriter;
85

  
86 85
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87 86
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlForSingleFile;
88 87
import edu.ucsb.nceas.utilities.access.AccessControlInterface;
......
100 99
import edu.ucsb.nceas.metacat.event.MetacatDocumentEvent;
101 100
import edu.ucsb.nceas.metacat.event.MetacatEventService;
102 101
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
102
import edu.ucsb.nceas.metacat.mdq.MDQClient;
103 103
import edu.ucsb.nceas.metacat.properties.PropertyService;
104 104
import edu.ucsb.nceas.metacat.replication.ForceReplicationHandler;
105 105
import edu.ucsb.nceas.metacat.service.SessionService;
......
1874 1874
                    // submit for indexing
1875 1875
                    MetacatSolrIndex.getInstance().submit(sysMeta.getIdentifier(), sysMeta, null, true);
1876 1876
                    
1877
                    // queue for QC reporting
1878
                    MDQClient.submit(sysMeta);
1879
                    
1877 1880
                    // [re]index the resource map now that everything is saved
1878 1881
                    // see: https://projects.ecoinformatics.org/ecoinfo/issues/6520
1879 1882
                    Identifier potentialOreIdentifier = new Identifier();
src/edu/ucsb/nceas/metacat/dataone/MNodeService.java
155 155
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
156 156
import edu.ucsb.nceas.metacat.index.MetacatSolrEngineDescriptionHandler;
157 157
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
158
import edu.ucsb.nceas.metacat.mdq.MDQClient;
158 159
import edu.ucsb.nceas.metacat.properties.PropertyService;
159 160
import edu.ucsb.nceas.metacat.shared.MetacatUtilException;
160 161
import edu.ucsb.nceas.metacat.util.DeleteOnCloseFileInputStream;
......
572 573
            // log the update event
573 574
            EventLog.getInstance().log(request.getRemoteAddr(), request.getHeader("User-Agent"), subject.getValue(), localId, Event.UPDATE.toString());
574 575
            
576
            // queue for QC reporting
577
            MDQClient.submit(sysmeta);
578
            
575 579
            long end4 =System.currentTimeMillis();
576 580
            logMetacat.debug("MNodeService.update - the time spending on updating/saving system metadata  of the old pid "+pid.getValue()+" and the new pid "+newPid.getValue()+" and saving the log information is "+(end4- end3)+ " milli seconds.");
577 581
            
......
683 687
            throw sf;
684 688
		}
685 689
        
690
        // queue for QC reporting
691
        MDQClient.submit(sysmeta);
692
        
686 693
        // return 
687 694
		return resultPid ;
688 695
    }
src/edu/ucsb/nceas/metacat/mdq/MDQClient.java
1
package edu.ucsb.nceas.metacat.mdq;
2

  
3
import java.io.ByteArrayInputStream;
4
import java.io.ByteArrayOutputStream;
5
import java.io.InputStream;
6
import java.math.BigInteger;
7
import java.util.Calendar;
8
import java.util.Date;
9
import java.util.concurrent.ExecutorService;
10
import java.util.concurrent.Executors;
11

  
12
import org.apache.commons.io.IOUtils;
13
import org.apache.http.HttpEntity;
14
import org.apache.http.client.methods.CloseableHttpResponse;
15
import org.apache.http.client.methods.HttpPost;
16
import org.apache.http.impl.client.CloseableHttpClient;
17
import org.apache.http.impl.client.HttpClients;
18
import org.apache.log4j.Logger;
19
import org.dataone.client.types.AccessPolicyEditor;
20
import org.dataone.client.v2.formats.ObjectFormatCache;
21
import org.dataone.configuration.Settings;
22
import org.dataone.mimemultipart.SimpleMultipartEntity;
23
import org.dataone.service.types.v1.Identifier;
24
import org.dataone.service.types.v1.ObjectFormatIdentifier;
25
import org.dataone.service.types.v1.Session;
26
import org.dataone.service.types.v1.util.ChecksumUtil;
27
import org.dataone.service.types.v2.ObjectFormat;
28
import org.dataone.service.types.v2.SystemMetadata;
29
import org.dataone.service.util.TypeMarshaller;
30

  
31
import edu.ucsb.nceas.metacat.IdentifierManager;
32
import edu.ucsb.nceas.metacat.MetacatHandler;
33
import edu.ucsb.nceas.metacat.dataone.MNodeService;
34

  
35
public class MDQClient {
36
	
37
	private static boolean mdqEnabled =  Settings.getConfiguration().getBoolean("mdq.service.enabled", false);
38

  
39
	private static String mdqURL =  Settings.getConfiguration().getString("mdq.service.url", "https://quality.nceas.ucsb.edu/quality/suites/arctic.data.center.suite.1/run");
40
	
41
	private static String mdqRunNamespace = Settings.getConfiguration().getString("mdq.run.namespace", "https://nceas.ucsb.edu/mdqe/v1#run");
42

  
43
	private static Logger logMetacat = Logger.getLogger(MDQClient.class);
44
	
45
	private static ExecutorService executor = Executors.newSingleThreadExecutor();
46

  
47
	public static void submit(final SystemMetadata sysMeta) {
48
				
49
		if (!mdqEnabled) {
50
			logMetacat.info("MDQ not enabled, skipping quality check for " + sysMeta.getIdentifier().getValue());
51
			return;
52
		}
53
		
54
		// can we even run QC on this object?
55
		try {
56
			// check that it is a ME
57
			ObjectFormat objFormat = ObjectFormatCache.getInstance().getFormat(sysMeta.getFormatId());
58
			// must know what we are dealing with
59
			if (objFormat == null) {
60
				logMetacat.info("Object format not found for formatId: " + sysMeta.getFormatId());
61
				return;
62
			}
63
			// only METADATA types
64
			if (!objFormat.getFormatType().equals("METADATA")) {
65
				logMetacat.info("MDQ not applicable to non METADATA object of: " + objFormat.getFormatType());
66
				return;
67
			}
68
			// don't run QC on a QC document
69
			if (objFormat.getFormatId().getValue().equals(mdqRunNamespace)) {
70
				logMetacat.info("Cannot run MDQ on a run document");
71
				return;
72
			}
73
		} catch (Exception e) {
74
			logMetacat.error("Could not inspect object format: " +  e.getMessage(), e);
75
			return;
76
		}
77
		
78
		// run the MDQ routine in a new thread
79
		Runnable task = new Runnable() {
80
			@Override
81
			public void run() {
82
				try {
83
					InputStream run = MDQClient.run(sysMeta);
84
					logMetacat.debug("Generated MDQ run for pid: " + sysMeta.getIdentifier().getValue());
85
					Identifier id = MDQClient.saveRun(run, sysMeta);
86
					logMetacat.info("Saved MDQ run " + id.getValue());
87
				} catch (Exception e) {
88
					logMetacat.error(e.getMessage(), e);
89
				}
90
			}
91
		};
92
		executor.submit(task);	
93
	}
94
	
95
	/**
96
	 * Runs MDQ suite for object identified in SystemMetadata param
97
	 * @param sysMeta
98
	 * @return InputStream for run result (XML)
99
	 * @throws Exception
100
	 */
101
	private static InputStream run(SystemMetadata sysMeta) throws Exception {
102
		
103
		InputStream runResult = null;
104
		
105
		// get the metadata content
106
		String docid = IdentifierManager.getInstance().getLocalId(sysMeta.getIdentifier().getValue());
107
		InputStream docStream = MetacatHandler.read(docid);
108
		
109
		// Construct the REST call
110
		HttpPost post = new HttpPost(mdqURL);
111
		
112
		// add document
113
		SimpleMultipartEntity entity = new SimpleMultipartEntity();
114
		entity.addFilePart("document", docStream);
115
		
116
		// add sysMeta 
117
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
118
		TypeMarshaller.marshalTypeToOutputStream(sysMeta, baos);
119
		entity.addFilePart("systemMetadata", new ByteArrayInputStream(baos.toByteArray()));
120
		
121
		// send to service
122
		post.setEntity(entity);
123
		CloseableHttpClient client = HttpClients.createDefault();
124
		CloseableHttpResponse response = client.execute(post);
125
		
126
		// retrieve results
127
		HttpEntity reponseEntity = response.getEntity();
128
		if (reponseEntity != null) {
129
			runResult  = reponseEntity.getContent(); 
130
		}
131

  
132
		return runResult;
133
	}
134
	
135
	private static Identifier saveRun(InputStream runStream, SystemMetadata metadataSysMeta) throws Exception {
136
		MNodeService mn = MNodeService.getInstance(null);
137
		
138
		// copy the properties from the metadata sysmeta to the run sysmeta
139
		byte[] bytes = IOUtils.toByteArray(runStream);
140
		SystemMetadata sysmeta = generateSystemMetadata(bytes, metadataSysMeta);
141
		
142
		// generate an identifier for the run result
143
		Session session = new Session();
144
		session.setSubject(sysmeta.getRightsHolder());
145
		Identifier pid = mn.generateIdentifier(session, "UUID", null);
146
		sysmeta.setIdentifier(pid);
147
		
148
		// save to this repo
149
		Identifier id = mn.create(session, pid, new ByteArrayInputStream(bytes), sysmeta);
150
		
151
		return id;
152
	}
153
	
154
	private static SystemMetadata generateSystemMetadata(byte[] bytes, SystemMetadata origSysMeta) 
155
			throws Exception {
156
		
157
		SystemMetadata sysMeta = new SystemMetadata();	
158
		
159
		// format id for the run
160
		ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
161
		formatId.setValue(mdqRunNamespace);
162
		sysMeta.setFormatId(formatId);
163
		
164
		// roles
165
		sysMeta.setRightsHolder(origSysMeta.getRightsHolder());	
166
		sysMeta.setSubmitter(origSysMeta.getRightsHolder());
167
		sysMeta.setAuthoritativeMemberNode(origSysMeta.getAuthoritativeMemberNode());
168
		sysMeta.setOriginMemberNode(origSysMeta.getOriginMemberNode());
169
		
170
		// for now, make them all public for easier debugging
171
		AccessPolicyEditor accessPolicyEditor = new AccessPolicyEditor(null);
172
		accessPolicyEditor.setPublicAccess();
173
		sysMeta.setAccessPolicy(accessPolicyEditor.getAccessPolicy());
174
				
175
		// size
176
		sysMeta.setSize(BigInteger.valueOf(bytes.length));
177
		sysMeta.setChecksum(ChecksumUtil.checksum(bytes, "MD5"));
178
		sysMeta.setFileName("run.xml");
179

  
180
		// timestamps
181
		Date now = Calendar.getInstance().getTime();
182
		sysMeta.setDateSysMetadataModified(now);
183
		sysMeta.setDateUploaded(now);
184
		
185
		return sysMeta;
186
	}
187
}
0 188

  

Also available in: Unified diff