Project

General

Profile

1
/**
2
 * This program is free software; you can redistribute it and/or modify
3
 * it under the terms of the GNU General Public License as published by
4
 * the Free Software Foundation; either version 2 of the License, or
5
 * (at your option) any later version.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15
 */
16
package edu.ucsb.nceas.metacat.index.resourcemap;
17

    
18
import java.io.IOException;
19
import java.net.MalformedURLException;
20
import java.util.ArrayList;
21
import java.util.Date;
22
import java.util.HashMap;
23
import java.util.List;
24
import java.util.Map;
25

    
26
import javax.xml.parsers.ParserConfigurationException;
27
import javax.xml.xpath.XPathExpressionException;
28

    
29
import org.apache.commons.codec.EncoderException;
30
import org.apache.commons.logging.Log;
31
import org.apache.commons.logging.LogFactory;
32
import org.apache.solr.client.solrj.SolrServer;
33
import org.apache.solr.client.solrj.SolrServerException;
34
import org.apache.solr.client.solrj.response.QueryResponse;
35
import org.apache.solr.common.SolrDocument;
36
import org.apache.solr.common.params.SolrParams;
37
import org.apache.solr.schema.IndexSchema;
38
import org.apache.solr.servlet.SolrRequestParsers;
39
import org.dataone.cn.indexer.convert.SolrDateConverter;
40
import org.dataone.cn.indexer.parser.AbstractDocumentSubprocessor;
41
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
42
import org.dataone.cn.indexer.resourcemap.ResourceMap;
43
import org.dataone.cn.indexer.resourcemap.ResourceMapFactory;
44
import org.dataone.cn.indexer.solrhttp.SolrDoc;
45
import org.dataone.cn.indexer.solrhttp.SolrElementField;
46
import org.dataone.service.exceptions.NotFound;
47
import org.dataone.service.exceptions.NotImplemented;
48
import org.dataone.service.exceptions.UnsupportedType;
49
import org.dataone.service.util.DateTimeMarshaller;
50
import org.dspace.foresite.OREParserException;
51
import org.w3c.dom.Document;
52
import org.xml.sax.SAXException;
53

    
54
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
55
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
56
import edu.ucsb.nceas.metacat.index.SolrIndex;
57

    
58

    
59
/**
60
 * A solr index parser for the ResourceMap file.
61
 * The solr doc of the ResourceMap self only has the system metadata information.
62
 * The solr docs of the science metadata doc and data file have the resource map package information.
63
 */
64
public class ResourceMapSubprocessor extends AbstractDocumentSubprocessor implements IDocumentSubprocessor {
65

    
66
    private static final String QUERY ="q=id:";
67
    private static final String QUERY2="q="+SolrElementField.FIELD_RESOURCEMAP+":";
68
    private static Log log = LogFactory.getLog(SolrIndex.class);
69
    private static SolrServer solrServer =  null;
70
    static {
71
        try {
72
            solrServer = SolrServerFactory.createSolrServer();
73
        } catch (Exception e) {
74
            log.error("ResourceMapSubprocessor - can't generate the SolrServer since - "+e.getMessage());
75
        }
76
    }
77
          
78
    @Override
79
    public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs,
80
    Document doc) throws IOException, EncoderException, SAXException,
81
    XPathExpressionException, ParserConfigurationException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException {
82
        SolrDoc resourceMapDoc = docs.get(identifier);
83
        List<SolrDoc> processedDocs = processResourceMap(resourceMapDoc, doc);
84
        Map<String, SolrDoc> processedDocsMap = new HashMap<String, SolrDoc>();
85
        for (SolrDoc processedDoc : processedDocs) {
86
            processedDocsMap.put(processedDoc.getIdentifier(), processedDoc);
87
        }
88
        return processedDocsMap;
89
    }
90

    
91
    private List<SolrDoc> processResourceMap(SolrDoc indexDocument, Document resourceMapDocument)
92
                    throws XPathExpressionException, IOException, SAXException, ParserConfigurationException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException{
93
        //ResourceMap resourceMap = new ResourceMap(resourceMapDocument);
94
        ResourceMap resourceMap = ResourceMapFactory.buildResourceMap(resourceMapDocument);
95
        List<String> documentIds = resourceMap.getAllDocumentIDs();//this list includes the resourceMap id itself.
96
        //List<SolrDoc> updateDocuments = getHttpService().getDocuments(getSolrQueryUri(), documentIds);
97
        List<SolrDoc> updateDocuments = getSolrDocs(resourceMap.getIdentifier(), documentIds);
98
        List<SolrDoc> mergedDocuments = resourceMap.mergeIndexedDocuments(updateDocuments);
99
        /*if(mergedDocuments != null) {
100
            for(SolrDoc doc : mergedDocuments) {
101
                ByteArrayOutputStream out = new ByteArrayOutputStream();
102
                doc.serialize(out, "UTF-8");
103
                String result = new String(out.toByteArray(), "UTF-8");
104
                System.out.println("after updated document===========================");
105
                System.out.println(result);
106
            }
107
        }*/
108
        mergedDocuments.add(indexDocument);
109
        return mergedDocuments;
110
    }
111
    
112
    private List<SolrDoc> getSolrDocs(String resourceMapId, List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, ResourceMapException {
113
        List<SolrDoc> list = new ArrayList<SolrDoc>();
114
        if(ids != null) {
115
            for(String id : ids) {
116
            	SolrDoc doc = getSolrDoc(id);
117
                if(doc != null) {
118
                    list.add(doc);
119
                } else if ( !id.equals(resourceMapId)) {
120
                    throw new ResourceMapException("Solr index doesn't have the information about the id "+id+" which is a component in the resource map "+resourceMapId+". Metacat-Index can't process the resource map prior to its components.");
121
                }
122
            }
123
        }
124
        return list;
125
    } 
126
    
127
    /*
128
     * Get the SolrDoc list for the list of the ids.
129
     */
130
    public static List<SolrDoc> getSolrDocs(List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
131
        List<SolrDoc> list = new ArrayList<SolrDoc>();
132
        if(ids != null) {
133
            for(String id : ids) {
134
            	SolrDoc doc = getSolrDoc(id);
135
                if(doc != null) {
136
                    list.add(doc);
137
                }
138
            }
139
        }
140
        return list;
141
    }
142
    
143
	/*
144
	 * Get the SolrDoc for the specified id
145
	 */
146
	public static SolrDoc getSolrDoc(String id) throws SolrServerException,
147
			IOException, ParserConfigurationException, SAXException,
148
			XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
149
	    int targetIndex = 0;
150
		SolrDoc doc = null;
151
		String query = QUERY + "\"" + id + "\"";
152
	    List<SolrDoc> list = getDocumentsByQuery(query);
153
	    if(list != null && !list.isEmpty()) {
154
	        doc = list.get(targetIndex);
155
	    }
156
		return doc;
157
	}
158
	
159
	/**
160
	 * Get a list of solr documents which's resourcemap field matches the given value.
161
	 * @param resourceMapId - the target resource map id
162
	 * @return the list of solr document 
163
	 * @throws MalformedURLException
164
	 * @throws UnsupportedType
165
	 * @throws NotFound
166
	 * @throws SolrServerException
167
	 * @throws ParserConfigurationException
168
	 * @throws IOException
169
	 * @throws SAXException
170
	 */
171
	public static List<SolrDoc> getDocumentsByResourceMap(String resourceMapId) throws MalformedURLException, 
172
	            UnsupportedType, NotFound, SolrServerException, ParserConfigurationException, IOException, SAXException {
173
	    String query = QUERY2 + "\"" + resourceMapId + "\"";
174
	    return getDocumentsByQuery(query);
175
	}
176
	
177
	/**
178
	 * Get a list of slor docs which match the query.
179
	 * @param query - a string of a query
180
	 * @return
181
	 * @throws SolrServerException
182
	 * @throws MalformedURLException
183
	 * @throws UnsupportedType
184
	 * @throws NotFound
185
	 * @throws ParserConfigurationException
186
	 * @throws IOException
187
	 * @throws SAXException
188
	 */
189
	public static List<SolrDoc> getDocumentsByQuery(String query) throws SolrServerException, MalformedURLException, UnsupportedType, 
190
	                                                                NotFound, ParserConfigurationException, IOException, SAXException {
191
	    List<SolrDoc> docs = new ArrayList<SolrDoc>();
192
	    if (solrServer != null && query != null && !query.trim().equals("")) {
193
            SolrParams solrParams = SolrRequestParsers.parseQueryString(query);
194
            QueryResponse qr = solrServer.query(solrParams);
195
            if (qr != null && qr.getResults() != null) {
196
                for(int i=0; i<qr.getResults().size(); i++) {
197
                    SolrDocument orig = qr.getResults().get(i);
198
                    SolrDoc doc = new SolrDoc();
199
                    IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
200
                    for (String fieldName : orig.getFieldNames()) {
201
                        // don't transfer the copyTo fields, otherwise there are errors
202
                        if (indexSchema.isCopyFieldTarget(indexSchema.getField(fieldName))) {
203
                            continue;
204
                        }
205
                        for (Object value : orig.getFieldValues(fieldName)) {
206
                            String stringValue = value.toString();
207
                            // special handling for dates in ISO 8601
208
                            if (value instanceof Date) {
209
                                stringValue = DateTimeMarshaller.serializeDateToUTC((Date) value);
210
                                SolrDateConverter converter = new SolrDateConverter();
211
                                stringValue = converter.convert(stringValue);
212
                            }
213
                            SolrElementField field = new SolrElementField(fieldName, stringValue);
214
                            log.debug("Adding field: " + fieldName);
215
                            doc.addField(field);
216
                        }
217
                    }
218
                    docs.add(doc);
219
                }
220
                
221
            }
222
	    }
223
	    return docs;
224
	}
225

    
226

    
227
}
(2-2/2)