Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Solr index subprocessor for resource map documents; it merges
 *             the resource map information into the Solr docs of its members
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index.resourcemap;
28

    
29
import java.io.ByteArrayInputStream;
30
import java.io.ByteArrayOutputStream;
31
import java.io.IOException;
32
import java.io.InputStream;
33
import java.io.StringWriter;
34
import java.io.Writer;
35
import java.net.MalformedURLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.List;
41
import java.util.Map;
42
import java.util.Set;
43

    
44
import javax.xml.parsers.DocumentBuilder;
45
import javax.xml.parsers.DocumentBuilderFactory;
46
import javax.xml.parsers.ParserConfigurationException;
47
import javax.xml.xpath.XPathConstants;
48
import javax.xml.xpath.XPathExpressionException;
49
import javax.xml.xpath.XPathFactory;
50

    
51
import org.apache.commons.codec.EncoderException;
52
import org.apache.commons.logging.Log;
53
import org.apache.commons.logging.LogFactory;
54
import org.apache.solr.client.solrj.SolrServer;
55
import org.apache.solr.client.solrj.SolrServerException;
56
import org.apache.solr.client.solrj.response.QueryResponse;
57
import org.apache.solr.common.SolrDocument;
58
import org.apache.solr.common.SolrDocumentList;
59
import org.apache.solr.common.params.SolrParams;
60
import org.apache.solr.core.CoreContainer;
61
import org.apache.solr.core.SolrCore;
62
import org.apache.solr.request.LocalSolrQueryRequest;
63
import org.apache.solr.response.QueryResponseWriter;
64
import org.apache.solr.response.SolrQueryResponse;
65
import org.apache.solr.schema.DateField;
66
import org.apache.solr.schema.IndexSchema;
67
import org.apache.solr.schema.SchemaField;
68
import org.apache.solr.servlet.SolrRequestParsers;
69
import org.dataone.cn.indexer.parser.AbstractDocumentSubprocessor;
70
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
71
import org.dataone.cn.indexer.resourcemap.ResourceMap;
72
import org.dataone.cn.indexer.solrhttp.SolrDoc;
73
import org.dataone.cn.indexer.solrhttp.SolrElementField;
74
import org.dataone.service.exceptions.NotFound;
75
import org.dataone.service.exceptions.NotImplemented;
76
import org.dataone.service.exceptions.UnsupportedType;
77
import org.dataone.service.types.v1.Subject;
78
import org.w3c.dom.Document;
79
import org.w3c.dom.Element;
80
import org.w3c.dom.NodeList;
81
import org.xml.sax.SAXException;
82

    
83
import edu.ucsb.nceas.metacat.common.query.SolrQueryResponseTransformer;
84
import edu.ucsb.nceas.metacat.common.query.SolrQueryResponseWriterFactory;
85
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
86
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
87
import edu.ucsb.nceas.metacat.index.SolrIndex;
88

    
89

    
90
/**
91
 * A solr index parser for the ResourceMap file.
92
 * The solr doc of the ResourceMap itself only has the system metadata information.
93
 * The solr docs of the science metadata doc and data file have the resource map package information.
94
 */
95
public class ResourceMapSubprocessor extends AbstractDocumentSubprocessor implements IDocumentSubprocessor {
96

    
97
    /** Prefix of the URL-style query string used to look a document up by its id. */
    private static final String QUERY ="q=id:";
    // NOTE(review): the logger is registered under SolrIndex rather than this class - confirm this is intended.
    private static Log log = LogFactory.getLog(SolrIndex.class);
    private static SolrServer solrServer =  null;
    private static SolrCore solrCore = null;
    private static CoreContainer solrCoreContainer = null;

    /*
     * Obtain the Solr server, the core container and the core once, when the class is loaded.
     * A failure is logged rather than rethrown; the fields that were not assigned yet keep
     * their null value, and getSolrDoc returns null while solrServer is null.
     */
    static {
        try {
            solrServer = SolrServerFactory.createSolrServer();
            // Assign the static field. Declaring a local variable of the same name here
            // would shadow the field and leave it null.
            solrCoreContainer = SolrServerFactory.getCoreContainer();
            String coreName = SolrServerFactory.getCollectionName();
            solrCore = solrCoreContainer.getCore(coreName);
        } catch (Exception e) {
            // Hand the exception itself to the logger so the stack trace is not lost.
            log.error("ResourceMapSubprocessor - can't generate the SolrServer since - "+e.getMessage(), e);
        }
    }
112
          
113
    @Override
114
    public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs,
115
    Document doc) throws IOException, EncoderException, SAXException,
116
    XPathExpressionException, ParserConfigurationException, SolrServerException, NotImplemented, NotFound, UnsupportedType {
117
        SolrDoc resourceMapDoc = docs.get(identifier);
118
        List<SolrDoc> processedDocs = processResourceMap(resourceMapDoc, doc);
119
        Map<String, SolrDoc> processedDocsMap = new HashMap<String, SolrDoc>();
120
        for (SolrDoc processedDoc : processedDocs) {
121
            processedDocsMap.put(processedDoc.getIdentifier(), processedDoc);
122
        }
123
        return processedDocsMap;
124
    }
125

    
126
    private List<SolrDoc> processResourceMap(SolrDoc indexDocument, Document resourceMapDocument)
127
                    throws XPathExpressionException, IOException, SAXException, ParserConfigurationException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
128
        ResourceMap resourceMap = new ResourceMap(resourceMapDocument);
129
        List<String> documentIds = resourceMap.getAllDocumentIDs();//this list includes the resourceMap id itself.
130
        //List<SolrDoc> updateDocuments = getHttpService().getDocuments(getSolrQueryUri(), documentIds);
131
        List<SolrDoc> updateDocuments = getSolrDocs(resourceMap.getIdentifier(), documentIds);
132
        List<SolrDoc> mergedDocuments = resourceMap.mergeIndexedDocuments(updateDocuments);
133
        /*if(mergedDocuments != null) {
134
            for(SolrDoc doc : mergedDocuments) {
135
                ByteArrayOutputStream out = new ByteArrayOutputStream();
136
                doc.serialize(out, "UTF-8");
137
                String result = new String(out.toByteArray(), "UTF-8");
138
                System.out.println("after updated document===========================");
139
                System.out.println(result);
140
            }
141
        }*/
142
        mergedDocuments.add(indexDocument);
143
        return mergedDocuments;
144
    }
145
    
146
    private List<SolrDoc> getSolrDocs(String resourceMapId, List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
147
        List<SolrDoc> list = new ArrayList<SolrDoc>();
148
        if(ids != null) {
149
            for(String id : ids) {
150
                SolrDoc doc = getSolrDoc(id);
151
                if(doc != null) {
152
                    list.add(doc);
153
                } else if ( !id.equals(resourceMapId)) {
154
                    throw new SolrServerException("Solr index doesn't have the information about the id "+id+" which is a component in the resource map "+resourceMapId+". Metacat-Index can't process the resource map prior to its components.");
155
                }
156
            }
157
        }
158
        return list;
159
    } 
160
    
161
    /*
162
     * Get the SolrDoc list for the list of the ids.
163
     */
164
    public static List<SolrDoc> getSolrDocs(List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
165
        List<SolrDoc> list = new ArrayList<SolrDoc>();
166
        if(ids != null) {
167
            for(String id : ids) {
168
                SolrDoc doc = getSolrDoc(id);
169
                if(doc != null) {
170
                    list.add(doc);
171
                }
172
            }
173
        }
174
        return list;
175
    }
176
    
177
    /*
178
     * Get the SolrDoc for the specified id 
179
     */
180
    public static SolrDoc getSolrDoc(String id) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
181
        SolrDoc solrDoc = null;
182
        if(solrServer != null) {
183
           String query = QUERY+"\""+id+"\"";
184
           SolrParams solrParams = SolrRequestParsers.parseQueryString(query);
185
           Set<Subject>subjects = null;//when subjects are null, there will not be any access rules.
186
           InputStream response = SolrQueryServiceController.getInstance().query(solrParams, subjects);
187
           solrDoc = transformQueryResponseToSolrDoc(solrParams, response);
188
           
189
           /*if(solrDoc != null) {
190
               ByteArrayOutputStream out = new ByteArrayOutputStream();
191
               solrDoc.serialize(out, "UTF-8");
192
               String result = new String(out.toByteArray(), "UTF-8");
193
               System.out.println("need to be updated document ===========================");
194
               System.out.println(result);
195
           }*/
196
           
197
        }
198
        return solrDoc;
199
    }
200
    
201
    /*
202
     * Transform a Solr QueryReponse to a SolrDoc. The QueryReponse contains a list of
203
     * SolrDocuments. This method will transform the first SolrDocuments (in the Solr lib) to
204
     * the SolrDoc (in the d1_cn_index_processor lib).
205
     * @param reponse
206
     * @return
207
     */
208
    private static SolrDoc transformQueryResponseToSolrDoc(SolrParams solrParams, InputStream response) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, UnsupportedType, NotFound {
209
        SolrDoc solrDoc = null;
210
        if(response != null) {
211
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
212
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
213
            Document doc = dBuilder.parse(response);
214
            solrDoc = parseResults(doc);
215
        }
216
        return solrDoc;
217
    }
218
    
219
   
220
    
221
    /*
222
     * Parse the query result document. This method only choose the first one from a list.
223
     */
224
    private static SolrDoc parseResults(Document document) throws XPathExpressionException, MalformedURLException, UnsupportedType, NotFound, ParserConfigurationException, IOException, SAXException {
225
        SolrDoc solrDoc = null;
226
        NodeList nodeList = (NodeList) XPathFactory.newInstance().newXPath()
227
                .evaluate("/response/result/doc", document, XPathConstants.NODESET);
228
        if(nodeList != null && nodeList.getLength() >0) {
229
            Element docElement = (Element) nodeList.item(0);
230
            solrDoc = parseDoc(docElement);
231
        }
232
        return solrDoc;
233
    }
234

    
235
    
236
    /*
237
     * Parse an element
238
     */
239
    private static SolrDoc parseDoc(Element docElement) throws MalformedURLException, UnsupportedType, NotFound, ParserConfigurationException, IOException, SAXException {
240
        List<String> validSolrFieldNames = SolrQueryServiceController.getInstance().getValidSchemaFields();
241
        SolrDoc doc = new SolrDoc();
242
        doc.LoadFromElement(docElement, validSolrFieldNames);
243
        return doc;
244
    }
245
    
246
    
247
    /**
248
     * Get the valid schema fields from the solr server.
249
     * @return
250
     */
251
    /*private static List<String> getValidSchemaField() {
252
        List<String> validSolrFieldNames = new ArrayList<String>();
253
        IndexSchema schema = solrCore.getSchema();
254
        Map<String, SchemaField> fieldMap = schema.getFields();
255
        Set<String> fieldNames = fieldMap.keySet();
256
        for(String fieldName : fieldNames) {
257
            SchemaField field = fieldMap.get(fieldName);
258
            //remove the field which is the target field of a CopyField.
259
            if(field != null && !schema.isCopyFieldTarget(field)) {
260
                 validSolrFieldNames.add(fieldName);
261
            }
262
        }
263
        //System.out.println("the valid file name is\n"+validSolrFieldNames);
264
        return validSolrFieldNames;
265
    }*/
266

    
267
}
    (1-1/1)