Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Solr index subprocessor that parses resource map documents
4
 *             and merges their package information into existing Solr docs
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index.resourcemap;
28

    
29
import java.io.ByteArrayInputStream;
30
import java.io.ByteArrayOutputStream;
31
import java.io.IOException;
32
import java.io.InputStream;
33
import java.io.StringWriter;
34
import java.io.Writer;
35
import java.net.MalformedURLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.List;
41
import java.util.Map;
42
import java.util.Set;
43

    
44
import javax.xml.parsers.DocumentBuilder;
45
import javax.xml.parsers.DocumentBuilderFactory;
46
import javax.xml.parsers.ParserConfigurationException;
47
import javax.xml.xpath.XPathConstants;
48
import javax.xml.xpath.XPathExpressionException;
49
import javax.xml.xpath.XPathFactory;
50

    
51
import org.apache.commons.codec.EncoderException;
52
import org.apache.commons.logging.Log;
53
import org.apache.commons.logging.LogFactory;
54
import org.apache.solr.client.solrj.SolrServer;
55
import org.apache.solr.client.solrj.SolrServerException;
56
import org.apache.solr.client.solrj.response.QueryResponse;
57
import org.apache.solr.common.SolrDocument;
58
import org.apache.solr.common.SolrDocumentList;
59
import org.apache.solr.common.params.SolrParams;
60
import org.apache.solr.core.CoreContainer;
61
import org.apache.solr.core.SolrCore;
62
import org.apache.solr.request.LocalSolrQueryRequest;
63
import org.apache.solr.response.QueryResponseWriter;
64
import org.apache.solr.response.SolrQueryResponse;
65
import org.apache.solr.schema.DateField;
66
import org.apache.solr.schema.IndexSchema;
67
import org.apache.solr.schema.SchemaField;
68
import org.apache.solr.servlet.SolrRequestParsers;
69
import org.dataone.cn.indexer.parser.AbstractDocumentSubprocessor;
70
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
71
import org.dataone.cn.indexer.resourcemap.ResourceMap;
72
import org.dataone.cn.indexer.solrhttp.SolrDoc;
73
import org.dataone.cn.indexer.solrhttp.SolrElementField;
74
import org.dataone.service.exceptions.NotFound;
75
import org.dataone.service.exceptions.NotImplemented;
76
import org.dataone.service.exceptions.UnsupportedType;
77
import org.dataone.service.types.v1.Subject;
78
import org.w3c.dom.Document;
79
import org.w3c.dom.Element;
80
import org.w3c.dom.NodeList;
81
import org.xml.sax.SAXException;
82

    
83
import edu.ucsb.nceas.metacat.common.query.SolrQueryResponseTransformer;
84
import edu.ucsb.nceas.metacat.common.query.SolrQueryResponseWriterFactory;
85
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
86
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
87
import edu.ucsb.nceas.metacat.index.SolrIndex;
88

    
89

    
90
/**
91
 * A solr index parser for the ResourceMap file.
92
 * The Solr doc of the resource map itself contains only the system metadata information.
93
 * The Solr docs of the science metadata document and data files carry the resource map package information.
94
 */
95
public class ResourceMapSubprocessor extends AbstractDocumentSubprocessor implements IDocumentSubprocessor {
96

    
97
    /** Query-string prefix used to look up a single Solr document by its id. */
    private static final String QUERY ="q=id:";
    // NOTE(review): the logger is registered under SolrIndex rather than this class - confirm that is intended.
    private static Log log = LogFactory.getLog(SolrIndex.class);
    /** Solr server handle; stays null when initialization below fails. */
    private static SolrServer solrServer =  null;
    /** Core resolved from the core container; stays null when no container is available. */
    private static SolrCore solrCore = null;
    /** Core container reported by SolrServerFactory; may be null. */
    private static CoreContainer solrCoreContainer = null;

    /*
     * Initialize the shared Solr handles once, when the class is loaded.
     * Failures are logged (with the stack trace) and leave the affected fields null;
     * getSolrDoc() checks solrServer for null before querying.
     */
    static {
        try {
            solrServer = SolrServerFactory.createSolrServer();
            // Assign the static field. Declaring a local variable of the same name here
            // would shadow the field and leave it null forever.
            solrCoreContainer = SolrServerFactory.getCoreContainer();
            // Without a container there is no core to resolve; skip the lookup instead of
            // failing with a NullPointerException that would be reported as a SolrServer failure.
            if (solrCoreContainer != null) {
                String coreName = SolrServerFactory.getCollectionName();
                solrCore = solrCoreContainer.getCore(coreName);
            }
        } catch (Exception e) {
            // Pass the throwable as well so the cause and stack trace are not lost.
            log.error("ResourceMapSubprocessor - can't generate the SolrServer since - "+e.getMessage(), e);
        }
    }
112
          
113
    @Override
114
    public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs,
115
    Document doc) throws IOException, EncoderException, SAXException,
116
    XPathExpressionException, ParserConfigurationException, SolrServerException, NotImplemented, NotFound, UnsupportedType {
117
        SolrDoc resourceMapDoc = docs.get(identifier);
118
        List<SolrDoc> processedDocs = processResourceMap(resourceMapDoc, doc);
119
        Map<String, SolrDoc> processedDocsMap = new HashMap<String, SolrDoc>();
120
        for (SolrDoc processedDoc : processedDocs) {
121
            processedDocsMap.put(processedDoc.getIdentifier(), processedDoc);
122
        }
123
        return processedDocsMap;
124
    }
125

    
126
    private List<SolrDoc> processResourceMap(SolrDoc indexDocument, Document resourceMapDocument)
127
                    throws XPathExpressionException, IOException, SAXException, ParserConfigurationException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
128
        ResourceMap resourceMap = new ResourceMap(resourceMapDocument);
129
        List<String> documentIds = resourceMap.getAllDocumentIDs();
130
        //List<SolrDoc> updateDocuments = getHttpService().getDocuments(getSolrQueryUri(), documentIds);
131
        List<SolrDoc> updateDocuments = getSolrDocs(documentIds);
132
        List<SolrDoc> mergedDocuments = resourceMap.mergeIndexedDocuments(updateDocuments);
133
        /*if(mergedDocuments != null) {
134
            for(SolrDoc doc : mergedDocuments) {
135
                ByteArrayOutputStream out = new ByteArrayOutputStream();
136
                doc.serialize(out, "UTF-8");
137
                String result = new String(out.toByteArray(), "UTF-8");
138
                System.out.println("after updated document===========================");
139
                System.out.println(result);
140
            }
141
        }*/
142
        mergedDocuments.add(indexDocument);
143
        return mergedDocuments;
144
    }
145
    
146
    /*
147
     * Get the SolrDoc list for the list of the ids.
148
     */
149
    public static List<SolrDoc> getSolrDocs(List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
150
        List<SolrDoc> list = new ArrayList<SolrDoc>();
151
        if(ids != null) {
152
            for(String id : ids) {
153
                SolrDoc doc = getSolrDoc(id);
154
                if(doc != null) {
155
                    list.add(doc);
156
                }
157
            }
158
        }
159
        return list;
160
    }
161
    
162
    /*
163
     * Get the SolrDoc for the specified id 
164
     */
165
    public static SolrDoc getSolrDoc(String id) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
166
        SolrDoc solrDoc = null;
167
        if(solrServer != null) {
168
           String query = QUERY+"\""+id+"\"";
169
           SolrParams solrParams = SolrRequestParsers.parseQueryString(query);
170
           Set<Subject>subjects = null;//when subjects are null, there will not be any access rules.
171
           InputStream response = SolrQueryServiceController.getInstance().query(solrParams, subjects);
172
           solrDoc = transformQueryResponseToSolrDoc(solrParams, response);
173
           
174
           /*if(solrDoc != null) {
175
               ByteArrayOutputStream out = new ByteArrayOutputStream();
176
               solrDoc.serialize(out, "UTF-8");
177
               String result = new String(out.toByteArray(), "UTF-8");
178
               System.out.println("need to be updated document ===========================");
179
               System.out.println(result);
180
           }*/
181
           
182
        }
183
        return solrDoc;
184
    }
185
    
186
    /*
187
     * Transform a Solr QueryReponse to a SolrDoc. The QueryReponse contains a list of
188
     * SolrDocuments. This method will transform the first SolrDocuments (in the Solr lib) to
189
     * the SolrDoc (in the d1_cn_index_processor lib).
190
     * @param reponse
191
     * @return
192
     */
193
    private static SolrDoc transformQueryResponseToSolrDoc(SolrParams solrParams, InputStream response) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, UnsupportedType, NotFound {
194
        SolrDoc solrDoc = null;
195
        if(response != null) {
196
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
197
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
198
            Document doc = dBuilder.parse(response);
199
            solrDoc = parseResults(doc);
200
        }
201
        return solrDoc;
202
    }
203
    
204
   
205
    
206
    /*
207
     * Parse the query result document. This method only choose the first one from a list.
208
     */
209
    private static SolrDoc parseResults(Document document) throws XPathExpressionException, MalformedURLException, UnsupportedType, NotFound, ParserConfigurationException, IOException, SAXException {
210
        SolrDoc solrDoc = null;
211
        NodeList nodeList = (NodeList) XPathFactory.newInstance().newXPath()
212
                .evaluate("/response/result/doc", document, XPathConstants.NODESET);
213
        if(nodeList != null && nodeList.getLength() >0) {
214
            Element docElement = (Element) nodeList.item(0);
215
            solrDoc = parseDoc(docElement);
216
        }
217
        return solrDoc;
218
    }
219

    
220
    
221
    /*
222
     * Parse an element
223
     */
224
    private static SolrDoc parseDoc(Element docElement) throws MalformedURLException, UnsupportedType, NotFound, ParserConfigurationException, IOException, SAXException {
225
        List<String> validSolrFieldNames = SolrQueryServiceController.getInstance().getValidSchemaFields();
226
        SolrDoc doc = new SolrDoc();
227
        doc.LoadFromElement(docElement, validSolrFieldNames);
228
        return doc;
229
    }
230
    
231
    
232
    /**
233
     * Get the valid schema fields from the solr server.
234
     * @return
235
     */
236
    /*private static List<String> getValidSchemaField() {
237
        List<String> validSolrFieldNames = new ArrayList<String>();
238
        IndexSchema schema = solrCore.getSchema();
239
        Map<String, SchemaField> fieldMap = schema.getFields();
240
        Set<String> fieldNames = fieldMap.keySet();
241
        for(String fieldName : fieldNames) {
242
            SchemaField field = fieldMap.get(fieldName);
243
            //remove the field which is the target field of a CopyField.
244
            if(field != null && !schema.isCopyFieldTarget(field)) {
245
                 validSolrFieldNames.add(fieldName);
246
            }
247
        }
248
        //System.out.println("the valid file name is\n"+validSolrFieldNames);
249
        return validSolrFieldNames;
250
    }*/
251

    
252
}
    (1-1/1)