Project

General

Profile

1
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
16
package edu.ucsb.nceas.metacat.index.resourcemap;
17

    
18
import java.io.ByteArrayInputStream;
19
import java.io.ByteArrayOutputStream;
20
import java.io.IOException;
21
import java.io.InputStream;
22
import java.io.StringWriter;
23
import java.io.Writer;
24
import java.net.MalformedURLException;
25
import java.util.ArrayList;
26
import java.util.Collections;
27
import java.util.Date;
28
import java.util.HashMap;
29
import java.util.List;
30
import java.util.Map;
31
import java.util.Set;
32

    
33
import javax.xml.parsers.DocumentBuilder;
34
import javax.xml.parsers.DocumentBuilderFactory;
35
import javax.xml.parsers.ParserConfigurationException;
36
import javax.xml.xpath.XPathConstants;
37
import javax.xml.xpath.XPathExpressionException;
38
import javax.xml.xpath.XPathFactory;
39

    
40
import org.apache.commons.codec.EncoderException;
41
import org.apache.commons.logging.Log;
42
import org.apache.commons.logging.LogFactory;
43
import org.apache.solr.client.solrj.SolrServer;
44
import org.apache.solr.client.solrj.SolrServerException;
45
import org.apache.solr.client.solrj.response.QueryResponse;
46
import org.apache.solr.common.SolrDocument;
47
import org.apache.solr.common.SolrDocumentList;
48
import org.apache.solr.common.params.SolrParams;
49
import org.apache.solr.core.CoreContainer;
50
import org.apache.solr.core.SolrCore;
51
import org.apache.solr.request.LocalSolrQueryRequest;
52
import org.apache.solr.response.QueryResponseWriter;
53
import org.apache.solr.response.SolrQueryResponse;
54
import org.apache.solr.schema.DateField;
55
import org.apache.solr.schema.IndexSchema;
56
import org.apache.solr.schema.SchemaField;
57
import org.apache.solr.servlet.SolrRequestParsers;
58
import org.dataone.cn.indexer.parser.AbstractDocumentSubprocessor;
59
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
60
import org.dataone.cn.indexer.resourcemap.ResourceMap;
61
import org.dataone.cn.indexer.solrhttp.SolrDoc;
62
import org.dataone.cn.indexer.solrhttp.SolrElementField;
63
import org.dataone.service.exceptions.NotFound;
64
import org.dataone.service.exceptions.NotImplemented;
65
import org.dataone.service.exceptions.UnsupportedType;
66
import org.dataone.service.types.v1.Subject;
67
import org.dspace.foresite.OREParserException;
68
import org.w3c.dom.Document;
69
import org.w3c.dom.Element;
70
import org.w3c.dom.NodeList;
71
import org.xml.sax.SAXException;
72

    
73
import edu.ucsb.nceas.metacat.common.query.SolrQueryResponseTransformer;
74
import edu.ucsb.nceas.metacat.common.query.SolrQueryResponseWriterFactory;
75
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
76
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
77
import edu.ucsb.nceas.metacat.index.SolrIndex;
78

    
79

    
80
/**
81
 * A solr index parser for the ResourceMap file.
82
 * The solr doc of the ResourceMap self only has the system metadata information.
83
 * The solr docs of the science metadata doc and data file have the resource map package information.
84
 */
85
public class ResourceMapSubprocessor extends AbstractDocumentSubprocessor implements IDocumentSubprocessor {
86

    
87
    private static final String QUERY ="q=id:";
88
    private static Log log = LogFactory.getLog(SolrIndex.class);
89
    private static SolrServer solrServer =  null;
90
    private static SolrCore solrCore = null;
91
    private static CoreContainer solrCoreContainer = null;
92
    static {
93
        try {
94
            solrServer = SolrServerFactory.createSolrServer();
95
            CoreContainer solrCoreContainer = SolrServerFactory.getCoreContainer();
96
            String coreName = SolrServerFactory.getCollectionName();
97
            solrCore = solrCoreContainer.getCore(coreName);
98
        } catch (Exception e) {
99
            log.error("ResourceMapSubprocessor - can't generate the SolrServer since - "+e.getMessage());
100
        }
101
    }
102
          
103
    @Override
104
    public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs,
105
    Document doc) throws IOException, EncoderException, SAXException,
106
    XPathExpressionException, ParserConfigurationException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException {
107
        SolrDoc resourceMapDoc = docs.get(identifier);
108
        List<SolrDoc> processedDocs = processResourceMap(resourceMapDoc, doc);
109
        Map<String, SolrDoc> processedDocsMap = new HashMap<String, SolrDoc>();
110
        for (SolrDoc processedDoc : processedDocs) {
111
            processedDocsMap.put(processedDoc.getIdentifier(), processedDoc);
112
        }
113
        return processedDocsMap;
114
    }
115

    
116
    private List<SolrDoc> processResourceMap(SolrDoc indexDocument, Document resourceMapDocument)
117
                    throws XPathExpressionException, IOException, SAXException, ParserConfigurationException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException{
118
        //ResourceMap resourceMap = new ResourceMap(resourceMapDocument);
119
        ResourceMap resourceMap = new ResourceMap(resourceMapDocument);
120
        List<String> documentIds = resourceMap.getAllDocumentIDs();//this list includes the resourceMap id itself.
121
        //List<SolrDoc> updateDocuments = getHttpService().getDocuments(getSolrQueryUri(), documentIds);
122
        List<SolrDoc> updateDocuments = getSolrDocs(resourceMap.getIdentifier(), documentIds);
123
        List<SolrDoc> mergedDocuments = resourceMap.mergeIndexedDocuments(updateDocuments);
124
        /*if(mergedDocuments != null) {
125
            for(SolrDoc doc : mergedDocuments) {
126
                ByteArrayOutputStream out = new ByteArrayOutputStream();
127
                doc.serialize(out, "UTF-8");
128
                String result = new String(out.toByteArray(), "UTF-8");
129
                System.out.println("after updated document===========================");
130
                System.out.println(result);
131
            }
132
        }*/
133
        mergedDocuments.add(indexDocument);
134
        return mergedDocuments;
135
    }
136
    
137
    private List<SolrDoc> getSolrDocs(String resourceMapId, List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, ResourceMapException {
138
        List<SolrDoc> list = new ArrayList<SolrDoc>();
139
        if(ids != null) {
140
            for(String id : ids) {
141
                SolrDoc doc = getSolrDoc(id);
142
                if(doc != null) {
143
                    list.add(doc);
144
                } else if ( !id.equals(resourceMapId)) {
145
                    throw new ResourceMapException("Solr index doesn't have the information about the id "+id+" which is a component in the resource map "+resourceMapId+". Metacat-Index can't process the resource map prior to its components.");
146
                }
147
            }
148
        }
149
        return list;
150
    } 
151
    
152
    /*
153
     * Get the SolrDoc list for the list of the ids.
154
     */
155
    public static List<SolrDoc> getSolrDocs(List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
156
        List<SolrDoc> list = new ArrayList<SolrDoc>();
157
        if(ids != null) {
158
            for(String id : ids) {
159
                SolrDoc doc = getSolrDoc(id);
160
                if(doc != null) {
161
                    list.add(doc);
162
                }
163
            }
164
        }
165
        return list;
166
    }
167
    
168
    /*
169
     * Get the SolrDoc for the specified id 
170
     */
171
    public static SolrDoc getSolrDoc(String id) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
172
        SolrDoc solrDoc = null;
173
        if(solrServer != null) {
174
           String query = QUERY+"\""+id+"\"";
175
           SolrParams solrParams = SolrRequestParsers.parseQueryString(query);
176
           Set<Subject>subjects = null;//when subjects are null, there will not be any access rules.
177
           InputStream response = SolrQueryServiceController.getInstance().query(solrParams, subjects);
178
           solrDoc = transformQueryResponseToSolrDoc(solrParams, response);
179
           
180
           /*if(solrDoc != null) {
181
               ByteArrayOutputStream out = new ByteArrayOutputStream();
182
               solrDoc.serialize(out, "UTF-8");
183
               String result = new String(out.toByteArray(), "UTF-8");
184
               System.out.println("need to be updated document ===========================");
185
               System.out.println(result);
186
           }*/
187
           
188
        }
189
        return solrDoc;
190
    }
191
    
192
    /*
193
     * Transform a Solr QueryReponse to a SolrDoc. The QueryReponse contains a list of
194
     * SolrDocuments. This method will transform the first SolrDocuments (in the Solr lib) to
195
     * the SolrDoc (in the d1_cn_index_processor lib).
196
     * @param reponse
197
     * @return
198
     */
199
    private static SolrDoc transformQueryResponseToSolrDoc(SolrParams solrParams, InputStream response) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, UnsupportedType, NotFound {
200
        SolrDoc solrDoc = null;
201
        if(response != null) {
202
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
203
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
204
            Document doc = dBuilder.parse(response);
205
            solrDoc = parseResults(doc);
206
        }
207
        return solrDoc;
208
    }
209
    
210
   
211
    
212
    /*
213
     * Parse the query result document. This method only choose the first one from a list.
214
     */
215
    private static SolrDoc parseResults(Document document) throws XPathExpressionException, MalformedURLException, UnsupportedType, NotFound, ParserConfigurationException, IOException, SAXException {
216
        SolrDoc solrDoc = null;
217
        NodeList nodeList = (NodeList) XPathFactory.newInstance().newXPath()
218
                .evaluate("/response/result/doc", document, XPathConstants.NODESET);
219
        if(nodeList != null && nodeList.getLength() >0) {
220
            Element docElement = (Element) nodeList.item(0);
221
            solrDoc = parseDoc(docElement);
222
        }
223
        return solrDoc;
224
    }
225

    
226
    
227
    /*
228
     * Parse an element
229
     */
230
    private static SolrDoc parseDoc(Element docElement) throws MalformedURLException, UnsupportedType, NotFound, ParserConfigurationException, IOException, SAXException {
231
        List<String> validSolrFieldNames = SolrQueryServiceController.getInstance().getValidSchemaFields();
232
        SolrDoc doc = new SolrDoc();
233
        doc.LoadFromElement(docElement, validSolrFieldNames);
234
        return doc;
235
    }
236
    
237
    
238
    /**
239
     * Get the valid schema fields from the solr server.
240
     * @return
241
     */
242
    /*private static List<String> getValidSchemaField() {
243
        List<String> validSolrFieldNames = new ArrayList<String>();
244
        IndexSchema schema = solrCore.getSchema();
245
        Map<String, SchemaField> fieldMap = schema.getFields();
246
        Set<String> fieldNames = fieldMap.keySet();
247
        for(String fieldName : fieldNames) {
248
            SchemaField field = fieldMap.get(fieldName);
249
            //remove the field which is the target field of a CopyField.
250
            if(field != null && !schema.isCopyFieldTarget(field)) {
251
                 validSolrFieldNames.add(fieldName);
252
            }
253
        }
254
        //System.out.println("the valid file name is\n"+validSolrFieldNames);
255
        return validSolrFieldNames;
256
    }*/
257

    
258
}
(2-2/2)