Project

General

Profile

1
/**
2
 * This program is free software; you can redistribute it and/or modify
3
 * it under the terms of the GNU General Public License as published by
4
 * the Free Software Foundation; either version 2 of the License, or
5
 * (at your option) any later version.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15
 */
16
package edu.ucsb.nceas.metacat.index.resourcemap;
17

    
18
import java.io.IOException;
19
import java.io.InputStream;
20
import java.net.MalformedURLException;
21
import java.util.ArrayList;
22
import java.util.Date;
23
import java.util.HashMap;
24
import java.util.List;
25
import java.util.Map;
26

    
27
import javax.xml.parsers.ParserConfigurationException;
28
import javax.xml.xpath.XPathExpressionException;
29

    
30
import org.apache.commons.codec.EncoderException;
31
import org.apache.commons.logging.Log;
32
import org.apache.commons.logging.LogFactory;
33
import org.apache.solr.client.solrj.SolrServer;
34
import org.apache.solr.client.solrj.SolrServerException;
35
import org.apache.solr.client.solrj.response.QueryResponse;
36
import org.apache.solr.common.SolrDocument;
37
import org.apache.solr.common.params.SolrParams;
38
import org.apache.solr.schema.IndexSchema;
39
import org.apache.solr.servlet.SolrRequestParsers;
40
import org.dataone.cn.indexer.XmlDocumentUtility;
41
import org.dataone.cn.indexer.convert.SolrDateConverter;
42
import org.dataone.cn.indexer.parser.BaseXPathDocumentSubprocessor;
43
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
44
import org.dataone.cn.indexer.resourcemap.ResourceMap;
45
import org.dataone.cn.indexer.resourcemap.ResourceMapFactory;
46
import org.dataone.cn.indexer.solrhttp.SolrDoc;
47
import org.dataone.cn.indexer.solrhttp.SolrElementField;
48
import org.dataone.service.exceptions.NotFound;
49
import org.dataone.service.exceptions.NotImplemented;
50
import org.dataone.service.exceptions.UnsupportedType;
51
import org.dataone.service.util.DateTimeMarshaller;
52
import org.dspace.foresite.OREParserException;
53
import org.w3c.dom.Document;
54
import org.xml.sax.SAXException;
55

    
56
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
57
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
58
import edu.ucsb.nceas.metacat.index.SolrIndex;
59

    
60

    
61
/**
62
 * A solr index parser for the ResourceMap file.
63
 * The solr doc of the ResourceMap itself only has the system metadata information.
64
 * The solr docs of the science metadata doc and data file have the resource map package information.
65
 */
66
public class ResourceMapSubprocessor extends BaseXPathDocumentSubprocessor implements IDocumentSubprocessor {
67

    
68
    /** Query-string prefix that selects Solr documents by their id field. */
    private static final String QUERY ="q=id:";
    /** Query-string prefix that selects Solr documents by their resource map field. */
    private static final String QUERY2="q="+SolrElementField.FIELD_RESOURCEMAP+":";
    // Log under this class' own category; the logger was previously created for SolrIndex.
    private static final Log log = LogFactory.getLog(ResourceMapSubprocessor.class);
    /** Solr client used by the static query helpers; remains null if it could not be created. */
    private static SolrServer solrServer =  null;
    static {
        try {
            solrServer = SolrServerFactory.createSolrServer();
        } catch (Exception e) {
            // Class loading must not fail here, so the failure is only logged.
            // The exception itself is passed along so the stack trace is not lost.
            log.error("ResourceMapSubprocessor - can't generate the SolrServer since - "+e.getMessage(), e);
        }
    }
79
          
80
    @Override
81
    public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs,
82
    InputStream is) throws IOException, EncoderException, SAXException,
83
    XPathExpressionException, ParserConfigurationException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException {
84
        SolrDoc resourceMapDoc = docs.get(identifier);
85
        Document doc = XmlDocumentUtility.generateXmlDocument(is);
86
		List<SolrDoc> processedDocs = processResourceMap(resourceMapDoc, doc );
87
        Map<String, SolrDoc> processedDocsMap = new HashMap<String, SolrDoc>();
88
        for (SolrDoc processedDoc : processedDocs) {
89
            processedDocsMap.put(processedDoc.getIdentifier(), processedDoc);
90
        }
91
        return processedDocsMap;
92
    }
93

    
94
    private List<SolrDoc> processResourceMap(SolrDoc indexDocument, Document resourceMapDocument)
95
                    throws XPathExpressionException, IOException, SAXException, ParserConfigurationException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException{
96
        //ResourceMap resourceMap = new ResourceMap(resourceMapDocument);
97
        ResourceMap resourceMap = ResourceMapFactory.buildResourceMap(resourceMapDocument);
98
        List<String> documentIds = resourceMap.getAllDocumentIDs();//this list includes the resourceMap id itself.
99
        //List<SolrDoc> updateDocuments = getHttpService().getDocuments(getSolrQueryUri(), documentIds);
100
        List<SolrDoc> updateDocuments = getSolrDocs(resourceMap.getIdentifier(), documentIds);
101
        List<SolrDoc> mergedDocuments = resourceMap.mergeIndexedDocuments(updateDocuments);
102
        /*if(mergedDocuments != null) {
103
            for(SolrDoc doc : mergedDocuments) {
104
                ByteArrayOutputStream out = new ByteArrayOutputStream();
105
                doc.serialize(out, "UTF-8");
106
                String result = new String(out.toByteArray(), "UTF-8");
107
                System.out.println("after updated document===========================");
108
                System.out.println(result);
109
            }
110
        }*/
111
        mergedDocuments.add(indexDocument);
112
        return mergedDocuments;
113
    }
114
    
115
    private List<SolrDoc> getSolrDocs(String resourceMapId, List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, ResourceMapException {
116
        List<SolrDoc> list = new ArrayList<SolrDoc>();
117
        if(ids != null) {
118
            for(String id : ids) {
119
            	SolrDoc doc = getSolrDoc(id);
120
                if(doc != null) {
121
                    list.add(doc);
122
                } else if ( !id.equals(resourceMapId)) {
123
                    throw new ResourceMapException("Solr index doesn't have the information about the id "+id+" which is a component in the resource map "+resourceMapId+". Metacat-Index can't process the resource map prior to its components.");
124
                }
125
            }
126
        }
127
        return list;
128
    } 
129
    
130
    /*
131
     * Get the SolrDoc list for the list of the ids.
132
     */
133
    public static List<SolrDoc> getSolrDocs(List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
134
        List<SolrDoc> list = new ArrayList<SolrDoc>();
135
        if(ids != null) {
136
            for(String id : ids) {
137
            	SolrDoc doc = getSolrDoc(id);
138
                if(doc != null) {
139
                    list.add(doc);
140
                }
141
            }
142
        }
143
        return list;
144
    }
145
    
146
	/*
147
	 * Get the SolrDoc for the specified id
148
	 */
149
	public static SolrDoc getSolrDoc(String id) throws SolrServerException,
150
			IOException, ParserConfigurationException, SAXException,
151
			XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
152
	    int targetIndex = 0;
153
		SolrDoc doc = null;
154
		String query = QUERY + "\"" + id + "\"";
155
	    List<SolrDoc> list = getDocumentsByQuery(query);
156
	    if(list != null && !list.isEmpty()) {
157
	        doc = list.get(targetIndex);
158
	    }
159
		return doc;
160
	}
161
	
162
	/**
163
	 * Get a list of solr documents which's resourcemap field matches the given value.
164
	 * @param resourceMapId - the target resource map id
165
	 * @return the list of solr document 
166
	 * @throws MalformedURLException
167
	 * @throws UnsupportedType
168
	 * @throws NotFound
169
	 * @throws SolrServerException
170
	 * @throws ParserConfigurationException
171
	 * @throws IOException
172
	 * @throws SAXException
173
	 */
174
	public static List<SolrDoc> getDocumentsByResourceMap(String resourceMapId) throws MalformedURLException, 
175
	            UnsupportedType, NotFound, SolrServerException, ParserConfigurationException, IOException, SAXException {
176
	    String query = QUERY2 + "\"" + resourceMapId + "\"";
177
	    return getDocumentsByQuery(query);
178
	}
179
	
180
	/**
181
	 * Get a list of slor docs which match the query.
182
	 * @param query - a string of a query
183
	 * @return
184
	 * @throws SolrServerException
185
	 * @throws MalformedURLException
186
	 * @throws UnsupportedType
187
	 * @throws NotFound
188
	 * @throws ParserConfigurationException
189
	 * @throws IOException
190
	 * @throws SAXException
191
	 */
192
	public static List<SolrDoc> getDocumentsByQuery(String query) throws SolrServerException, MalformedURLException, UnsupportedType, 
193
	                                                                NotFound, ParserConfigurationException, IOException, SAXException {
194
	    List<SolrDoc> docs = new ArrayList<SolrDoc>();
195
	    if (solrServer != null && query != null && !query.trim().equals("")) {
196
            SolrParams solrParams = SolrRequestParsers.parseQueryString(query);
197
            QueryResponse qr = solrServer.query(solrParams);
198
            if (qr != null && qr.getResults() != null) {
199
                for(int i=0; i<qr.getResults().size(); i++) {
200
                    SolrDocument orig = qr.getResults().get(i);
201
                    SolrDoc doc = new SolrDoc();
202
                    IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
203
                    for (String fieldName : orig.getFieldNames()) {
204
                        // don't transfer the copyTo fields, otherwise there are errors
205
                        if (indexSchema.isCopyFieldTarget(indexSchema.getField(fieldName))) {
206
                            continue;
207
                        }
208
                        for (Object value : orig.getFieldValues(fieldName)) {
209
                            String stringValue = value.toString();
210
                            // special handling for dates in ISO 8601
211
                            if (value instanceof Date) {
212
                                stringValue = DateTimeMarshaller.serializeDateToUTC((Date) value);
213
                                SolrDateConverter converter = new SolrDateConverter();
214
                                stringValue = converter.convert(stringValue);
215
                            }
216
                            SolrElementField field = new SolrElementField(fieldName, stringValue);
217
                            log.debug("Adding field: " + fieldName);
218
                            doc.addField(field);
219
                        }
220
                    }
221
                    docs.add(doc);
222
                }
223
                
224
            }
225
	    }
226
	    return docs;
227
	}
228

    
229

    
230
}
(2-2/2)