Project

General

Profile

1
.. raw:: latex
2

    
3
  \newpage
4

    
5

    
6
Metacat Indexing
7
===========================
8
Lorem ipsum
9

    
10
SOLR background information
11
---------------------------
12
Features:
13

    
14
* something
15
* something
16
* more
17
* even more
18

    
19
Something to explain the advantage of SOLR over the old Metacat index approach.
20

    
21
Indexed documents and fields
22
-----------------------------
23
Metacat reuses the default DataONE index, which includes many common metadata formats
24
out-of-the-box:
25

    
26
1. EML
27
2. FGDC
28
3. Dryad
29

    
30

    
31
Default indexed fields
32
-----------------------
33
Describe the existing fields like in the DataONE docs, with link to them
34

    
35

    
36
Index configuration overview
37
----------------------------
38
Describe the configuration files and extension points for the implementation
39

    
40

    
41
Adding additional document types and fields
42
--------------------------------------------
43
Step-by-step guide for adding new documents and indexed fields.
44

    
45

    
46
Querying the index
47
--------------------
48
Provide example SOLR queries and expected results. Show a variety of return types
49
and query facets.
50

    
51

    
52
Access Policy enforcement
53
-------------------------
54
Explain how access control is processed and honored when utilizing the index.
55

    
56

    
57
Regenerating the index from scratch
58
-----------------------------------
59
When the SOLR index has been drastically modified, a complete regeneration of the
60
index may be necessary. In order to accomplish this:
61

    
62
Step-by-step instructions
63

    
64
NOTE: this may take a long time depending on the size of your Metacat store.
65

    
66

    
67

    
68
Class design overview
69
----------------------
70

    
71
.. figure:: images/indexing-class-diagram.png
72

    
73
   Figure 1. Class design overview.
74
   
75
..
76
  @startuml images/indexing-class-diagram.png
77
  
78
	package cn-index-processor.parser {
79
	
80
		interface IDocumentSubprocessor {
81
			+ boolean canProcess(Document doc)
82
			+ initExpression(XPath xpath)
83
			+ Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs, Document doc)
84
		}
85
		class AbstractDocumentSubprocessor {
86
			- List<SolrField> fields
87
			+ setMatchDocument(String matchDocument)
88
			+ setFieldList(List<SolrField> fieldList) 
89
		}
90
		class ResourceMapSubprocessor {
91
		}
92
		class ScienceMetadataDocumentSubprocessor {
93
		}
94
			  
95
		interface ISolrField {
96
			+ initExpression(XPath xpathObject)
97
			+ List<SolrElementField> getFields(Document doc, String identifier)
98
		}
99
		class SolrField {
100
			- String name
101
			- String xpath
102
			- boolean multivalue
103
		}
104
		class CommonRootSolrField {
105
		}
106
		class FullTextSolrField {
107
		}
108
		class MergeSolrField {
109
		}
110
		class ResolveSolrField {
111
		}
112
		class SolrFieldResourceMap {
113
		}
114
		
115
		class SolrDoc {
116
		      - List<SolrElementField> fieldList
117
		}
118
		
119
		class SolrElementField {
120
		      - String name
121
		      - String value
122
		}
123
		    
124
	}
125
	
126
	IDocumentSubprocessor <|-- AbstractDocumentSubprocessor
127
	AbstractDocumentSubprocessor <|-- ResourceMapSubprocessor
128
	AbstractDocumentSubprocessor <|-- ScienceMetadataDocumentSubprocessor
129

    
130
	ISolrField <|-- SolrField
131
	SolrField <|-- CommonRootSolrField
132
	SolrField <|-- FullTextSolrField
133
	SolrField <|-- MergeSolrField
134
	SolrField <|-- ResolveSolrField			
135
	SolrField <|-- SolrFieldResourceMap		
136
	
137
	
138
	AbstractDocumentSubprocessor o--"*" ISolrField
139
	
140
	IDocumentSubprocessor --> SolrDoc
141
	
142
	SolrDoc o--"*" SolrElementField
143
	
144
	package solr {
145
          
146
        abstract class SolrServer {
147
            + add(SolrInputDocument doc)
148
            + deleteByQuery(String id)
149
            + query(SolrQuery query)
150
        }
151
        class EmbeddedSolrServer {
152
        }
153
        class HttpSolrServer {
154
        }
155
    
156
    }
157
    
158
    SolrServer <|-- EmbeddedSolrServer
159
    SolrServer <|-- HttpSolrServer
160
	
161
	
162
	package edu.ucsb.nceas.metacat.indexer {
163
		  
164
		class MetacatSolrIndex {
165
			- List<IDocumentSubprocessor> subprocessors
166
			- IDocumentSubprocessorFactory subprocessorFactory
167
			- EmbeddedSolrServer solrServer
168
			+ insert(String pid, InputStream data)
169
			+ update(String pid, InputStream data)
170
			+ remove(String pid)
171
			+ OutputStream query(String solrQuery)
172
		}
173
		
174
		class IDocumentSubprocessorFactory {
175
		     + IDocumentSubprocessor getIDocumentSubprocessor(InputStream config)
176
		}
177
	
178
	}
179
	
180
	MetacatSolrIndex *--"1" EmbeddedSolrServer
181
	MetacatSolrIndex --> IDocumentSubprocessorFactory
182
	MetacatSolrIndex o--"*" IDocumentSubprocessor
183
	IDocumentSubprocessorFactory --> IDocumentSubprocessor
184
	
185

    
186
	
187
	
188
	
189
  
190
  @enduml
(19-19/22)