1
|
.. raw:: latex
|
2
|
|
3
|
\newpage
|
4
|
|
5
|
|
6
|
Metacat Indexing
|
7
|
===========================
|
8
|
Lorem ipsum
|
9
|
|
10
|
SOLR background information
|
11
|
---------------------------
|
12
|
Features:
|
13
|
|
14
|
* something
|
15
|
* something
|
16
|
* more
|
17
|
* even more
|
18
|
|
19
|
Something to explain the advantage of SOLR over the old Metacat index approach
|
20
|
|
21
|
Indexed documents and fields
|
22
|
-----------------------------
|
23
|
Metacat reuses the default DataONE index, which includes many common metadata formats
|
24
|
out of the box:
|
25
|
|
26
|
1. EML
|
27
|
2. FGDC
|
28
|
3. Dryad
|
29
|
|
30
|
|
31
|
Default indexed fields
|
32
|
-----------------------
|
33
|
Describe the existing fields like in the DataONE docs, with link to them
|
34
|
|
35
|
|
36
|
Index configuration overview
|
37
|
----------------------------
|
38
|
Describe the configuration files and extension points for the implementation
|
39
|
|
40
|
|
41
|
Adding additional document types and fields
|
42
|
--------------------------------------------
|
43
|
Step-by-step guide for adding new documents and indexed fields.
|
44
|
|
45
|
|
46
|
Querying the index
|
47
|
--------------------
|
48
|
Provide example SOLR queries and expected results. Show a variety of return types
|
49
|
and query facets.
|
50
|
|
51
|
|
52
|
Access Policy enforcement
|
53
|
-------------------------
|
54
|
Explain how access control is processed and honored when utilizing the index.
|
55
|
|
56
|
|
57
|
Regenerating the index from scratch
|
58
|
-----------------------------------
|
59
|
When the SOLR index has been drastically modified, a complete regeneration of the
|
60
|
index may be necessary. In order to accomplish this:
|
61
|
|
62
|
Step-by-step instructions
|
63
|
|
64
|
NOTE: this may take a long time depending on the size of your Metacat store.
|
65
|
|
66
|
|
67
|
|
68
|
Class design overview
|
69
|
----------------------
|
70
|
|
71
|
.. figure:: images/indexing-class-diagram.png
|
72
|
|
73
|
Figure 1. Class design overview.
|
74
|
|
75
|
..
|
76
|
@startuml images/indexing-class-diagram.png
|
77
|
|
78
|
package cn-index-processor.parser {
|
79
|
|
80
|
interface IDocumentSubprocessor {
|
81
|
+ boolean canProcess(Document doc)
|
82
|
+ initExpression(XPath xpath)
|
83
|
+ Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs, Document doc)
|
84
|
}
|
85
|
class AbstractDocumentSubprocessor {
|
86
|
- List<SolrField> fields
|
87
|
+ setMatchDocument(String matchDocument)
|
88
|
+ setFieldList(List<SolrField> fieldList)
|
89
|
}
|
90
|
class ResourceMapSubprocessor {
|
91
|
}
|
92
|
class ScienceMetadataDocumentSubprocessor {
|
93
|
}
|
94
|
|
95
|
interface ISolrField {
|
96
|
+ initExpression(XPath xpathObject)
|
97
|
+ List<SolrElementField> getFields(Document doc, String identifier)
|
98
|
}
|
99
|
class SolrField {
|
100
|
- String name
|
101
|
- String xpath
|
102
|
- boolean multivalue
|
103
|
}
|
104
|
class CommonRootSolrField {
|
105
|
}
|
106
|
class FullTextSolrField {
|
107
|
}
|
108
|
class MergeSolrField {
|
109
|
}
|
110
|
class ResolveSolrField {
|
111
|
}
|
112
|
class SolrFieldResourceMap {
|
113
|
}
|
114
|
|
115
|
class SolrDoc {
|
116
|
- List<SolrElementField> fieldList
|
117
|
}
|
118
|
|
119
|
class SolrElementField {
|
120
|
- String name
|
121
|
- String value
|
122
|
}
|
123
|
|
124
|
}
|
125
|
|
126
|
IDocumentSubprocessor <|-- AbstractDocumentSubprocessor
|
127
|
AbstractDocumentSubprocessor <|-- ResourceMapSubprocessor
|
128
|
AbstractDocumentSubprocessor <|-- ScienceMetadataDocumentSubprocessor
|
129
|
|
130
|
ISolrField <|-- SolrField
|
131
|
SolrField <|-- CommonRootSolrField
|
132
|
SolrField <|-- FullTextSolrField
|
133
|
SolrField <|-- MergeSolrField
|
134
|
SolrField <|-- ResolveSolrField
|
135
|
SolrField <|-- SolrFieldResourceMap
|
136
|
|
137
|
|
138
|
AbstractDocumentSubprocessor o--"*" ISolrField
|
139
|
|
140
|
IDocumentSubprocessor --> SolrDoc
|
141
|
|
142
|
SolrDoc o--"*" SolrElementField
|
143
|
|
144
|
package solr {
|
145
|
|
146
|
abstract class SolrServer {
|
147
|
+ add(SolrInputDocument doc)
|
148
|
+ deleteByQuery(String id)
|
149
|
+ query(SolrQuery query)
|
150
|
}
|
151
|
class EmbeddedSolrServer {
|
152
|
}
|
153
|
class HttpSolrServer {
|
154
|
}
|
155
|
|
156
|
}
|
157
|
|
158
|
SolrServer <|-- EmbeddedSolrServer
|
159
|
SolrServer <|-- HttpSolrServer
|
160
|
|
161
|
|
162
|
package edu.ucsb.nceas.metacat.indexer {
|
163
|
|
164
|
class MetacatSolrIndex {
|
165
|
- List<IDocumentSubprocessor> subprocessors
|
166
|
- IDocumentSubprocessorFactory subprocessorFactory
|
167
|
- EmbeddedSolrServer solrServer
|
168
|
+ insert(String pid, InputStream data)
|
169
|
+ update(String pid, InputStream data)
|
170
|
+ remove(String pid)
|
171
|
+ OutputStream query(String solrQuery)
|
172
|
}
|
173
|
|
174
|
class IDocumentSubprocessorFactory {
|
175
|
+ IDocumentSubprocessor getIDocumentSubprocessor(InputStream config)
|
176
|
}
|
177
|
|
178
|
}
|
179
|
|
180
|
MetacatSolrIndex *--"1" EmbeddedSolrServer
|
181
|
MetacatSolrIndex --> IDocumentSubprocessorFactory
|
182
|
MetacatSolrIndex o--"*" IDocumentSubprocessor
|
183
|
IDocumentSubprocessorFactory --> IDocumentSubprocessor
|
184
|
|
185
|
|
186
|
|
187
|
|
188
|
|
189
|
|
190
|
@enduml
|