1
|
.. raw:: latex
|
2
|
|
3
|
\newpage
|
4
|
|
5
|
|
6
|
Metacat Indexing
|
7
|
===========================
|
8
|
Lorem ipsum
|
9
|
|
10
|
SOLR background information
|
11
|
---------------------------
|
12
|
Features:
|
13
|
|
14
|
* something
|
15
|
* something
|
16
|
* more
|
17
|
* even more
|
18
|
|
19
|
Something to explain the advantage of SOLR over the old Metacat index approach
|
20
|
|
21
|
Indexed documents and fields
|
22
|
-----------------------------
|
23
|
Metacat reuses the default DataONE index which includes many common metadata formats
|
24
|
out-of-the-box:
|
25
|
|
26
|
1. EML
|
27
|
2. FGDC
|
28
|
3. Dryad
|
29
|
|
30
|
|
31
|
Default indexed fields
|
32
|
-----------------------
|
33
|
Describe the existing fields like in the DataONE docs, with link to them
|
34
|
|
35
|
|
36
|
Index configuration overview
|
37
|
----------------------------
|
38
|
Describe the configuration files and extension points for the implementation
|
39
|
|
40
|
|
41
|
Adding additional document types and fields
|
42
|
--------------------------------------------
|
43
|
Step-by-step guide for adding new documents and indexed fields.
|
44
|
|
45
|
|
46
|
Querying the index
|
47
|
--------------------
|
48
|
Provide example SOLR queries and expected results. Show a variety of return types
|
49
|
and query facets.
|
50
|
|
51
|
|
52
|
Access Policy enforcement
|
53
|
-------------------------
|
54
|
Explain how access control is processed and honored when utilizing the index.
|
55
|
|
56
|
|
57
|
Regenerating the index from scratch
|
58
|
-----------------------------------
|
59
|
When the SOLR index has been drastically modified, a complete regeneration of the
|
60
|
index may be necessary. In order to accomplish this:
|
61
|
|
62
|
Step-by-step instructions
|
63
|
|
64
|
NOTE: this may take a long time depending on the size of your Metacat store.
|
65
|
|
66
|
|
67
|
|
68
|
Class design overview
|
69
|
----------------------
|
70
|
|
71
|
.. figure:: images/indexing-class-diagram.png
|
72
|
|
73
|
Figure 1. Class design overview.
|
74
|
|
75
|
..
|
76
|
@startuml images/indexing-class-diagram.png
|
77
|
|
78
|
package "Current cn-index-processor (library)" {
|
79
|
|
80
|
interface IDocumentSubprocessor {
|
81
|
+ boolean canProcess(Document doc)
|
82
|
+ initExpression(XPath xpath)
|
83
|
+ Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs, Document doc)
|
84
|
}
|
85
|
class AbstractDocumentSubprocessor {
|
86
|
- List<SolrField> fields
|
87
|
+ setMatchDocument(String matchDocument)
|
88
|
+ setFieldList(List<SolrField> fieldList)
|
89
|
}
|
90
|
class ResourceMapSubprocessor {
|
91
|
}
|
92
|
class ScienceMetadataDocumentSubprocessor {
|
93
|
}
|
94
|
|
95
|
interface ISolrField {
|
96
|
+ initExpression(XPath xpathObject)
|
97
|
+ List<SolrElementField> getFields(Document doc, String identifier)
|
98
|
}
|
99
|
class SolrField {
|
100
|
- String name
|
101
|
- String xpath
|
102
|
- boolean multivalue
|
103
|
}
|
104
|
class CommonRootSolrField {
|
105
|
}
|
106
|
class RootElement {
|
107
|
}
|
108
|
class LeafElement {
|
109
|
}
|
110
|
class FullTextSolrField {
|
111
|
}
|
112
|
class MergeSolrField {
|
113
|
}
|
114
|
class ResolveSolrField {
|
115
|
}
|
116
|
class SolrFieldResourceMap {
|
117
|
}
|
118
|
|
119
|
class SolrDoc {
|
120
|
- List<SolrElementField> fieldList
|
121
|
}
|
122
|
|
123
|
class SolrElementField {
|
124
|
- String name
|
125
|
- String value
|
126
|
}
|
127
|
|
128
|
}
|
129
|
|
130
|
IDocumentSubprocessor <|-- AbstractDocumentSubprocessor
|
131
|
AbstractDocumentSubprocessor <|-- ResourceMapSubprocessor
|
132
|
AbstractDocumentSubprocessor <|-- ScienceMetadataDocumentSubprocessor
|
133
|
|
134
|
ISolrField <|-- SolrField
|
135
|
SolrField <|-- CommonRootSolrField
|
136
|
CommonRootSolrField o--"1" RootElement
|
137
|
RootElement o--"*" LeafElement
|
138
|
SolrField <|-- FullTextSolrField
|
139
|
SolrField <|-- MergeSolrField
|
140
|
SolrField <|-- ResolveSolrField
|
141
|
SolrField <|-- SolrFieldResourceMap
|
142
|
|
143
|
AbstractDocumentSubprocessor o--"*" ISolrField
|
144
|
|
145
|
IDocumentSubprocessor --> SolrDoc
|
146
|
|
147
|
SolrDoc o--"*" SolrElementField
|
148
|
|
149
|
package "SOLR (library)" {
|
150
|
|
151
|
abstract class SolrServer {
|
152
|
+ add(SolrInputDocument doc)
|
153
|
+ deleteByQuery(String id)
|
154
|
+ query(SolrQuery query)
|
155
|
}
|
156
|
class EmbeddedSolrServer {
|
157
|
}
|
158
|
class HttpSolrServer {
|
159
|
}
|
160
|
|
161
|
}
|
162
|
|
163
|
SolrServer <|-- EmbeddedSolrServer
|
164
|
SolrServer <|-- HttpSolrServer
|
165
|
|
166
|
package "Stand-alone indexer (webapp or daemon)" {
|
167
|
|
168
|
class ApplicationController {
|
169
|
- List<SolrIndex> solrIndex
|
170
|
+ regenerateIndex()
|
171
|
}
|
172
|
|
173
|
class SolrIndex {
|
174
|
- List<IDocumentSubprocessor> subprocessors
|
175
|
- SolrServer solrServer
|
176
|
+ insert(String pid, InputStream data)
|
177
|
+ update(String pid, InputStream data)
|
178
|
+ remove(String pid)
|
179
|
}
|
180
|
|
181
|
class SystemMetadataEventListener {
|
182
|
- SolrIndex solrIndex
|
183
|
- IMap hzSystemMetadata
|
184
|
- IMap hzObjectPath
|
185
|
+ entryAdded(EntryEvent<Identifier, SystemMetadata>)
|
186
|
+ entryUpdated(EntryEvent<Identifier, SystemMetadata>)
|
187
|
+ entryRemoved(EntryEvent<Identifier, SystemMetadata>)
|
188
|
}
|
189
|
|
190
|
}
|
191
|
|
192
|
package "Metacat (webapp)" {
|
193
|
|
194
|
class MetacatSolrIndex {
|
195
|
- SolrServer solrServer
|
196
|
+ InputStream query(SolrQuery)
|
197
|
}
|
198
|
|
199
|
class HazelcastService {
|
200
|
- IMap hzSystemMetadata
|
201
|
}
|
202
|
|
203
|
class ObjectPathMap {
|
204
|
- IMap hzObjectPath
|
205
|
}
|
206
|
}
|
207
|
|
208
|
MetacatSolrIndex o--"1" SolrServer
|
209
|
HazelcastService .. SystemMetadataEventListener
|
210
|
ObjectPathMap .. SystemMetadataEventListener
|
211
|
|
212
|
ApplicationController o--"*" SolrIndex
|
213
|
SolrIndex o--"1" SolrServer
|
214
|
SolrIndex "1"--o SystemMetadataEventListener
|
215
|
SolrIndex o--"*" IDocumentSubprocessor: Assembled using Spring bean configuration
|
216
|
|
217
|
|
218
|
|
219
|
|
220
|
@enduml
|