Project

General

Profile

1
.. raw:: latex
2

    
3
  \newpage
4

    
5

    
6
Metacat Indexing
7
===========================
8
Lorem ipsum
9

    
10
SOLR background information
11
---------------------------
12
Features:
13

    
14
* something
15
* something
16
* more
17
* even more
18

    
19
Something to explain the advantage of SOLR over the old Metacat index approach
20

    
21
Indexed documents and fields
22
-----------------------------
23
Metacat reuses the default DataONE index which includes many common metadata formats
24
out-of-the-box:
25

    
26
1. EML
27
2. FGDC
28
3. Dryad
29

    
30

    
31
Default indexed fields
32
-----------------------
33
Describe the existing fields like in the DataONE docs, with link to them
34

    
35

    
36
Index configuration overview
37
----------------------------
38
Describe the configuration files and extension points for the implementation
39

    
40

    
41
Adding additional document types and fields
42
--------------------------------------------
43
Step-by-step guide for adding new documents and indexed fields.
44

    
45

    
46
Querying the index
47
--------------------
48
Provide example SOLR queries and expected results. Show a variety of return types
49
and query facets.
50

    
51

    
52
Access Policy enforcement
53
-------------------------
54
Explain how access control is processed and honored when utilizing the index.
55

    
56

    
57
Regenerating the index from scratch
58
-----------------------------------
59
When the SOLR index has been drastically modified, a complete regeneration of the
60
index may be necessary. In order to accomplish this:
61

    
62
Step-by-step instructions
63

    
64
NOTE: this may take a long time depending on the size of your Metacat store.
65

    
66

    
67

    
68
Class design overview
69
----------------------
70

    
71
.. figure:: images/indexing-class-diagram.png
72

    
73
   Figure 1. Class design overview.
74
   
75
..
76
  @startuml images/indexing-class-diagram.png
77
  
78
	package "Current cn-index-processor (library)" {
79
	
80
		interface IDocumentSubprocessor {
81
			+ boolean canProcess(Document doc)
82
			+ initExpression(XPath xpath)
83
			+ Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs, Document doc)
84
		}
85
		class AbstractDocumentSubprocessor {
86
			- List<SolrField> fields
87
			+ setMatchDocument(String matchDocument)
88
			+ setFieldList(List<SolrField> fieldList) 
89
		}
90
		class ResourceMapSubprocessor {
91
		}
92
		class ScienceMetadataDocumentSubprocessor {
93
		}
94
			  
95
		interface ISolrField {
96
			+ initExpression(XPath xpathObject)
97
			+ List<SolrElementField> getFields(Document doc, String identifier)
98
		}
99
		class SolrField {
100
			- String name
101
			- String xpath
102
			- boolean multivalue
103
		}
104
		class CommonRootSolrField {
105
		}
106
		class RootElement {
107
		}
108
		class LeafElement {
109
		}
110
		class FullTextSolrField {
111
		}
112
		class MergeSolrField {
113
		}
114
		class ResolveSolrField {
115
		}
116
		class SolrFieldResourceMap {
117
		}
118
		
119
		class SolrDoc {
120
		      - List<SolrElementField> fieldList
121
		}
122
		
123
		class SolrElementField {
124
		      - String name
125
		      - String value
126
		}
127
		    
128
	}
129
	
130
	IDocumentSubprocessor <|-- AbstractDocumentSubprocessor
131
	AbstractDocumentSubprocessor <|-- ResourceMapSubprocessor
132
	AbstractDocumentSubprocessor <|-- ScienceMetadataDocumentSubprocessor
133

    
134
	ISolrField <|-- SolrField
135
	SolrField <|-- CommonRootSolrField
136
	CommonRootSolrField o--"1" RootElement
137
	RootElement o--"*" LeafElement
138
	SolrField <|-- FullTextSolrField
139
	SolrField <|-- MergeSolrField
140
	SolrField <|-- ResolveSolrField			
141
	SolrField <|-- SolrFieldResourceMap
142
	
143
	AbstractDocumentSubprocessor o--"*" ISolrField
144
	
145
	IDocumentSubprocessor --> SolrDoc
146
	
147
	SolrDoc o--"*" SolrElementField
148
	
149
	package "SOLR (library)" {
150
          
151
        abstract class SolrServer {
152
            + add(SolrInputDocument doc)
153
            + deleteByQuery(String id)
154
            + query(SolrQuery query)
155
        }
156
        class EmbeddedSolrServer {
157
        }
158
        class HttpSolrServer {
159
        }
160
    
161
    }
162
    
163
    SolrServer <|-- EmbeddedSolrServer
164
    SolrServer <|-- HttpSolrServer
165
	
166
	package "Stand-alone indexer (webapp or daemon)" {
167
		  
168
		class ApplicationController {
169
		    - List<SolrIndex> solrIndex
170
		    + regenerateIndex()
171
		}
172
		
173
		class SolrIndex {
174
			- List<IDocumentSubprocessor> subprocessors
175
			- SolrServer solrServer
176
			+ insert(String pid, InputStream data)
177
			+ update(String pid, InputStream data)
178
			+ remove(String pid)
179
		}
180

    
181
		class SystemMetadataEventListener {
182
			- SolrIndex solrIndex
183
			- IMap hzSystemMetadata
184
			- IMap hzObjectPath
185
			+ entryAdded(EntryEvent<Identifier, SystemMetadata>)
186
			+ entryUpdated(EntryEvent<Identifier, SystemMetadata>)
187
			+ entryRemoved(EntryEvent<Identifier, SystemMetadata>)
188
		}
189
	
190
	}
191
	
192
	package "Metacat (webapp)" {
193
		  
194
		class MetacatSolrIndex {
195
			- SolrServer solrServer
196
			+ InputStream query(SolrQuery)
197
		}
198
		
199
		class HazelcastService {
200
			- IMap hzSystemMetadata
201
		}
202
		
203
		class ObjectPathMap {
204
			- IMap hzObjectPath
205
		}
206
	}
207
	
208
	MetacatSolrIndex o--"1" SolrServer
209
	HazelcastService .. SystemMetadataEventListener
210
	ObjectPathMap .. SystemMetadataEventListener
211
	
212
	ApplicationController o--"*" SolrIndex
213
	SolrIndex o--"1" SolrServer	
214
	SolrIndex "1"--o SystemMetadataEventListener
215
	SolrIndex o--"*" IDocumentSubprocessor: Assembled using Spring bean configuration
216
	
217
	
218
	
219
  
220
  @enduml
(19-19/22)