Project

General

Profile

metacat / docs / user / metacat / source / query-index.rst @ 7532

1
.. raw:: latex
2

    
3
  \newpage
4

    
5

    
6
Metacat Indexing
7
===========================
8
Lorem ipsum
9

    
10
SOLR background information
11
---------------------------
12
Features:
13

    
14
* something
15
* something
16
* more
17
* even more
18

    
19
Something to explain the advantage of SOLR over the old Metacat index approach.
20

    
21
Indexed documents and fields
22
-----------------------------
23
Metacat reuses the default DataONE index, which includes many common metadata formats
24
out-of-the-box:
25

    
26
1. EML
27
2. FGDC
28
3. Dryad
29

    
30

    
31
Default indexed fields
32
-----------------------
33
Describe the existing fields as in the DataONE docs, with a link to them.
34

    
35

    
36
Index configuration overview
37
----------------------------
38
Describe the configuration files and extension points for the implementation
39

    
40

    
41
Adding additional document types and fields
42
--------------------------------------------
43
Step-by-step guide for adding new documents and indexed fields.
44

    
45

    
46
Querying the index
47
--------------------
48
Provide example SOLR queries and expected results. Show a variety of return types
49
and query facets.
50

    
51

    
52
Access Policy enforcement
53
-------------------------
54
Explain how access control is processed and honored when utilizing the index.
55

    
56

    
57
Regenerating the index from scratch
58
-----------------------------------
59
When the SOLR index has been drastically modified, a complete regeneration of the
60
index may be necessary. In order to accomplish this:
61

    
62
Step-by-step instructions
63

    
64
NOTE: this may take a long time depending on the size of your Metacat store.
65

    
66

    
67

    
68
Class design overview
69
----------------------
70

    
71
.. figure:: images/indexing-class-diagram.png
72

    
73
   Figure 1. Class design overview.
74
   
75
..
76
  @startuml images/indexing-class-diagram.png
77
  
78
	package "Current cn-index-processor (library)" {
79
	
80
		interface IDocumentSubprocessor {
81
			+ boolean canProcess(Document doc)
82
			+ initExpression(XPath xpath)
83
			+ Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs, Document doc)
84
		}
85
		class AbstractDocumentSubprocessor {
86
			- List<SolrField> fields
87
			+ setMatchDocument(String matchDocument)
88
			+ setFieldList(List<SolrField> fieldList) 
89
		}
90
		class ResourceMapSubprocessor {
91
		}
92
		class ScienceMetadataDocumentSubprocessor {
93
		}
94
			  
95
		interface ISolrField {
96
			+ initExpression(XPath xpathObject)
97
			+ List<SolrElementField> getFields(Document doc, String identifier)
98
		}
99
		class SolrField {
100
			- String name
101
			- String xpath
102
			- boolean multivalue
103
		}
104
		class CommonRootSolrField {
105
		}
106
		class RootElement {
107
		}
108
		class LeafElement {
109
		}
110
		class FullTextSolrField {
111
		}
112
		class MergeSolrField {
113
		}
114
		class ResolveSolrField {
115
		}
116
		class SolrFieldResourceMap {
117
		}
118
		
119
		class SolrDoc {
120
		      - List<SolrElementField> fieldList
121
		}
122
		
123
		class SolrElementField {
124
		      - String name
125
		      - String value
126
		}
127
		    
128
	}
129
	
130
	IDocumentSubprocessor <|-- AbstractDocumentSubprocessor
131
	AbstractDocumentSubprocessor <|-- ResourceMapSubprocessor
132
	AbstractDocumentSubprocessor <|-- ScienceMetadataDocumentSubprocessor
133

    
134
	ISolrField <|-- SolrField
135
	SolrField <|-- CommonRootSolrField
136
	CommonRootSolrField o--"1" RootElement
137
	RootElement o--"*" LeafElement
138
	SolrField <|-- FullTextSolrField
139
	SolrField <|-- MergeSolrField
140
	SolrField <|-- ResolveSolrField			
141
	SolrField <|-- SolrFieldResourceMap
142
	
143
	AbstractDocumentSubprocessor o--"*" ISolrField
144
	
145
	IDocumentSubprocessor --> SolrDoc
146
	
147
	SolrDoc o--"*" SolrElementField
148
	
149
	package "SOLR (library)" {
150
          
151
        abstract class SolrServer {
152
            + add(SolrInputDocument doc)
153
            + deleteByQuery(String id)
154
            + query(SolrQuery query)
155
        }
156
        class EmbeddedSolrServer {
157
        }
158
        class HttpSolrServer {
159
        }
160
    
161
    }
162
    
163
    SolrServer <|-- EmbeddedSolrServer
164
    SolrServer <|-- HttpSolrServer
165
	
166
	package "Stand-alone indexer (webapp or daemon)" {
167
		  
168
		class SolrIndex {
169
			- List<IDocumentSubprocessor> subprocessors
170
			- IDocumentSubprocessorFactory subprocessorFactory
171
			- SolrServer solrServer
172
			+ insert(String pid, InputStream data)
173
			+ update(String pid, InputStream data)
174
			+ remove(String pid)
175
		}
176

    
177
		class SystemMetadataEventListener {
178
			- SolrIndex solrIndex
179
			- IMap hzSystemMetadata
180
			- IMap hzObjectPath
181
			+ entryAdded(EntryEvent<Identifier, SystemMetadata>)
182
			+ entryUpdated(EntryEvent<Identifier, SystemMetadata>)
183
			+ entryRemoved(EntryEvent<Identifier, SystemMetadata>)
184
		}
185
	
186
	}
187
	
188
	package "Metacat (webapp)" {
189
		  
190
		class MetacatSolrIndex {
191
			- SolrServer solrServer
192
			+ InputStream query(SolrQuery)
193
		}
194
		
195
		class HazelcastService {
196
			- IMap hzSystemMetadata
197
		}
198
		
199
		class ObjectPathMap {
200
			- IMap hzObjectPath
201
		}
202
	}
203
	
204
	MetacatSolrIndex o--"1" SolrServer
205
	HazelcastService .. SystemMetadataEventListener
206
	ObjectPathMap .. SystemMetadataEventListener
207
	
208
	SolrIndex o--"1" SolrServer	
209
	SolrIndex "1"--o SystemMetadataEventListener
210
	SolrIndex o--"*" IDocumentSubprocessor: Assembled using Spring bean configuration
211
	
212
	
213
	
214
  
215
  @enduml