Project

General

Profile

1 7521 leinfelder
.. raw:: latex
2
3
  \newpage
4
5
6 7501 leinfelder
Metacat Indexing
7
===========================
8
Lorem ipsum
9
10
SOLR background information
11
---------------------------
12
Features:
13
14
* something
15
* something
16
* more
17
* even more
18
19
Something to explain the advantage of SOLR over the old Metacat index approach
20
21
Indexed documents and fields
22
-----------------------------
23
Metacat reuses the default DataONE index which includes many common metadata formats
24
out-of-the-box:
25
26
1. EML
27
2. FGDC
28
3. Dryad
29
30
31
Default indexed fields
32
-----------------------
33
Describe the existing fields like in the DataONE docs, with link to them
34
35
36
Index configuration overview
37
----------------------------
38
Describe the configuration files and extension points for the implementation
39
40
41
Adding additional document types and fields
42
--------------------------------------------
43
Step-by-step guide for adding new documents and indexed fields.
44
45
46
Querying the index
47
--------------------
48
Provide example SOLR queries and expected results. Show a variety of return types
49
and query facets.
50
51
52
Access Policy enforcement
53
-------------------------
54
Explain how access control is processed and honored when utilizing the index.
55
56
57
Regenerating the index from scratch
58
-----------------------------------
59
When the SOLR index has been drastically modified, a complete regeneration of the
60
index may be necessary. In order to accomplish this:
61
62
Step-by-step instructions
63
64
NOTE: this may take a long time depending on the size of your Metacat store.
65 7521 leinfelder
66
67
68
Class design overview
69
----------------------
70
71
.. figure:: images/indexing-class-diagram.png
72
73
   Figure 1. Class design overview.
74
75
..
76
  @startuml images/indexing-class-diagram.png
77
78 7526 leinfelder
	package cn-index-processor.parser {
79
80
		interface IDocumentSubprocessor {
81
			+ boolean canProcess(Document doc)
82
			+ initExpression(XPath xpath)
83
			+ Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs, Document doc)
84 7521 leinfelder
		}
85 7526 leinfelder
		class AbstractDocumentSubprocessor {
86
			- List<SolrField> fields
87 7528 tao
			+ setMatchDocument(String matchDocument)
88
			+ setFieldList(List<SolrField> fieldList)
89 7521 leinfelder
		}
90 7526 leinfelder
		class ResourceMapSubprocessor {
91
		}
92
		class ScienceMetadataDocumentSubprocessor {
93
		}
94
95
		interface ISolrField {
96
			+ initExpression(XPath xpathObject)
97
			+ List<SolrElementField> getFields(Document doc, String identifier)
98
		}
99
		class SolrField {
100
			- String name
101 7521 leinfelder
			- String xpath
102 7526 leinfelder
			- boolean multivalue
103 7521 leinfelder
		}
104 7526 leinfelder
		class CommonRootSolrField {
105 7522 leinfelder
		}
106 7526 leinfelder
		class FullTextSolrField {
107
		}
108
		class MergeSolrField {
109
		}
110
		class ResolveSolrField {
111
		}
112
		class SolrFieldResourceMap {
113
		}
114 7528 tao
115
		class SolrDoc {
116
		      - List<SolrElementField> fieldList
117
		}
118
119
		class SolrElementField {
120
		      - String name
121
		      - String value
122
		}
123 7521 leinfelder
124
	}
125
126 7526 leinfelder
	IDocumentSubprocessor <|-- AbstractDocumentSubprocessor
127
	AbstractDocumentSubprocessor <|-- ResourceMapSubprocessor
128
	AbstractDocumentSubprocessor <|-- ScienceMetadataDocumentSubprocessor
129
130
	ISolrField <|-- SolrField
131
	SolrField <|-- CommonRootSolrField
132
	SolrField <|-- FullTextSolrField
133
	SolrField <|-- MergeSolrField
134
	SolrField <|-- ResolveSolrField
135
	SolrField <|-- SolrFieldResourceMap
136 7521 leinfelder
137 7528 tao
138 7526 leinfelder
	AbstractDocumentSubprocessor o--"*" ISolrField
139 7522 leinfelder
140 7528 tao
	IDocumentSubprocessor --> SolrDoc
141
142
	SolrDoc o--"*" SolrElementField
143
144
	package solr {
145
146
        abstract class SolrServer {
147
            + add(SolrInputDocument doc)
148
            + deleteByQuery(String id)
149
            + query(SolrQuery query)
150
        }
151
        class EmbeddedSolrServer {
152
        }
153
        class HttpSolrServer {
154
        }
155
156
    }
157
158
    SolrServer <|-- EmbeddedSolrServer
159
    SolrServer <|-- HttpSolrServer
160
161
162 7527 tao
	package edu.ucsb.nceas.metacat.indexer {
163 7522 leinfelder
164 7528 tao
		class MetacatSolrIndex {
165 7527 tao
			- List<IDocumentSubprocessor> subprocessors
166
			- SolrFieldParser solrFieldParser
167
			- EmbeddedSolrServer solrServer
168
			+ insert(String pid, InputStream data)
169
			+ update(String pid, InputStream data)
170 7526 leinfelder
			+ remove(String pid)
171
			+ OutputStream query(String solrQuery)
172 7522 leinfelder
		}
173
174 7527 tao
		class SolrFieldParser {
175
		     - List<SolrField> solrFields
176
		     + SolrFieldParser(InputStream config)
177
		     + List<SolrField> getSolrFields()
178 7522 leinfelder
		}
179 7526 leinfelder
180
	}
181
182 7528 tao
	MetacatSolrIndex *--"1" EmbeddedSolrServer
183
	MetacatSolrIndex --> SolrFieldParser
184
	MetacatSolrIndex o--"*" IDocumentSubprocessor
185
	SolrFieldParser --> SolrField
186
187 7526 leinfelder
188 7522 leinfelder
189
190 7526 leinfelder
191 7521 leinfelder
192
  @enduml