<?xml version="1.0" ?>
<!--
THE OFFICIAL DataONE Index Solr Schema definition file.
This schema is copied into the dataone-cn-index buildout for deployment on cn nodes.

The Solr schema file. This file should be named "schema.xml" and
 should be in the conf directory under the solr home
 (i.e. ./solr/conf/schema.xml by default)
 or located where the classloader for the Solr webapp can find it.

 For more information on how to customize this file, please see...
 http://wiki.apache.org/solr/SchemaXml
-->

<schema name="dataone" version="1.1">
16
    <types>
17
        <!-- Field type definitions. The "name" attribute is just a label to be
             used by field definitions. The "class" attribute and any other
             attributes determine the real behavior of the fieldtype. -->

        <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
        <fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>

        <!-- boolean type: "true" or "false" -->
        <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
27
28
        <!-- The optional sortMissingLast and sortMissingFirst attributes are
             currently supported on types that are sorted internally as strings.
           - If sortMissingLast="true" then a sort on this field will cause documents
           without the field to come after documents with the field,
           regardless of the requested sort order (asc or desc).
           - If sortMissingFirst="true" then a sort on this field will cause documents
           without the field to come before documents with the field,
           regardless of the requested sort order.
           - If sortMissingLast="false" and sortMissingFirst="false" (the default),
           then default lucene sorting will be used which places docs without the field
           first in an ascending sort and last in a descending sort.
        -->
40
41
        <!-- Numeric field types that store and index the text value verbatim
             (and hence don't support range queries, since the lexicographic
             ordering isn't equal to the numeric ordering). -->
        <fieldtype name="integer" class="solr.IntField"/>
        <fieldtype name="long" class="solr.LongField"/>
        <fieldtype name="float" class="solr.FloatField"/>
        <fieldtype name="double" class="solr.DoubleField"/>
48
49
        <!-- Numeric field types that manipulate the value into a string value
             that isn't human readable in its internal form, but with a
             lexicographic ordering the same as the numeric ordering, so that
             range queries work correctly. -->
        <fieldtype name="sint" class="solr.SortableIntField" sortMissingLast="true"/>
        <fieldtype name="slong" class="solr.SortableLongField" sortMissingLast="true"/>
        <fieldtype name="sfloat" class="solr.SortableFloatField" sortMissingLast="true"/>
        <fieldtype name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true"/>
57
58
        <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
             is a more restricted form of the canonical representation of dateTime
             http://www.w3.org/TR/xmlschema-2/#dateTime
             The trailing "Z" designates UTC time and is mandatory.
             Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
             All other components are mandatory. -->
        <fieldtype name="date" class="solr.DateField" sortMissingLast="true"/>
65
66
        <!-- solr.TextField allows the specification of custom text analyzers
            specified as a tokenizer and a list of token filters. Different
            analyzers may be specified for indexing and querying.

            The optional positionIncrementGap puts space between multiple fields of
            this type on the same document, with the purpose of preventing false phrase
            matching across fields.

            For more info on customizing your analyzer chain, please see...
            http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters

        -->
78
79
        <!-- Standard analyzer commonly used by Lucene developers -->
82
<!--
         <fieldtype name="text_lu" class="solr.TextField" positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.StandardFilterFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.StopFilterFactory"/>
                <filter class="solr.EnglishPorterFilterFactory"/>
            </analyzer>
        </fieldtype>
-->
93
        <!-- One could also specify an existing Analyzer implementation in Java
             via the class attribute on the analyzer element:
        <fieldtype name="text_lu" class="solr.TextField">
          <analyzer class="org.apache.lucene.analysis.snowball.SnowballAnalyzer"/>
        </fieldtype>
        -->
99
100
        <!-- A text field that only splits on whitespace for more exact matching.
             A single analyzer (no "type" attribute) is declared, with no filters. -->
        <fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
            </analyzer>
        </fieldtype>
106
107
        <!-- General-purpose analyzed text type.
             Both analyzers tokenize on whitespace, pass tokens through
             WordDelimiterFilterFactory (splitOnNumerics="0"), lowercase them,
             apply StopFilterFactory and then PorterStemFilterFactory.
             The index analyzer differs from the query analyzer in two ways:
               - it sets catenateWords="1" (the query analyzer uses "0");
               - it ends with ReversedWildcardFilterFactory (withOriginal="true"),
                 which the query analyzer does not have. -->
        <fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
            <analyzer type="index">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.WordDelimiterFilterFactory"
                        generateWordParts="1"
                        catenateWords="1"
                        generateNumberParts="1"
                        splitOnNumerics="0"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.StopFilterFactory"/>
                <filter class="solr.PorterStemFilterFactory"/>
                <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"/>
            </analyzer>
            <analyzer type="query">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.WordDelimiterFilterFactory"
                        generateWordParts="1"
                        catenateWords="0"
                        generateNumberParts="1"
                        splitOnNumerics="0"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.StopFilterFactory"/>
                <filter class="solr.PorterStemFilterFactory"/>
            </analyzer>
        </fieldtype>
135
136
        <!-- Lightly analyzed text type. Despite its name, values are still
             tokenized on whitespace; tokens are then lowercased and passed through
             StopFilterFactory. Unlike the "text" type there is no word-delimiter
             filter and no stemming. Index and query analyzers are identical. -->
        <fieldtype name="text_no_token" class="solr.TextField" positionIncrementGap="100">
            <analyzer type="index">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.StopFilterFactory"/>
            </analyzer>
            <analyzer type="query">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.StopFilterFactory"/>
            </analyzer>
        </fieldtype>
148
149
<!--
         <fieldtype name="text_all" class="solr.TextField" positionIncrementGap="100">
            <analyzer type="index">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                in this example, we will only use synonyms at query time
                <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>

                <filter class="solr.StopFilterFactory" ignoreCase="true"/>
                <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"
                        catenateWords="1" catenateNumbers="1" catenateAll="0"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
            </analyzer>
            <analyzer type="query">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
                <filter class="solr.StopFilterFactory" ignoreCase="true"/>
                <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"
                        catenateWords="0" catenateNumbers="0" catenateAll="0"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
            </analyzer>
        </fieldtype>
 -->
173
174
        <!-- Less flexible matching, but fewer false matches. Probably not ideal for
             product names but may be good for SKUs. Can insert dashes in the wrong
             place and still match.
             Used in this schema by the "sku" and "identifier" fields.
             The stemmer is declared as SnowballPorterFilterFactory with
             language="English", the documented replacement for the deprecated
             EnglishPorterFilterFactory; the protected-words file is unchanged. -->
        <fieldtype name="textTight" class="solr.TextField" positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
                <filter class="solr.StopFilterFactory" ignoreCase="true"/>
                <filter class="solr.WordDelimiterFilterFactory"
                        generateWordParts="0"
                        generateNumberParts="0"
                        catenateWords="1"
                        catenateNumbers="1"
                        catenateAll="0"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
            </analyzer>
        </fieldtype>
187
188
    </types>
189
190
191
<!-- ======================================================================= -->
192
    <fields>
193
        <!-- Valid attributes for fields:
            name: mandatory - the name for the field
            type: mandatory - the name of a previously defined type from the <types> section
            indexed: true if this field should be indexed (searchable)
            stored: true if this field should be retrievable
            multiValued: true if this field may contain multiple values per document
            omitNorms: (expert) set to true to omit the norms associated with this field
                       (this disables length normalization and index-time boosting for the field)
        -->

        <!-- Object-level properties: identity, format, size, checksum, dates,
             ownership, replication and obsolescence.
             Fields declared with indexed="false" are retrievable but not searchable. -->
        <field name="id"                     type="string"  indexed="true"  stored="true" multiValued="false"/>
        <field name="formatId"               type="string"  indexed="true"  stored="true" multiValued="false"/>
        <field name="formatType"             type="string"  indexed="true"  stored="true" multiValued="false"/>
        <field name="size"                   type="slong"   indexed="true"  stored="true" multiValued="false"/>
        <field name="checksum"               type="string"  indexed="false" stored="true" multiValued="false"/>
        <field name="checksumAlgorithm"      type="string"  indexed="false" stored="true"/>
        <field name="dateUploaded"           type="date"    indexed="true"  stored="true" multiValued="false"/>
        <field name="dateModified"           type="date"    indexed="true"  stored="true" multiValued="false"/>
        <field name="submitter"              type="string"  indexed="true"  stored="true" multiValued="false"/>
        <field name="rightsHolder"           type="string"  indexed="true"  stored="true" multiValued="false"/>
        <field name="authoritativeMN"        type="string"  indexed="false" stored="true" multiValued="false"/>
        <field name="replicationAllowed"     type="boolean" indexed="false" stored="true" multiValued="false"/>
        <field name="numberReplicas"         type="integer" indexed="false" stored="true" multiValued="false"/>
        <field name="preferredReplicationMN" type="string"  indexed="false" stored="true" multiValued="true"/>
        <field name="blockedReplicationMN"   type="string"  indexed="false" stored="true" multiValued="true"/>
        <field name="replicaMN"              type="string"  indexed="false" stored="true" multiValued="true"/>
        <field name="replicaVerifiedDate"    type="date"    indexed="false" stored="true" multiValued="true"/>
        <field name="datasource"             type="string"  indexed="true"  stored="true" multiValued="false"/>
        <field name="obsoletes"              type="string"  indexed="true"  stored="true" multiValued="false"/>
        <field name="obsoletedBy"            type="string"  indexed="true"  stored="true" multiValued="false"/>
223
224
        <!-- Object relationships (all multiValued, indexed and stored) -->
        <field name="resourceMap"      type="string"  indexed="true" stored="true" multiValued="true"/>
        <field name="documents"        type="string"  indexed="true" stored="true" multiValued="true"/>
        <field name="isDocumentedBy"   type="string"  indexed="true" stored="true" multiValued="true"/>

        <!-- Permissions -->
        <field name="readPermission"   type="string"  indexed="true" stored="true" multiValued="true"/>
        <field name="writePermission"  type="string"  indexed="true" stored="true" multiValued="true"/>
        <field name="changePermission" type="string"  indexed="true" stored="true" multiValued="true"/>
        <field name="isPublic"         type="boolean" indexed="true" stored="true"/>
234
235
        <!-- Science metadata properties.
             Fields named *Text are analyzed ("text" type), indexed-only companions
             (stored="false") of the like-named string fields; see the copyField
             directives near the end of this file for how they are populated. -->
        <field name="abstract"                type="text"      indexed="true"  stored="true"  multiValued="false"/>
        <field name="author"                  type="string"    indexed="true"  stored="true"  multiValued="false"/>
        <field name="authorLastName"          type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="keywords"                type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="keyConcept"              type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="southBoundCoord"         type="sfloat"    indexed="true"  stored="true"  multiValued="false"/>
        <field name="northBoundCoord"         type="sfloat"    indexed="true"  stored="true"  multiValued="false"/>
        <field name="westBoundCoord"          type="sfloat"    indexed="true"  stored="true"  multiValued="false"/>
        <field name="eastBoundCoord"          type="sfloat"    indexed="true"  stored="true"  multiValued="false"/>
        <field name="namedLocation"           type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="beginDate"               type="date"      indexed="true"  stored="true"  multiValued="false"/>
        <field name="endDate"                 type="date"      indexed="true"  stored="true"  multiValued="false"/>

        <field name="title"                   type="text"      indexed="true"  stored="true"  multiValued="false"/>
        <field name="scientificName"          type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="relatedOrganizations"    type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="datePublished"           type="date"      indexed="true"  stored="true"  multiValued="false"/>
        <field name="pubDate"                 type="date"      indexed="true"  stored="true"/>

        <field name="investigator"            type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="investigatorText"        type="text"      indexed="true"  stored="false" multiValued="true"/>
        <field name="ogcUrl"                  type="text"      indexed="false" stored="true"/>
        <field name="sku"                     type="textTight" indexed="true"  stored="true"/>
        <field name="identifier"              type="textTight" indexed="true"  stored="true"/>
        <field name="LTERSite"                type="string"    indexed="true"  stored="true"/>
        <field name="origin"                  type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="originText"              type="text"      indexed="true"  stored="false" multiValued="true"/>
        <field name="titlestr"                type="string"    indexed="true"  stored="false"/>
        <field name="geoform"                 type="string"    indexed="true"  stored="true"/>
        <field name="presentationCat"         type="string"    indexed="true"  stored="true"/>
        <field name="purpose"                 type="text"      indexed="true"  stored="true"/>
        <field name="updateDate"              type="date"      indexed="true"  stored="true"/>
        <field name="edition"                 type="text"      indexed="true"  stored="true"/>
        <field name="dataUrl"                 type="string"    indexed="false" stored="true"/>
        <field name="originator"              type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="originatorText"          type="text"      indexed="true"  stored="false" multiValued="true"/>
        <field name="family"                  type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="species"                 type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="genus"                   type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="kingdom"                 type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="phylum"                  type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="order"                   type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="class"                   type="string"    indexed="true"  stored="true"  multiValued="true"/>

        <field name="webUrl"                  type="string"    indexed="false" stored="true"  multiValued="true"/>

        <field name="contactOrganization"     type="string"    indexed="true"  stored="true"  multiValued="true"/>
        <field name="contactOrganizationText" type="text"      indexed="true"  stored="false" multiValued="true"/>

        <field name="keywordsText"            type="text"      indexed="true"  stored="false" multiValued="true"/>
        <field name="placeKey"                type="text"      indexed="true"  stored="true"  multiValued="true"/>

        <field name="noBoundingBox"           type="string"    indexed="true"  stored="true"/>
        <field name="isSpatial"               type="string"    indexed="true"  stored="true"/>

        <field name="decade"                  type="string"    indexed="true"  stored="true"/>
        <field name="gcmdKeyword"             type="text"      indexed="true"  stored="true"  multiValued="true"/>
293
294
        <!-- These are ORNL DAAC fields; they may not be populated for NBII, but are
             required to be here for indexing purposes. Each string field is paired
             with an indexed-only *Text field of type "text". -->
        <field name="project"       type="string" indexed="true" stored="true"/>
        <field name="projectText"   type="text"   indexed="true" stored="false"/>

        <field name="site"          type="string" indexed="true" stored="true"  multiValued="true"/>
        <field name="siteText"      type="text"   indexed="true" stored="false" multiValued="true"/>

        <field name="parameter"     type="string" indexed="true" stored="true"  multiValued="true"/>
        <field name="parameterText" type="text"   indexed="true" stored="false" multiValued="true"/>

        <field name="sensor"        type="string" indexed="true" stored="true"  multiValued="true"/>
        <field name="sensorText"    type="text"   indexed="true" stored="false" multiValued="true"/>

        <field name="source"        type="string" indexed="true" stored="true"  multiValued="true"/>
        <field name="sourceText"    type="text"   indexed="true" stored="false" multiValued="true"/>

        <field name="term"          type="string" indexed="true" stored="true"  multiValued="true"/>
        <field name="termText"      type="text"   indexed="true" stored="false" multiValued="true"/>

        <field name="topic"         type="string" indexed="true" stored="true"  multiValued="true"/>
        <field name="topicText"     type="text"   indexed="true" stored="false" multiValued="true"/>

        <field name="fileID"        type="string" indexed="true" stored="true"/>
        <!-- "text" is named as the defaultSearchField further down in this file.
             NOTE(review): no copyField in this file targets it, so it is presumably
             populated directly by the indexing client; confirm. -->
        <field name="text"          type="text"   indexed="true" stored="true"  multiValued="false"/>
318
<!-- ======================================================================= -->
319
        <!-- Dynamic field definitions. If a field name is not found, dynamicFields
             will be used if the name matches any of the patterns.
             RESTRICTION: the glob-like pattern in the name attribute must have
             a "*" only at the start or the end.
             EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
             Longer patterns will be matched first. If equal size patterns
             both match, the first appearing in the schema will be used. -->
        <dynamicField name="*_i"  type="sint"    indexed="true" stored="true"/>
        <dynamicField name="*_s"  type="string"  indexed="true" stored="true"/>
        <dynamicField name="*_l"  type="slong"   indexed="true" stored="true"/>
        <dynamicField name="*_t"  type="text"    indexed="true" stored="true"/>
        <dynamicField name="*_b"  type="boolean" indexed="true" stored="true"/>
        <dynamicField name="*_f"  type="sfloat"  indexed="true" stored="true"/>
        <dynamicField name="*_d"  type="sdouble" indexed="true" stored="true"/>
        <dynamicField name="*_dt" type="date"    indexed="true" stored="true"/>
334
    </fields>
335
336
<!-- ======================================================================= -->
337
    <!-- Field to use to determine and enforce document uniqueness. -->
    <uniqueKey>id</uniqueKey>

    <!-- Field for the QueryParser to use when an explicit fieldname is absent. -->
    <defaultSearchField>text</defaultSearchField>

    <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
    <solrQueryParser defaultOperator="AND"/>
345
346
<!-- ======================================================================= -->
347
    <!-- copyField commands copy one field to another at the time a document
         is added to the index. It's used either to index the same field in
         different ways, or to add multiple fields to the same field for
         easier/faster searching. -->

    <copyField source="id" dest="sku"/>
    <copyField source="id" dest="identifier"/>
    <!-- NOTE(review): the next element is spelled "copyfield" (lowercase f), unlike
         every other copyField in this file. XML names are case-sensitive, so Solr
         presumably does not apply this directive. It is deliberately left as-is:
         replicaMN is multiValued while datasource is declared multiValued="false",
         so simply correcting the case could make indexing fail for objects that
         have several replicas. Confirm the intent before changing or removing it. -->
    <copyfield source="replicaMN" dest="datasource"/>

    <copyField source="title" dest="titlestr"/>

    <!-- NOTE(review): originatorText is fed from "origin"; no directive reads from
         the "originator" field. Confirm that this is intended. -->
    <copyField source="origin" dest="originText"/>
    <copyField source="origin" dest="originatorText"/>

    <copyField source="project" dest="projectText"/>

    <copyField source="site" dest="siteText"/>
    <copyField source="parameter" dest="parameterText"/>
    <copyField source="sensor" dest="sensorText"/>
    <copyField source="source" dest="sourceText"/>
    <copyField source="term" dest="termText"/>
    <copyField source="topic" dest="topicText"/>
    <copyField source="investigator" dest="investigatorText"/>
    <copyField source="keywords" dest="keywordsText"/>
    <copyField source="pubDate" dest="datePublished"/>
    <copyField source="dateUploaded" dest="updateDate"/>
    <copyField source="contactOrganization" dest="contactOrganizationText"/>
373
374
375
    <!-- Similarity is the scoring routine for each document vs a query.
         A custom similarity may be specified here, but the default is fine
         for most applications. -->
    <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
379
380
</schema>