Project

General

Profile

Bug #485 » eml-physicalDFH.xsd

Dan Higgins, 05/16/2002 09:44 AM

 
1
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="eml:physical-2.0.0beta8"
           xmlns="eml:physical-2.0.0beta8"
           xmlns:doc="eml:documentation-2.0.0beta8"
           xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <!-- The doc: vocabulary used inside every xs:appinfo below is declared in
       the imported eml-documentation schema. -->
  <xs:import namespace="eml:documentation-2.0.0beta8"
             schemaLocation="eml-documentation.xsd"/>
8
  <!-- Module-level annotation: RCS/licence header plus the doc:* description
       of the eml-physical module. -->
  <xs:annotation>
    <xs:documentation>
       '$RCSfile: eml-physical.xsd,v $'
       Copyright: 2000 Regents of the University of California and the
                  National Center for Ecological Analysis and Synthesis
     For Details: http://knb.ecoinformatics.org/

        '$Author: higgins $'
          '$Date: 2002/04/21 22:45:30 $'
      '$Revision: 1.11 $'

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    </xs:documentation>
    <xs:appinfo>
      <doc:moduleName>eml-physical</doc:moduleName>
      <doc:moduleDescription>
        The eml-physical Module defines the structural
        characteristics of data formats as delivered over the wire or
        as found in a file system.  One physical object (which can be a
        bytestream or an object in a file system) might contain multiple
        entities (for example, this would be typical in a MS Access file
        that contained multiple tables of data).  However, it is typically
        used to describe a file or stream that is in some text-based
        format such as ASCII or UTF-8, and includes the information needed
        to parse the data stream to extract the entity and its attributes
        from the stream.
      </doc:moduleDescription>
    </xs:appinfo>
  </xs:annotation>
49
  <xs:element name="eml-physical">
50
    <!-- Documentation for the eml-physical root element. -->
    <xs:annotation>
      <xs:appinfo>
        <doc:tooltip>Physical structure.</doc:tooltip>
        <doc:summary>Physical structure of an entity or entities.</doc:summary>
        <doc:description>
          Physical structure of an entity or entities.  This generally is a detailed
          description of a text representation that shows how the columns and rows
          of a table are represented, or simply the name of a well-known binary or
          proprietary format (e.g., Microsoft Excel 2000).
        </doc:description>
        <doc:example/>
        <doc:lineage>
          The eml-physical was introduced into EML 1.4 as eml-file.
        </doc:lineage>
      </xs:appinfo>
    </xs:annotation>
66
    <xs:complexType>
67
      <xs:sequence>
68
        <!-- identifier: required, repeatable; string content with an optional
             "system" attribute naming the catalog the identifier belongs to. -->
        <xs:element name="identifier" maxOccurs="unbounded">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                Unique identifier
              </doc:tooltip>
              <doc:summary>
                The unique identifier of this metadata file or object.
              </doc:summary>
              <doc:description>
                The identifier field provides a unique identifier for this
                metadata documentation.  It will most likely be part of a
                sequence of numbers or letters that are meaningful in a
                larger context, such as a metadata catalog. That larger
                system can be identified in the "system" attribute. Multiple
                identifiers can be listed corresponding to different catalog
                systems.
              </doc:description>
              <doc:example><![CDATA[<identifier system="metacat">nceas.3.2</identifier>]]></doc:example>
              <doc:lineage>
                The 'identifier' field is derived from the eml-dataset
                meta_file_id field in EML 1.4.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
          <xs:complexType>
            <xs:simpleContent>
              <xs:extension base="xs:string">
                <xs:attribute name="system" type="xs:string" use="optional">
                  <xs:annotation>
                    <xs:appinfo>
                      <doc:tooltip>
                        Catalog system
                      </doc:tooltip>
                      <doc:summary>
                        The catalog system in which this identifier is used.
                      </doc:summary>
                      <doc:description>
                        This attribute gives the name of the catalog system in which
                        this identifier is used.  It is useful to determine the
                        scope of the identifier, and to determine the semantics
                        of the various subparts of the identifier. Unresolved issue:
                        can or should this be a URI/URL pointing to the catalog
                        system, or just the name?
                      </doc:description>
                      <doc:example><![CDATA[<identifier system="metacat">nceas.3.2</identifier>]]></doc:example>
                      <doc:lineage>
                        New to EML 2.0.
                      </doc:lineage>
                    </xs:appinfo>
                  </xs:annotation>
                </xs:attribute>
              </xs:extension>
            </xs:simpleContent>
          </xs:complexType>
        </xs:element>
125
        <!-- format: required; string content naming the file format, with
             optional "version" and "citation" attributes. -->
        <xs:element name="format">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                File format
              </doc:tooltip>
              <doc:summary>
                Contains the name of the format for this file.
              </doc:summary>
              <doc:description>
                This element contains the name of the file's format.
                The file's format is typically ASCII, Unicode, or some
                well-known binary format (e.g., Microsoft Excel 2000).
                It is recommended to include a
                complete MIME type here, such as image/jpeg or text/xml.  Note
                that this is the format of the physical file itself.
              </doc:description>
              <doc:example><![CDATA[<format>ASCII</format>]]></doc:example>
              <doc:lineage>
                The format element was introduced into EML 1.4.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
          <xs:complexType>
            <xs:simpleContent>
              <xs:extension base="xs:string">
                <xs:attribute name="version" type="xs:string" use="optional">
                  <xs:annotation>
                    <xs:appinfo>
                      <doc:tooltip>
                        Format version
                      </doc:tooltip>
                      <doc:summary>
                        The version of the format in use.
                      </doc:summary>
                      <doc:description>
                        This attribute is designed for use in providing the
                        version of the format in use. For example, 'Excel'
                        might be the format; with '3.1' being the version.
                      </doc:description>
                      <doc:example/>
                      <doc:lineage>
                        New to EML 2.0.
                      </doc:lineage>
                    </xs:appinfo>
                  </xs:annotation>
                </xs:attribute>
                <xs:attribute name="citation" type="xs:string" use="optional">
                  <xs:annotation>
                    <xs:appinfo>
                      <doc:tooltip>
                        Format citation
                      </doc:tooltip>
                      <doc:summary>
                        Reference to a detailed description of the format.
                      </doc:summary>
                      <doc:description>
                        Citation is a simple reference describing the format
                        where one can find a detailed description of the
                        format.
                      </doc:description>
                      <doc:example/>
                      <doc:lineage>
                        New to EML 2.0.
                      </doc:lineage>
                    </xs:appinfo>
                  </xs:annotation>
                </xs:attribute>
              </xs:extension>
            </xs:simpleContent>
          </xs:complexType>
        </xs:element>
195

    
196
       <!-- objectName: optional (minOccurs="0") xs:string element.
            NOTE(review): unlike most siblings it carries no doc:* annotation;
            presumably the name of the physical object or file. Confirm and
            document. -->
       <xs:element name="objectName" type="xs:string" minOccurs="0"/>
197

    
198
       <!-- characterEncoding: optional xs:string naming the character encoding. -->
       <xs:element name="characterEncoding" type="xs:string" minOccurs="0">
         <xs:annotation>
           <xs:appinfo>
             <doc:tooltip>
               Character Encoding
             </doc:tooltip>
             <doc:summary>
               Contains the name of the character encoding used for the data.
             </doc:summary>
             <doc:description>
               This element contains the name of the character encoding.
               This is typically ASCII or UTF-8, or one of the other common encodings.
             </doc:description>
             <doc:example><![CDATA[<characterEncoding>UTF-8</characterEncoding>]]></doc:example>
             <doc:lineage>
               Introduced in EML 2.0
             </doc:lineage>
           </xs:appinfo>
         </xs:annotation>
       </xs:element>
218
        <!-- size: optional; string content with a "unit" attribute.
             The attribute is declared optional with default "bytes": XML Schema
             does not allow a default value on an attribute whose use is
             "required", so the previous required+default combination made the
             schema itself invalid. Instances that spell out unit="..." are
             unaffected. -->
        <xs:element name="size" minOccurs="0">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                Entity size
              </doc:tooltip>
              <doc:summary>
                Describes the physical size of the entity.
              </doc:summary>
              <doc:description>
                This element contains information of the physical size
                of the entity, typically in bytes.
              </doc:description>
              <doc:example><![CDATA[<size unit="bytes">13</size>]]></doc:example>
              <doc:lineage>
                The entitySize was introduced into EML 1.4.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
          <xs:complexType>
            <xs:simpleContent>
              <xs:extension base="xs:string">
                <xs:attribute name="unit" type="xs:string" use="optional" default="bytes">
                  <xs:annotation>
                    <xs:appinfo>
                      <doc:tooltip>
                        Unit of measurement
                      </doc:tooltip>
                      <doc:summary>
                        Unit of measurement for the entity size, typically bytes
                      </doc:summary>
                      <doc:description>
                        This attribute gives the unit of measurement for the
                        size of the entity, and is typically bytes.
                      </doc:description>
                      <doc:example><![CDATA[<size unit="bytes">13</size>]]></doc:example>
                      <doc:lineage>
                        The unit was introduced into EML 1.4.
                      </doc:lineage>
                    </xs:appinfo>
                  </xs:annotation>
                </xs:attribute>
              </xs:extension>
            </xs:simpleContent>
          </xs:complexType>
        </xs:element>
264
        <!-- authentication: optional, repeatable; string content (the checksum
             value) with an optional "method" attribute naming the algorithm. -->
        <xs:element name="authentication" minOccurs="0" maxOccurs="unbounded">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                Authentication method
              </doc:tooltip>
              <doc:summary>
                A value, typically a checksum, used to authenticate that the bitstream
                delivered to the user is identical to the original.
              </doc:summary>
              <doc:description>
                This element describes authentication procedures or
                techniques, typically by giving a checksum method (e.g., MD5) and
                checksum value for the bytestream.
              </doc:description>
              <doc:example>
                <![CDATA[
                  <authentication method="MD5">f5b2177ea03aea73de12da81f896fe40</authentication>
                ]]>
              </doc:example>
              <doc:lineage>
                The authentication element was introduced into EML 1.4.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
          <xs:complexType>
            <xs:simpleContent>
              <xs:extension base="xs:string">
                <xs:attribute name="method" type="xs:string" use="optional">
                  <xs:annotation>
                    <xs:appinfo>
                      <doc:tooltip>
                        Authentication method
                      </doc:tooltip>
                      <doc:summary>
                        The method used to calculate an authentication checksum.
                      </doc:summary>
                      <doc:description>
                        This attribute names the method used to calculate an
                        authentication checksum that can be used to validate a
                        bytestream.  Typical checksum methods include MD5 and CRC.
                      </doc:description>
                      <doc:example>
                        <![CDATA[
                          <authentication method="MD5">f5b2177ea03aea73de12da81f896fe40</authentication>
                        ]]>
                      </doc:example>
                      <doc:lineage>
                        The authentication element was introduced into EML 1.4.
                      </doc:lineage>
                    </xs:appinfo>
                  </xs:annotation>
                </xs:attribute>
              </xs:extension>
            </xs:simpleContent>
          </xs:complexType>
        </xs:element>
321
        <!-- compressionMethod: optional xs:string naming the compression used. -->
        <xs:element name="compressionMethod" type="xs:string" minOccurs="0">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                Entity's compression method
              </doc:tooltip>
              <doc:summary>
                Name of the entity's compression method
              </doc:summary>
              <doc:description>
                This element describes any compression methods used to
                compress the entity, such as zip, compress, etc.
              </doc:description>
              <doc:example/>
              <doc:lineage>
                The compressed element was introduced into EML 1.4.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
        </xs:element>
341
        <!-- encodingMethod: optional xs:string naming the encoding applied to
             the entity. -->
        <xs:element name="encodingMethod" type="xs:string" minOccurs="0">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                Encoding Method
              </doc:tooltip>
              <doc:summary>
                Method used for encoding the entity
              </doc:summary>
              <doc:description>
                This element describes the entity's encoding method, such as
                MIME base64 encoding or binhex encoding.
              </doc:description>
              <doc:example/>
              <doc:lineage>
                The encoded element was introduced into EML 1.4.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
        </xs:element>
361
        <!-- numHeaderLines: optional; declared as xs:string, so the schema does
             not itself enforce a numeric value. -->
        <xs:element name="numHeaderLines" type="xs:string" minOccurs="0">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                Header lines
              </doc:tooltip>
              <doc:summary>
                Header lines in the entity
              </doc:summary>
              <doc:description>
                Number of header lines or information that prepares data.
              </doc:description>
              <doc:example><![CDATA[<numHeaderLines>3</numHeaderLines>]]></doc:example>
              <doc:lineage>
                The numHeaderLines element was introduced into EML 1.4.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
        </xs:element>
380
        <!-- recordDelimiter: optional xs:string. The example uses \r\n so that
             it matches the Windows/DOS sequence given in the description. -->
        <xs:element name="recordDelimiter" type="xs:string" minOccurs="0">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                Record delimiter character
              </doc:tooltip>
              <doc:summary>
                Character used to delimit records.
              </doc:summary>
              <doc:description>
                This element specifies the record delimiter character
                when the format is text. The record delimiter is usually a
                newline (\n) on UNIX, a carriage return (\r) on MacOS, or
                both (\r\n) on Windows/DOS.  Multiline records are usually
                delimited with two line ending characters, for example on UNIX
                it would be two newline characters (\n\n).
              </doc:description>
              <doc:example><![CDATA[<recordDelimiter>\r\n</recordDelimiter>]]></doc:example>
              <doc:lineage>
                The recordDelimiter element was introduced into EML 1.4.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
        </xs:element>
404
        <!-- maxRecordLength: optional (minOccurs="0") xs:string element.
             NOTE(review): no doc:* annotation; presumably the maximum length of
             a record, but the unit and counting rule are not stated here.
             Confirm and document. -->
        <xs:element name="maxRecordLength" type="xs:string" minOccurs="0"/>
405
        <!-- quoteCharacter: optional, repeatable xs:string. -->
        <xs:element name="quoteCharacter" type="xs:string" minOccurs="0" maxOccurs="unbounded">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                Quote character
              </doc:tooltip>
              <doc:summary>
                Character used to quote values for delimiter escaping
              </doc:summary>
              <doc:description>
                This element specifies a character to be used in the entity
                for quoting values so that field delimiters can be used within
                the value.  This basically allows delimiter "escaping".  The
                quoteCharacter is typically a " or '.
              </doc:description>
              <doc:example><![CDATA[<quoteCharacter>"</quoteCharacter>]]></doc:example>
              <doc:lineage>
                The quoteCharacter element was taken from the NBII standard.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
        </xs:element>
427
        <!-- literalCharacter: optional, repeatable xs:string. -->
        <xs:element name="literalCharacter" type="xs:string" minOccurs="0" maxOccurs="unbounded">
          <xs:annotation>
            <xs:appinfo>
              <doc:tooltip>
                Literal character
              </doc:tooltip>
              <doc:summary>
                Character used to escape other characters
              </doc:summary>
              <doc:description>
                This element specifies a character to be used for escaping
                character values so that the following character is treated as its literal
                value.  This allows "escaping" for special characters like quotes, commas,
                and spaces when they aren't intended as a delimiter value.  The
                literalCharacter is typically a \.
              </doc:description>
              <doc:example><![CDATA[<literalCharacter>\</literalCharacter>]]></doc:example>
              <doc:lineage>
                Introduced in EML 2.0.
              </doc:lineage>
            </xs:appinfo>
          </xs:annotation>
        </xs:element>
450
        <!-- Per-field layout, repeated once per field (0..unbounded):
             an optional fieldStartColumn followed by exactly one of
             fieldWidth (fixed-width field) or fieldDelimiter (delimited field). -->
        <xs:sequence minOccurs="0" maxOccurs="unbounded">
          <xs:element name="fieldStartColumn" type="xs:string" minOccurs="0">
            <xs:annotation>
              <xs:appinfo>
                <doc:tooltip>
                  Start column
                </doc:tooltip>
                <doc:summary>
                  The starting column number for a fixed format attribute.
                </doc:summary>
                <doc:description>
                  FixedWidth fields have a set length, thus
                  the end of the field can always be determined
                  by adding the fieldWidth to the starting
                  column number.
                </doc:description>
                <doc:example>
                  any positive integer, see example in "fieldDelimiter" description
                </doc:example>
                <doc:lineage>
                  Introduced into EML 2.0.
                </doc:lineage>
              </xs:appinfo>
            </xs:annotation>
          </xs:element>
          <xs:choice>
            <xs:element name="fieldWidth" type="xs:string">
              <xs:annotation>
                <xs:appinfo>
                  <doc:tooltip>
                    Field width
                  </doc:tooltip>
                  <doc:summary>
                    FieldWidth specification for fixed field length.
                  </doc:summary>
                  <doc:description>
                    FixedWidth fields have a set length, thus
                    the end of the field can always be determined
                    by adding the fieldWidth to the starting
                    column number.
                  </doc:description>
                  <doc:example>
                    any positive integer, see example in "fieldDelimiter"
                    description
                  </doc:example>
                  <doc:lineage>
                    The fieldWidth element was introduced into
                    EML 1.4. Semantics changed to work identically to
                    the NBII DTD.
                  </doc:lineage>
                </xs:appinfo>
              </xs:annotation>
            </xs:element>
            <xs:element name="fieldDelimiter" type="xs:string">
              <xs:annotation>
                <xs:appinfo>
                  <doc:tooltip>
                    Attribute delimiter
                  </doc:tooltip>
                  <doc:summary>
                    The end of the attribute (field) is delimited by a
                    special character called a field delimiter.
                  </doc:summary>
                  <doc:description>
                    Variable width format fields (attributes) can vary in their
                    field length, thus the end of the field is
                    delimited by a special character called a
                    field delimiter (typically a comma or a space).

                    Data sets are generally classified as fixedWidth
                    format or variableWidth format, but we have
                    determined that this is actually a per-field
                    classification because one may encounter
                    fixedWidth fields mixed together in the same
                    data file with variableWidth fields.

                    In our encoding scheme, the start of each field
                    is assumed to be the column after the last column
                    of the previous field, or the first column
                    if this is the first field in the dataset, unless
                    the starting column is explicitly enumerated using the
                    "fieldStartColumn" element.
                    The end column for each field is classified
                    using either a special character delimiter indicated
                    using the "fieldDelimiter" element,
                    or a fixed field length indicated by using the "fieldWidth"
                    element.  The delimiter for the last field in the data set can be omitted.
                    variableWidth fields can vary in their field length, and the end of
                    the field is delimited by a special character
                    called a field delimiter, usually a comma or
                    a tab character.  fixedWidth fields have a set
                    length, and so the end of the field can always
                    be determined by adding the fieldWidth to the
                    starting column number.  Here is an example:

                    Assume we have the following data in a data set:

                    May,100aaaa,1.2,
                    April,200aaaa,3.4,
                    June,300bbbb,4.6,

                    The metadata indicating the physical layout of the 4 fields would include the
                    following:

                    <![CDATA[
                      <fieldDelimiter>,</fieldDelimiter>
                      <fieldWidth>3</fieldWidth>
                      <fieldDelimiter>,</fieldDelimiter>
                      <fieldDelimiter>,</fieldDelimiter>
                    ]]>

                    In a strictly fixed format file, the metadata would be slightly different:

                    May100aaaa1.2
                    Apr200aaaa3.4
                    Jun300bbbb4.6

                    <![CDATA[
                      <fieldWidth>3</fieldWidth>
                      <fieldWidth>3</fieldWidth>
                      <fieldWidth>4</fieldWidth>
                      <fieldWidth>3</fieldWidth>
                    ]]>

                    or, one could explicitly describe the starting columns:

                    <![CDATA[
                      <fieldStartColumn>1</fieldStartColumn>
                      <fieldWidth>3</fieldWidth>
                      <fieldStartColumn>4</fieldStartColumn>
                      <fieldWidth>3</fieldWidth>
                      <fieldStartColumn>7</fieldStartColumn>
                      <fieldWidth>4</fieldWidth>
                      <fieldStartColumn>11</fieldStartColumn>
                      <fieldWidth>3</fieldWidth>
                    ]]></doc:description>
                  <doc:example>
                    comma, tab, white space, etc.
                  </doc:example>
                  <doc:lineage>
                    The delimiter element was introduced into
                    EML 1.4. Semantics changed to work identically to
                    the NBII DTD, and then modified to fit more cases.
                  </doc:lineage>
                </xs:appinfo>
              </xs:annotation>
            </xs:element>
          </xs:choice>
        </xs:sequence>
599
	
600
	<!-- BinaryRasterInfo: optional element whose children must all appear,
	     once each, in the order listed. Only nrows, ncols and nbands are
	     typed (xs:int); the remaining children declare no type and therefore
	     default to xs:anyType, i.e. any content is accepted.
	     NOTE(review): the child names look like the keywords of an ESRI
	     BIL/BIP/BSQ raster header file. Confirm the source, then add doc:*
	     annotations and concrete types. -->
	<xs:element name="BinaryRasterInfo" minOccurs="0">
	  <xs:complexType>
	    <xs:sequence>
	      <xs:element name="nrows" type="xs:int"/>
	      <xs:element name="ncols" type="xs:int"/>
	      <xs:element name="nbands" type="xs:int"/>
	      <xs:element name="nbits"/>
	      <xs:element name="byteorder"/>
	      <xs:element name="layout"/>
	      <xs:element name="skipbytes"/>
	      <xs:element name="ulxmap"/>
	      <xs:element name="ulymap"/>
	      <xs:element name="xdim"/>
	      <xs:element name="ydim"/>
	      <xs:element name="bandrowbytes"/>
	      <xs:element name="totalrowbytes"/>
	      <xs:element name="bandgapbytes"/>
	    </xs:sequence>
	  </xs:complexType>
	</xs:element>
620

    
621
      </xs:sequence>
622
    </xs:complexType>
623
  </xs:element>
624
</xs:schema>
(1-1/2)