Project

General

Profile

1
<!--
2
       '$RCSfile$'
3
       Copyright: 2000 Regents of the University of California and the
4
                  National Center for Ecological Analysis and Synthesis
5
     For Details: http://knb.ecoinformatics.org/
6

    
7
        '$Author: jones $'
8
          '$Date: 2001-10-22 09:46:55 -0700 (Mon, 22 Oct 2001) $'
9
      '$Revision: 853 $'
10

    
11
    This program is free software; you can redistribute it and/or modify
12
    it under the terms of the GNU General Public License as published by
13
    the Free Software Foundation; either version 2 of the License, or
14
    (at your option) any later version.
15

    
16
    This program is distributed in the hope that it will be useful,
17
    but WITHOUT ANY WARRANTY; without even the implied warranty of
18
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
    GNU General Public License for more details.
20

    
21
    You should have received a copy of the GNU General Public License
22
    along with this program; if not, write to the Free Software
23
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24
-->
25
<!-- EML DTD document that defines the structural
26
     characteristics of physical objects -->
27
<!-- The root element, which contains an identifier, a format and
28
     a physical description: optional encoding, size, authentication,
     compression, header and record-layout elements, followed by zero or
     more field groups, each an optional fieldStartColumn and then either
     a fieldDelimiter or a fieldWidth -->
29
<!ELEMENT eml-physical (identifier, format, characterEncoding?, size?, authentication*, compressionMethod?, 
30
                                         encodingMethod?, numHeaderLines?, recordDelimiter?, maxRecordLength?, 
31
                                         quoteCharacter*, literalCharacter*, (fieldStartColumn?, (fieldDelimiter | fieldWidth))* )>
32
<!-- File identifier of the metadata document (character data).
     Required: exactly one per eml-physical.
     The "system" attribute is optional free-form text with no default. -->
33
<!ELEMENT identifier (#PCDATA)>
34
<!ATTLIST identifier
35
  system CDATA #IMPLIED
36
>
37
<!-- Size value, as character data; presumably the size of the physical
     object being described (TODO confirm).
     Optional: at most one per eml-physical.
     The "unit" attribute defaults to "bytes" when omitted. -->
38
<!ELEMENT size (#PCDATA)>
39
<!ATTLIST size
40
  unit CDATA "bytes"
41
>
42
<!-- Authentication value (element content) and method (the optional
     "method" attribute, free-form text with no default).
     Zero or more authentication elements may appear in eml-physical. -->
43
<!ELEMENT authentication (#PCDATA)>
44
<!ATTLIST authentication
45
  method CDATA #IMPLIED
46
>
47
<!-- Entity format (e.g., text, name of various binary formats [TIFF]).
     Required: exactly one per eml-physical. -->
48
<!ELEMENT format (#PCDATA)>
49
<!-- characterEncoding for text files (e.g., ASCII, UTF-8).
     Optional: at most one per eml-physical. -->
50
<!ELEMENT characterEncoding (#PCDATA)>
51
<!-- Method of compression.
     Optional: at most one per eml-physical. -->
52
<!ELEMENT compressionMethod (#PCDATA)>
53
<!-- Method of encoding.
     Optional: at most one per eml-physical. -->
54
<!ELEMENT encodingMethod (#PCDATA)>
55
<!-- The character used to delimit records in the entity.
     Optional: at most one per eml-physical. -->
56
<!ELEMENT recordDelimiter (#PCDATA)>
57
<!-- Presumably the maximum length of a record in the entity; the unit
     (characters or bytes) is not stated in this DTD (TODO confirm).
     Optional: at most one per eml-physical. -->
<!ELEMENT maxRecordLength (#PCDATA)>
58
<!-- The character used to quote data values so that the
59
     field delimiters can be used in the data value, typically
60
     " or '. Zero or more quoteCharacter elements may appear in eml-physical. -->
61
<!ELEMENT quoteCharacter (#PCDATA)>
62
<!-- The character used to escape special characters
63
     so that they are interpreted literally, usually \ .
     Zero or more literalCharacter elements may appear in eml-physical. -->
64
<!ELEMENT literalCharacter (#PCDATA)>
65
<!-- Number of header lines or information that prepares data.
     Optional: at most one per eml-physical. -->
66
<!ELEMENT numHeaderLines (#PCDATA)>
67

    
68
<!--
69
                        Variable width format fields (attributes) can vary in their 
70
                        field length, thus the end of the field is 
71
                        delimited by a special character called a 
72
                        field delimiter (typically a comma or a space).
73

    
74
                        Data sets are generally classified as fixedWidth
75
                        format or variableWidth format, but we have
76
                        determined that this is actually a per-field
77
                        classification because one may encounter
78
                        fixedWidth fields mixed together in the same
79
                        data file with variableWidth fields.
80
                        
81
                        In our encoding scheme, the start of each field
82
                        is assumed to be the column after the last column
83
                        of the previous field, or the first column
84
                        if this is the first field in the dataset, unless 
85
                        the starting column is explicitly enumerated using the
86
                        "fieldStartColumn" element.
87
                        The end column for each field is classified
88
                        using either a special character delimiter indicated
89
                        using the "fieldDelimiter" element,
90
                        or a fixed field length indicated by using the "fieldWidth"
91
                        element.  The delimiter for the last field in the data set can be omitted.
92
                        variableWidth fields can vary in their field length, and the end of
93
                        the field is delimited by a special character
94
                        called a field delimiter, usually a comma or
95
                        a tab character.  fixedWidth fields have a set
96
                        length, and so the end of the field can always
97
                        be determined by adding the fieldWidth to the
98
                        starting column number.  Here is an example:
99
                        
100
                        Assume we have the following data in a data set:
101

    
102
                        May,100aaaa,1.2,
103
                        April,200aaaa,3.4,
104
                        June,300bbbb,4.6,
105

    
106
                        The metadata indicating the physical layout of the 4 fields would include the 
107
                        following:
108

    
109
                          <fieldDelimiter>,</fieldDelimiter>
110
                          <fieldWidth>3</fieldWidth>
111
                          <fieldWidth>3</fieldWidth>
112
                          <fieldDelimiter>,</fieldDelimiter>
113

    
114
                        In a strictly fixed format file, the metadata would be slightly different:
115

    
116
                        May100aaaa1.2
117
                        Apr200aaaa3.4
118
                        Jun300bbbb4.6
119

    
120
                          <fieldWidth>3</fieldWidth>
121
                          <fieldWidth>3</fieldWidth>
122
                          <fieldWidth>4</fieldWidth>
123
                          <fieldWidth>3</fieldWidth>           
124

    
125
                        or, one could explicitly describe the starting columns:
126
                
127
                          <fieldStartColumn>1</fieldStartColumn>
128
                          <fieldWidth>3</fieldWidth>
129
                          <fieldStartColumn>4</fieldStartColumn>
130
                          <fieldWidth>3</fieldWidth>
131
                          <fieldStartColumn>7</fieldStartColumn>
132
                          <fieldWidth>4</fieldWidth>
133
                          <fieldStartColumn>11</fieldStartColumn>
134
                          <fieldWidth>3</fieldWidth>                   
135
-->
136
<!-- Explicit starting column of a field. Optional; when omitted, the field
     starts in the column after the last column of the previous field, or
     in the first column for the first field. -->
<!ELEMENT fieldStartColumn (#PCDATA)>
137
<!-- Special character that marks the end of a variable-width field,
     usually a comma or a tab character. -->
<!ELEMENT fieldDelimiter (#PCDATA)>
138
<!-- Fixed length of a field; the end of the field is found by adding
     this width to the field's starting column number. -->
<!ELEMENT fieldWidth (#PCDATA)>
139
<!-- End of file -->
(9-9/12)