/lib/dtd/eml-physical-2.0.dtd - Annotate - Metacat - Ecoinformatics Redmine

808

berkley

<!--

2

       '$RCSfile$'

3

       Copyright: 2000 Regents of the University of California and the

4

                  National Center for Ecological Analysis and Synthesis

5

     For Details: http://knb.ecoinformatics.org/

6

7

        '$Author$'

8

          '$Date$'

9

      '$Revision$'

10

11

    This program is free software; you can redistribute it and/or modify

12

    it under the terms of the GNU General Public License as published by

13

    the Free Software Foundation; either version 2 of the License, or

14

    (at your option) any later version.

15

16

    This program is distributed in the hope that it will be useful,

17

    but WITHOUT ANY WARRANTY; without even the implied warranty of

18

    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

19

    GNU General Public License for more details.

20

21

    You should have received a copy of the GNU General Public License

22

    along with this program; if not, write to the Free Software

23

    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

24

-->

25

<!-- EML DTD document that defines the structural

26

     characteristics of physical objects -->

27

<!-- Root element: an identifier and a format come first, followed by the
     optional descriptors of the physical object and, last, zero or more
     per-field layout entries (an optional start column, then either a
     field delimiter or a field width) -->
<!ELEMENT eml-physical (
    identifier,
    format,
    characterEncoding?,
    size?,
    authentication*,
    compressionMethod?,
    encodingMethod?,
    numHeaderLines?,
    recordDelimiter?,
    maxRecordLength?,
    quoteCharacter*,
    literalCharacter*,
    (fieldStartColumn?, (fieldDelimiter | fieldWidth))*
)>

32

<!-- File identifier of the metadata document -->
<!ELEMENT identifier (#PCDATA)>
<!-- Optional "system" attribute: declared CDATA #IMPLIED, so it may be
     omitted and has no default value -->
<!ATTLIST identifier
  system CDATA #IMPLIED
>

36

37

<!-- size: the "unit" attribute is CDATA and defaults to "bytes" when it
     is not given -->
<!ELEMENT size (#PCDATA)>
<!ATTLIST size
  unit CDATA "bytes"
>

41

42

<!-- Authentication value (element content) and method (optional "method"
     attribute, CDATA #IMPLIED) -->
<!ELEMENT authentication (#PCDATA)>
<!ATTLIST authentication
  method CDATA #IMPLIED
>

46

47

<!-- Entity format (e.g., text, name of various binary formats [TIFF]) -->

48

<!ELEMENT format (#PCDATA)>

49

<!-- characterEncoding for text files (e.g., ASCII, UTF-8) -->

50

<!ELEMENT characterEncoding (#PCDATA)>

51

<!-- Method of compression -->

52

<!ELEMENT compressionMethod (#PCDATA)>

53

<!-- Method of encoding -->

54

<!ELEMENT encodingMethod (#PCDATA)>

55

<!-- The character used to delimit records in the entity -->

56

<!ELEMENT recordDelimiter (#PCDATA)>

57

<!-- NOTE(review): presumably the maximum length of a record in the entity;
     the unit (characters or bytes) is not stated in this file, so confirm -->
<!ELEMENT maxRecordLength (#PCDATA)>

58

<!-- The character used to quote data values so that the
     field delimiters can be used in the data value, typically
     " or ' -->

61

<!ELEMENT quoteCharacter (#PCDATA)>

62

<!-- The character used to escape special characters
     so that they are interpreted literally, usually \  -->

64

<!ELEMENT literalCharacter (#PCDATA)>

65

<!-- Number of header lines or information that prepares data -->

66

<!ELEMENT numHeaderLines (#PCDATA)>

67

68

<!--

69

                        Variable width format fields (attributes) can vary in their

70

                        field length, thus the end of the field is

71

                        delimited by a special character called a

72

                        field delimiter (typically a comma or a space).

73

74

                        Data sets are generally classified as fixedWidth

75

                        format or variableWidth format, but we have

76

                        determined that this is actually a per-field

77

                        classification because one may encounter

78

                        fixedWidth fields mixed together in the same

79

                        data file with variableWidth fields.

80

81

                        In our encoding scheme, the start of each field

82

                        is assumed to be the column after the last column

83

                        of the previous field, or the first column

84

                        if this is the first field in the dataset, unless

85

                        the starting column is explicitly enumerated using the

86

                        "fieldStartColumn" element.

87

                        The end column for each field is classified

88

                        using either a special character delimiter indicated

89

                        using the "fieldDelimiter" element,

90

                        or a fixed field length indicated by using the "fieldWidth"

91

                        element.  The delimiter for the last field in the data set can be omitted.

92

                        variableWidth fields can vary in their field length, and the end of

93

                        the field is delimited by a special character

94

                        called a field delimiter, usually a comma or

95

                        a tab character.  fixedWidth fields have a set

96

                        length, and so the end of the field can always

97

                        be determined by adding the fieldWidth to the

98

                        starting column number.  Here is an example:

99

100

                        Assume we have the following data in a data set:

101

102

                        May,100aaaa,1.2,

103

                        April,200aaaa,3.4,

104

                        June,300bbbb,4.6,

105

106

                        The metadata indicating the physical layout of the 4 fields would include the

107

                        following:

108

109

                          <fieldDelimiter>,</fieldDelimiter>

110

                          <fieldWidth>3</fieldWidth>

111

                          <fieldWidth>3</fieldWidth>

112

                          <fieldDelimiter>,</fieldDelimiter>

113

114

                        In a strictly fixed format file, the metadata would be slightly different:

115

116

                        May100aaaa1.2

117

                        Apr200aaaa3.4

118

                        Jun300bbbb4.6

119

120

                          <fieldWidth>3</fieldWidth>

121

                          <fieldWidth>3</fieldWidth>

122

                          <fieldWidth>4</fieldWidth>

123

                          <fieldWidth>3</fieldWidth>

124

125

                        or, one could explicitly describe the starting columns:

126

127

                          <fieldStartColumn>1</fieldStartColumn>

128

                          <fieldWidth>3</fieldWidth>

129

                          <fieldStartColumn>4</fieldStartColumn>

130

                          <fieldWidth>3</fieldWidth>

131

                          <fieldStartColumn>7</fieldStartColumn>

132

                          <fieldWidth>4</fieldWidth>

133

                          <fieldStartColumn>11</fieldStartColumn>

134

                          <fieldWidth>3</fieldWidth>

135

-->

136

<!-- Explicit starting column of a field; when it is omitted, the field is
     assumed to start in the column after the last column of the previous
     field (or in the first column for the first field) -->
<!ELEMENT fieldStartColumn (#PCDATA)>

137

<!-- Special character that marks the end of a variable-width field,
     usually a comma or a tab character -->
<!ELEMENT fieldDelimiter (#PCDATA)>

138

<!-- Set length of a fixed-width field; the end of the field is found by
     adding this width to the starting column number -->
<!ELEMENT fieldWidth (#PCDATA)>

139

<!-- NOTE(review): paragraph is declared here but is not referenced by the
     eml-physical content model visible in this file; confirm whether the
     declaration is still needed -->
<!ELEMENT paragraph (#PCDATA)>

140

<!-- End of file -->

Project

General

Profile

Metacat

Project

General

Profile

Metacat

metacat/lib/dtd/eml-physical-2.0.dtd @ 836