/lib/dtd/eml-physical-2.0.0.beta4e.dtd - Metacat - Ecoinformatics Redmine

metacat/lib/dtd/eml-physical-2.0.0.beta4e.dtd @ 869

       <!--
              '$RCSfile$'
              Copyright: 2000 Regents of the University of California and the
                         National Center for Ecological Analysis and Synthesis
            For Details: http://knb.ecoinformatics.org/
               '$Author: jones $'
                 '$Date: 2001-10-22 09:46:55 -0700 (Mon, 22 Oct 2001) $'
             '$Revision: 853 $'
           This program is free software; you can redistribute it and/or modify
           it under the terms of the GNU General Public License as published by
           the Free Software Foundation; either version 2 of the License, or
           (at your option) any later version.
           This program is distributed in the hope that it will be useful,
           but WITHOUT ANY WARRANTY; without even the implied warranty of
           MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
           GNU General Public License for more details.
           You should have received a copy of the GNU General Public License
           along with this program; if not, write to the Free Software
           Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
       -->
       <!-- EML DTD document that defines the structural
            characteristics of physical objects -->
       <!-- The root element, which contains an identifier and
            an physical description -->
       <!ELEMENT eml-physical (identifier, format, characterEncoding?, size?, authentication*, compressionMethod?,
                                                encodingMethod?, numHeaderLines?, recordDelimiter?, maxRecordLength?,
                                                quoteCharacter*, literalCharacter*, (fieldStartColumn?, (fieldDelimiter | fieldWidth))* )>
       <!-- File identifier of the metadata document -->
       <!ELEMENT identifier (#PCDATA)>
       <!ATTLIST identifier
         system CDATA #IMPLIED
+      >
       <!-- size -->
       <!ELEMENT size (#PCDATA)>
       <!ATTLIST size
         unit CDATA "bytes"
+      >
       <!-- Authentication value and method -->
       <!ELEMENT authentication (#PCDATA)>
       <!ATTLIST authentication
         method CDATA #IMPLIED
+      >
       <!-- Entity format (e.g., text, name of various binary formats [TIFF]) -->
       <!ELEMENT format (#PCDATA)>
       <!-- characterEncoding fro text files (e.g., ASCII, UTF-8) -->
       <!ELEMENT characterEncoding (#PCDATA)>
       <!-- Method of compression -->
       <!ELEMENT compressionMethod (#PCDATA)>
       <!-- Method of encoding -->
       <!ELEMENT encodingMethod (#PCDATA)>
       <!-- The character used to delimit records in the entity -->
       <!ELEMENT recordDelimiter (#PCDATA)>
       <!ELEMENT maxRecordLength (#PCDATA)>
       <!-- The character used to delimit quote data values so that the
            filed delimeters can be used in the data value, typically
            " or ' -->
       <!ELEMENT quoteCharacter (#PCDATA)>
       <!-- The character used to escape special characters
            so that they are interpreted literally, usually \  -->
       <!ELEMENT literalCharacter (#PCDATA)>
       <!-- Number of header lines or information that prepares data -->
       <!ELEMENT numHeaderLines (#PCDATA)>
       <!--
                               Variable width format fields (attributes) can vary in their
                               field length, thus the end of the field is
                               delimited by a special character called a
                               field delimiter (typically a comma or a space).
                               Data sets are generally classified as fixedWidth
                               format or variableWidth format, but we have
                               determined that this is actually a per-field
                               classification because one may encounter
                               fixedWidth fields mixed together in the same
                               data file with variableWidth fields.
                               In our encoding scheme, the start of each field
                               is assumed to be the column after the last column
                               of the previous field, or the first column
                               if this is the first field in the dataset, unless
                               the starting column is explicity enumerated using the
                               "fieldStartColumn" element.
                               The end column for each field is classified
                               using either a special character delimeter indicated
                               using the filedDelimiter element,
                               or a fixed field length indicated by using the "fieldWidth"
                               element.  The delimiter for the last field in the data set can be omitted.
                               variableWidth fields can vary in their field length, and the end of
                               the field is delimited by a special character
                               called a field delimiter, usually a comma or
                               a tab character.  fixedWidth fields have a set
                               length, and so the end of the field can always
                               be determined by adding the fieldWidth to the
                               starting column number.  Here is an example:
                               Assume we have the following data in a data set:
                               May,100aaaa,1.2,
                               April,200aaaa,3.4,
                               June,300bbbb,4.6,
                               The metadata indicating the physical layout of the 4 fields would include the
                               following:
                                 <delimiter>,</delimiter>
                                 <fieldWidth>3</fieldWidth>
                                 <fieldWidth>3</fieldWidth>
                                 <delimiter>,</delimiter>
                               In a strictly fixed format file, the metadata would be slightly different:
                               May100aaaa1.2
                               Apr200aaaa3.4
                               Jun300bbbb4.6
                                 <fieldWidth>3</fieldWidth>
                                 <fieldWidth>3</fieldWidth>
                                 <fieldWidth>4</fieldWidth>
                                 <fieldWidth>3</fieldWidth>
                               or, one could explicitly describe the starting columns:
                                 <fieldStartColumn>1</fieldStartColumn>
                                 <fieldWidth>3</fieldWidth>
                                 <fieldStartColumn>4</fieldStartColumn>
                                 <fieldWidth>3</fieldWidth>
                                 <fieldStartColumn>7</fieldStartColumn>
                                 <fieldWidth>4</fieldWidth>
                                 <fieldStartColumn>11</fieldStartColumn>
                                 <fieldWidth>3</fieldWidth>
       -->
       <!ELEMENT fieldStartColumn (#PCDATA)>
       <!ELEMENT fieldDelimiter (#PCDATA)>
       <!ELEMENT fieldWidth (#PCDATA)>
       <!-- End of file -->

(9-9/13)

Project

General

Profile

Metacat