Project

General

Profile

metacat / src / ruby / lib / data_table.rb @ 6693

1
# Copyright: 2006 Regents of the University of California,
2
# Santa Barbara Coastal LTER
3
# http://sbcdata.lternet.edu/
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
# 
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
# 
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18
# 02111-1307  USA
19
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
20
require "eml.rb"
21

    
22
# ==  What is it
23
# Each EML document can contain DataTable elements representing (in most cases) plain
24
# text data. The attributes of this data, such as column names, types, domain info,
25
# are documented in the eml metadata. DataTable encapsulates data table elements from
26
# eml documents in the instance variable @metadata.
27
# 
28
# The purpose of this class is to provide methods to easily access metadata attributes
29
# pertaining to the data table. It can also be extended for specific
30
# applications to process the data based on the metadata description.
31
# == Examples
32
# === Get the location where this data file is stored
33
#   eml = metacat.find(:docid => 'somedoc.1.1')
34
#   data_table = eml.data_tables[0]
35
#   data_table.location
36
#   => ecogrid://knb/pisco_cbs.30.3
37
# === Write the data file to disk
38
#   # note we are using a block so the whole file isn't loaded to RAM
39
#   file = File.new("./tmp/#{data_table.id}", "w")
40
#   data_table.read do |buffer|
41
#     file.write(buffer)
42
#   end
43
#   file.close 
44
class DataTable
  attr_reader :metadata, :eml

  # data_table_element:: the dataTable element this object wraps; it is
  #                      queried through +elements[...]+ (presumably an
  #                      REXML::Element -- confirm against the caller)
  # eml::                the object this data table was taken from
  def initialize(data_table_element, eml)
    @metadata = data_table_element
    @eml = eml
  end

  # Methods for accessing eml metadata
  # ----------------------------------

  # Pulls the docid from the distribution url: the last "/"-separated
  # segment of #location (trailing slashes are ignored).
  def docid
    @docid ||= location.split('/').last
  end

  # Refers to #docid.
  def id
    docid
  end

  # Text of physical/distribution/online/url (memoized).
  def location
    @location ||= @metadata.elements['physical/distribution/online/url'].text
  end

  # The <physical> child element, or nil when there is none.
  def physical
    @metadata.elements['physical']
  end

  # Text of physical/size converted with to_i.
  # Only supports unit bytes.
  def size
    physical.elements['size'].text.to_i
  end

  # Name of the first child element of physical/dataFormat.
  def data_format
    physical.elements['dataFormat'].elements[1].name
  end

  # The physical/dataFormat/textFormat element, or nil when the table is
  # not described as text.
  def text_format
    physical.elements['dataFormat/textFormat']
  end

  # The textFormat/simpleDelimited element, or nil when it is absent.
  # Raises RuntimeError when the table has no textFormat.
  def simple_delimited
    text_format_or_raise.elements['simpleDelimited']
  end

  # Number of header lines declared in textFormat/numHeaderLines.
  # A missing numHeaderLines element is reported as 0 instead of failing
  # on nil (NOTE(review): assumes the element is optional in EML --
  # confirm against the schema).
  # Raises RuntimeError when the table has no textFormat.
  def num_headers
    header_lines = text_format_or_raise.elements['numHeaderLines']
    header_lines ? header_lines.text.to_i : 0
  end

  # Text of textFormat/recordDelimiter.
  # Raises RuntimeError when the table has no textFormat.
  def record_delimiter
    text_format_or_raise.elements['recordDelimiter'].text
  end

  # Text of simpleDelimited/fieldDelimiter.
  # Raises RuntimeError when the table has no textFormat or no
  # simpleDelimited element.
  def field_delimiter
    delimited = simple_delimited
    raise "data table is not in simpleDelimited format" unless delimited
    delimited.elements['fieldDelimiter'].text
  end

  # Array of the attributeList/attribute elements, in document order.
  def columns
    @metadata.elements.to_a('attributeList/attribute')
  end

  # Text of the entityName element.
  def entity_name
    @metadata.elements['entityName'].text
  end

  # ---------------------------
  # End Metadata Access Methods

  # Reads the dataTable text from the url or docid specified by the
  # physical/distribution/online/url entity, yielding the body to the
  # given block chunk by chunk so the whole file is never held in memory.
  #
  # The source is chosen from the url *scheme* (not from a substring
  # anywhere in the url): "ecogrid:" urls are read from metacat by docid,
  # "http:"/"https:" urls are fetched directly.
  #
  # Raises RuntimeError for any other location, and for error or
  # unrecognized responses from metacat.
  def read(&block)
    case location
    when /\A\s*ecogrid:/i
      read_from_metacat(&block)
    when /\A\s*https?:/i
      read_from_url(&block)
    else
      raise 'Unknown location for dataTable'
    end
  end

  private

  # Returns the textFormat element or raises when the table has none.
  def text_format_or_raise
    text_format || raise("data table is not in textFormat")
  end

  # Streams the document identified by #docid from PATH_TO_METACAT.
  def read_from_metacat
    uri = URI.parse(PATH_TO_METACAT)
    uri.query = "action=read&qformat=xml&docid=#{docid}"
    http_get(uri) do |response|
      # The content type tells data (text/plain) apart from a metacat
      # error report (text/xml).
      case response.content_type
      when 'text/xml'
        doc = REXML::Document.new(response.read_body)
        raise doc.root.text if doc.root.name == 'error'
        raise "Unrecognized response from metacat at #{PATH_TO_METACAT}"
      when 'text/plain'
        response.read_body { |chunk| yield chunk }
      else
        raise "Unrecognized content type \"#{response.content_type}\" " \
              "from metacat at #{PATH_TO_METACAT}"
      end
    end
  end

  # Streams the body found at #location.
  # NOTE(review): the response status is not checked, so the body of an
  # error page is yielded like data -- confirm whether callers rely on it.
  def read_from_url
    http_get(URI.parse(location.strip)) do |response|
      response.read_body { |chunk| yield chunk }
    end
  end

  # Issues a GET for +uri+ and yields the response before its body has
  # been read. The block form of Net::HTTP.start closes the connection
  # when the block returns or raises; SSL is enabled for https urls.
  def http_get(uri)
    Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https') do |http|
      http.request_get(uri.request_uri) { |response| yield response }
    end
  end
end