Project

General

Profile

metacat / src / ruby / lib / eml.rb @ 6693

1
# Copyright: 2006 Regents of the University of California,
2
# Santa Barbara Coastal LTER
3
# http://sbcdata.lternet.edu/
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
# 
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
# 
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18
# 02111-1307  USA
19

    
20
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
21
require "date"
22
require "rexml/document"
23
require "data_table.rb"
24

    
25
# == What is it
# The goal of this object is to encapsulate a DOM representation of
# an EML (http://knb.ecoinformatics.org/software/eml) document and provide
# quick helper methods to access commonly needed attributes. These methods
# return a more "ruby friendly" representation of this metadata.
#
# At their core Eml objects contain a REXML::Document in the instance variable @doc.
# Until this object is feature-complete, this DOM document can be used when this
# object is returned from this module's Metacat client.
#
# ==   Examples
# ===  Get geographic coverage
#
#   metacat = Metacat.new('http://data.piscoweb.org/catalog/metacat')
#   eml_object = metacat.find(:docid => 'HMS001_020ADCP019R00_20060612.50.1')
#   geographic_coverage = eml_object.geographic_coverage
#   => [{ "latitude"=>36.6214,
#         "longitude"=>-121.8996,
#         "id"=>"HMS001",
#         "description"=>
#         "Hopkins Marine Station: HMS001: This inner-shelf mooring is located offshore
#         of the city of Monterey, California, USA, near Hopkins Marine Station.  The
#         mooring is located in an overall water depth of 020 meters (referenced to Mean
#         Sea Level, MSL).  The altitudeMinimum and altitudeMaximum tags in this initial
#         coverage section refer to the ADCP measurement range and are also referenced to
#         MSL.  They do not represent the overall water depth.  Note the nominal range of
#         the ADCP may extend from near-bottom (a depth expressed as a negative altitude)
#         to slightly above MSL (a height expressed as a positive altitude)."}]
#
# ===  Get associated data table(DataTable) and write it to disk
#   eml_object.data_tables.each do |data_table|
#     # open for writing; the block form closes the file when done
#     File.open("./store/#{data_table.id}", "w") do |file|
#       # data_table is an object, with method read
#       data_table.read do |buffer|
#         file.write(buffer)
#       end
#     end
#   end
class Eml
  attr_reader :docid, :doc

  # Wraps an EML document.
  #
  # metadata:: a REXML::Document whose root element is named 'eml'
  #
  # Raises ArgumentError when metadata is not a REXML::Document, has no
  # root element, or has a root element that is not 'eml'.
  def initialize(metadata)
    # Check for a root before asking its name so that an empty document is
    # reported as a bad argument rather than failing on nil.
    unless metadata.is_a?(REXML::Document) && metadata.root && metadata.root.name == 'eml'
      raise ArgumentError, 'Must initialize with REXML::Document representation of EML metadata'
    end

    @doc = metadata
    @docid = @doc.root.attributes['packageId']
  end

  # Returns the wrapped document serialized by REXML.
  def to_s
    @doc.to_s
  end

  # Returns an Array with one DataTable per dataset/dataTable element
  # (empty when the document has none). New DataTable objects are built on
  # every call.
  def data_tables
    tables = []
    @doc.root.elements.each("dataset/dataTable") do |element|
      tables << DataTable.new(element, self)
    end
    tables
  end

  # Returns the data table reporting the greatest size.
  #
  # A document with exactly one table returns that table without looking at
  # its size. Otherwise only tables whose size is greater than zero are
  # candidates, ties go to the earliest table, and nil is returned when
  # there is no candidate.
  def largest_data_table
    # Build the table list once; data_tables creates fresh objects per call.
    tables = data_tables
    return tables.first if tables.length == 1

    largest = nil
    largest_size = 0
    tables.each do |table|
      table_size = table.size
      if table_size > largest_size
        largest_size = table_size
        largest = table
      end
    end
    largest
  end

  # Pulls a date range from the temporalCoverage elements.
  #
  # Returns a two element Array: the earliest beginDate and the latest
  # endDate found, as Date objects, or [nil, nil] when the document has no
  # rangeOfDates.
  #
  # Note : EML supports multiple date ranges to account for gaps;
  # this code just lumps them into one.
  # Also, it does not support cases of singleDateTime.
  def temporal_coverage
    # Two separate arrays: begin and end dates must not be mixed together.
    begin_dates = []
    end_dates = []
    path = "dataset/coverage/temporalCoverage/rangeOfDates"
    @doc.root.elements.each(path) do |range|
      # elements[1] is the first child element of beginDate / endDate;
      # its text is parsed with Date.strptime's default format.
      begin_dates << Date.strptime(range.elements["beginDate"].elements[1].text)
      end_dates << Date.strptime(range.elements["endDate"].elements[1].text)
    end
    return begin_dates.min, end_dates.max
  end

  # Returns an Array with one Hash per geographicCoverage element under
  # dataset/coverage (empty when there is no coverage element). Each Hash has
  # the keys:
  #
  # 'id'::          the element's id attribute
  # 'description':: text of geographicDescription
  # 'latitude'::    northBoundingCoordinate as a Float
  # 'longitude'::   westBoundingCoordinate as a Float
  #
  # Only the north and west bounds are read, so each site is reported as the
  # north-west corner of its bounding box.
  def geographic_coverage
    sites = []
    container = coverage
    return sites unless container

    container.elements.each('geographicCoverage') do |g|
      sites << {
        'id'          => g.attributes['id'],
        'description' => g.elements['geographicDescription'].text,
        # north/south bounds are latitudes, east/west bounds are longitudes
        'latitude'    => g.elements['boundingCoordinates/northBoundingCoordinate'].text.to_f,
        'longitude'   => g.elements['boundingCoordinates/westBoundingCoordinate'].text.to_f
      }
    end
    sites
  end

  # Returns the dataset/coverage element, or nil when the document has none.
  def coverage
    @doc.root.elements["dataset/coverage"]
  end

  # Returns the text of dataset/title, or nil when the element is absent.
  def title
    element = @doc.root.elements["dataset/title"]
    element && element.text
  end

  # Returns the text of dataset/shortName, or nil when the element is absent.
  def short_name
    element = @doc.root.elements["dataset/shortName"]
    element && element.text
  end
end