Project

General

Profile

1
# Copyright: 2006 Regents of the University of California,
2
# Santa Barbara Coastal LTER
3
# http://sbcdata.lternet.edu/
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
# 
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
# 
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18
# 02111-1307  USA
19

    
20
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
21
require "rexml/document"
22
require "net/http"
23
require "uri"
24
require "eml.rb"
25

    
26
# Changing buffer size to greatly improve performance 
27
class Net::BufferedIO
  # Monkey patch: overrides Net::BufferedIO#rbuf_fill so that each fill of the
  # read buffer pulls up to 32768 bytes from the underlying IO (the stock
  # implementation this replaced read 1024 bytes at a time).
  #
  # The read is bounded by @read_timeout through Timeout.timeout; the bare
  # Kernel#timeout helper used previously is deprecated and not available on
  # current Rubies. Timeout is already loaded by net/http.
  #
  # NOTE(review): newer Net::HTTP releases may already read in large chunks;
  # confirm this override is still wanted before upgrading Ruby.
  def rbuf_fill
    Timeout.timeout(@read_timeout) do
      # changed from 1024
      @rbuf << @io.sysread(32768)
    end
  end
end
35

    
36
# = Metacat Client Library
37
# == What is it
38
# A client for the Metacat data catalog. For a description of Metacat, see
39
# http://knb.ecoinformatics.org/software/metacat
40
# For now, this client does not implement all features of the API. Rather, 
41
# it focuses on querying and returning Eml metadata objects from either 
42
# pathqueries or docid's. Should you find yourself using methods other than find()
43
# very often, you may be veering from the original intent.
44
# 
45
# ==   Examples
46
# ===  Read metadata for a public document
47
#
48
#   require 'lib/metacat.rb'
49
#   metacat = Metacat.new('http://data.piscoweb.org/catalog/metacat')
50
#   eml = metacat.find(:docid => 'pisco.10.4')
51
#   puts eml.docid
52
#   => 'pisco.10.4'
53
#
54
# === Log into Metacat and read Eml metadata. Then logout
55
#
56
#   username = 'uid=cburt,o=PISCO,dc=ecoinformatics,dc=org'
57
#   password = *****
58
#   Metacat.new('http://data.piscoweb.org/catalog/metacat', 'username' => username, 'password' => password) do |metacat|
59
#     eml = metacat.find(:docid => 'pisco.10.3')
60
#     start_date, end_date = eml.temporal_coverage
60
#     puts "start: #{start_date}, end: #{end_date}"
62
#   end
63
#
64
# === Search for oceanographic data
65
#
66
#   metacat = Metacat.new('http://data.piscoweb.org/catalog/metacat')
67
#   pathquery = '...' # see example at http://knb.ecoinformatics.org/software/metacat/metacatquery.html
68
#   docs = metacat.find(:squery => pathquery)
69
#   docs.each { |eml| puts eml.docid }
70
#
71
# === Find and write a data_table to local disk
72
#   Metacat.new('http://data.piscoweb.org/catalog/metacat', 'username' => username, 'password' => password) do |metacat|
73
#     file = File.new('tmp', 'w+')
74
#     # using a block you can avoid loading the whole file into memory!
75
#     metacat.read('data_table.1.1') do |fragment|
76
#       file.write(fragment)
77
#     end
78
#     file.close
79
#   end 
80
class Metacat

  # Creates a client for the metacat servlet at +path_to_metacat+.
  #
  # +options+ may carry 'username' and 'password' (String or Symbol keys);
  # when both are present the client logs in right away. If a block is given
  # the client is yielded to it and, when a login is active, logged out again
  # once the block finishes -- also when the block raises.
  #
  # Example:
  #   Metacat.new(url, 'username' => user, 'password' => secret) do |metacat|
  #     metacat.find(:docid => 'pisco.10.3')
  #   end
  def initialize(path_to_metacat, options = {}, &block)
    @uri = URI.parse(path_to_metacat)
    @cookie = false
    @logged_in = false

    username = options['username'] || options[:username]
    password = options['password'] || options[:password]
    login(username, password) if username && password

    return unless block_given?

    begin
      yield self
    ensure
      # Do not leave the server-side session open if the block blew up.
      logout if @logged_in
    end
  end

  # Check if the metacat instance has a session cookie
  def logged_in?
    @cookie ? true : false
  end

  # Returns either an array of Eml documents if :squery is passed or
  # a single Eml document (or nil when nothing was found) if passed :docid.
  # This method _will_ _not_ return a data table, only Eml objects.
  # When neither key is given, nil is returned.
  #
  # If you need to retrieve a data table or other document, use read()
  #
  # Raises ArgumentError when both keys are given, or when :docid refers to
  # an xml document that cannot be turned into an Eml object.
  #
  # Examples:
  #   metacat.find(:docid => 'cbs_10.1')
  #   metacat.find(:squery => xml_path_query)
  #
  def find(args)
    docid = args[:docid]
    query = args[:squery]

    if docid && query
      raise ArgumentError, "Too many parameters. Choose :docid or :squery"
    elsif docid
      xml = read(docid, 'only_eml' => true)
      return nil if xml.nil?

      try_eml(xml) || raise(ArgumentError, "#{docid} does not refer to eml metadata. To read other documents use read.")
    elsif query
      resultset = REXML::Document.new(squery(query))
      found = []
      resultset.elements.each("/resultset/document") do |entry|
        # The first child element of each <document> is used as the docid.
        xml = read(entry.elements[1].text, 'only_eml' => true)
        # read gives nil for missing documents and for non-xml content;
        # there is nothing to wrap in that case.
        next if xml.nil?

        eml = try_eml(xml)
        found << eml if eml
      end
      found
    end
  end

  # Logs into metacat using ldap authentication. Usernames are complex, such as
  # 'uid=cburt,o=PISCO,dc=ecoinformatics,dc=org'
  #
  # Raises MetacatPermissionDenied when the reply's root element is not
  # <login>, and MetacatResponseError when the reply is not text/xml.
  #
  # NOTE(review): the credentials travel in the query string of a GET
  # request (see metacat_get); consider whether that is acceptable.
  #
  # Example
  #   metacat.login('uid=cburt,o=PISCO,dc=ecoinformatics,dc=org', '******')
  #   => true
  def login(username, password)
    response = metacat_get({
      'action'    =>  'login',
      'qformat'   =>  'xml',
      'username'  =>  username,
      'password'  =>  password
    })
    raise MetacatResponseError unless response.content_type == 'text/xml'

    doc = REXML::Document.new(response.read_body)
    unless doc.root.name == 'login'
      # The <message> element may be absent; do not crash while reporting.
      message = doc.root.elements['message']
      raise MetacatPermissionDenied, "login error: #{message && message.text}"
    end

    @cookie = response['set-cookie']
    @logged_in = true
  end

  # Ends the metacat session and forgets the session cookie.
  #
  # Returns true on success. Raises RuntimeError when the reply's root
  # element is not <logout>, and MetacatResponseError when the reply is not
  # text/xml. The cookie is kept when the logout fails.
  def logout
    response = metacat_get({
      'action'    =>  'logout',
      'qformat'   =>  'xml'
    })
    raise MetacatResponseError unless response.content_type == 'text/xml'

    doc = REXML::Document.new(response.read_body)
    # Must be a comparison: an assignment here is always truthy and would
    # report success for any xml reply.
    raise "Failed to logout: #{doc.root.text}" unless doc.root.name == 'logout'

    @cookie = false
    @logged_in = false
    true
  end

  # Reads a specified document from metacat. If xml is found, a REXML::Document will be returned;
  # nil is returned when metacat reports that the document does not exist.
  #
  # When reading text data tables, it should be noted that loading the entire large file can
  # consume an enormous amount of memory. To avoid this, read can be passed a &block. The block
  # will receive fragments of the file as it comes in.
  #
  # Pass 'only_eml' => true in +options+ to get nil instead of the content of
  # non-xml documents.
  #
  # Raises MetacatPermissionDenied when the error text mentions 'permission'
  # and RuntimeError for any other <error> reply.
  #
  # NOTE: for non-xml content the value of this method is whatever
  # metacat_get returns, i.e. the Net::HTTPResponse whose body has been read,
  # not the body string itself.
  #
  # Examples:
  # Reading an EML document
  #   metacat.read('eml_doc.1.1')
  #   => <REXML::Document >
  #
  # Writing a data table to disk
  #   file = File.new('tmp', 'w+')
  #   metacat.read('data_table.1.1') do |fragment|
  #     file.write(fragment)
  #   end
  #   file.close
  #
  # Reading an entire data table into memory
  #   data_table = metacat.read('data_table.1.1')
  def read(docid, options = {}, &block) # :yields: xml or data_table fragment
    params = {
      'action'  =>  'read',
      'qformat' =>  'xml',
      'docid'   =>  docid
    }
    metacat_get(params) do |response|
      if response.content_type == 'text/xml'
        doc = REXML::Document.new(response.read_body)
        return doc unless doc.root.name == 'error' # plain xml data

        # An empty <error/> has nil text; normalise so the checks below work.
        message = doc.root.text.to_s
        raise MetacatPermissionDenied, message if message.include?('permission')
        # Nothing found, return nil
        return nil if message.include?('does not exist')

        raise "Unrecognized response from metacat: #{message}"
      elsif options['only_eml'] == true
        # probably a data table, which the caller asked not to receive
        return nil
      elsif block_given?
        response.read_body { |fragment| yield fragment }
      else
        response.read_body
      end
    end
  end

  # Uses the metacat pathquery search and returns the xml response as a string.
  # For query format information, see
  # http://knb.ecoinformatics.org/software/metacat/metacatquery.html
  #
  # Raises MetacatResponseError (a RuntimeError) when the reply is not text/xml.
  def squery(squery)
    response = metacat_get({
      'action'  =>  'squery',
      'qformat' =>  'xml',
      'query'   =>  squery
    })
    unless response.content_type == 'text/xml'
      raise MetacatResponseError, "Metacat returned unexpected Content Type"
    end

    response.read_body
  end

  private

  # Wraps +doc+ in an Eml object; gives false when Eml.new rejects it with
  # an ArgumentError.
  def try_eml(doc)
    Eml.new(doc)
  rescue ArgumentError
    false
  end

  # POSTs +data+ to the metacat path. With a block the response is yielded
  # while the connection is still open.
  def metacat_post(data, &block)
    open_connection do |http|
      if block_given?
        http.request_post(@uri.path, data, headers) { |response| yield(response) }
      else
        http.post(@uri.path, data, headers)
      end
    end
  end

  # GETs the metacat path with +data+ (a Hash) as the query string. With a
  # block the response is yielded while the connection is still open.
  def metacat_get(data, &block)
    path = @uri.path + query_string(data)
    open_connection do |http|
      if block_given?
        http.request_get(path, headers) { |response| yield(response) }
      else
        http.get(path, headers)
      end
    end
  end

  # Opens a connection to the configured host and hands it to the block;
  # TLS is switched on when the metacat URL uses the https scheme.
  def open_connection(&block)
    Net::HTTP.start(@uri.host, @uri.port, :use_ssl => @uri.scheme == 'https', &block)
  end

  # Builds "?k=v&k2=v2" from +hash+, form-encoding keys and values so that
  # characters such as '&', '=' and spaces cannot corrupt the query.
  def query_string(hash)
    '?' + URI.encode_www_form(hash)
  end

  # Request headers: the session cookie when one is held, otherwise none.
  def headers
    @cookie ? { 'Cookie' => @cookie } : {}
  end

end
293

    
294
# Raised when metacat refuses an operation for lack of permission
# (for example a rejected login).
class MetacatPermissionDenied < RuntimeError; end
296

    
297
# Raised when metacat answers with an unexpected content type.
class MetacatResponseError < RuntimeError; end
(3-3/3)