Project

General

Profile

« Previous | Next » 

Revision 4945

Added by Duane Costa over 15 years ago

Bug 3835: Design and implement OAI-PMH compliant harvest subsystem
1. When building the Metacat OAI-PMH catalog, modify the database query to search only for documents with public read access.
2. Change the syntax used for identifiers in the repository to LSID format, instead of a URL that points to a specific repository.
Old syntax: 'http://metacat.lternet.edu/knb/metacat/knb-lter-gce.169'
New syntax: 'urn:lsid:knb.ecoinformatics.org:knb-lter-gce:169'

View differences:

src/edu/ucsb/nceas/metacat/oaipmh/provider/server/catalog/MetacatRecordFactory.java
38 38
  private String repositoryIdentifier = null;
39 39
  private String context = null;
40 40
  private final String TEST_CONTEXT = "knb";
41
  private final String LSID_PREFIX = "urn:lsid:knb.ecoinformatics.org:";
41 42

  
42 43

  
43 44
  /**
......
79 80
  /**
80 81
   * Utility method to parse the 'local identifier' from the OAI identifier
81 82
   * 
82
   * @param identifier
83
   *          OAI identifier (e.g.
84
   *          "http://metacat.lternet.edu/knb/metacat/knb-lter-gce.247")
83
   * @param oaiIdentifier  OAI identifier e.g.
84
   *                       "urn:lsid:knb.ecoinformatics.org:knb-lter-gce:169"
85 85
   * 
86
   * @return local identifier (e.g. "knb-lter-gce.247")
86
   * @return local identifier, e.g. "knb-lter-gce.169"
87 87
   */
88
  public String fromOAIIdentifier(String identifier) {
89
    try {
90
      if (identifier != null) {
91
        int i = identifier.indexOf("/metacat/");
92
        String tailString = identifier.substring(i + 9);
93
        StringTokenizer tokenizer = new StringTokenizer(tailString, "/");
94
        String localIdentifier = tokenizer.nextToken();
95
        return localIdentifier;
96
      } else {
97
        return null;
88
  public String fromOAIIdentifier(String oaiIdentifier) {
89
    String localIdentifier = null;
90
    
91
    if (oaiIdentifier != null) {
92
      String[] oaiIdentifierArray = splitOAIIdentifier(oaiIdentifier);
93
      int len = oaiIdentifierArray.length;
94
      if (len >= 2) {
95
        String scope = oaiIdentifierArray[len - 2];
96
        String identifier = oaiIdentifierArray[len - 1];
97
        localIdentifier = scope + "." + identifier;
98 98
      }
99
    } catch (Exception e) {
100
      return null;
101 99
    }
100
  
101
    return localIdentifier;
102 102
  }
103 103

  
104 104

  
105 105
  /**
106 106
   * Construct an OAI identifier from the native item
107 107
   * 
108
   * @param  nativeItem         native Item object
109
   * @return OAI identifier, 
110
   *         e.g. urn:lsid:knb.ecoinformatics.org:knb-lter-gce:169
111
   */
112
  public String getOAIIdentifier(Object nativeItem) {
113
    String localIdentifier = getLocalIdentifier(nativeItem);
114
    StringBuffer sb = new StringBuffer();
115
    
116
    if (localIdentifier != null) {
117
      String[] localIdentifierArray = splitLocalIdentifier(localIdentifier);
118
      
119
      if (localIdentifierArray.length >= 2) {
120
        sb.append(LSID_PREFIX);
121
        String scope = localIdentifierArray[0];
122
        sb.append(scope);
123
        sb.append(":");
124
        String identifier = localIdentifierArray[1];
125
        sb.append(identifier);
126
      }
127
    }
128
    
129
    return sb.toString();
130
  }
131

  
132

  
133
  /**
134
   * Construct an OAI identifier from the native item
135
   * 
108 136
   * @param nativeItem
109 137
   *          native Item object
110 138
   * @return OAI identifier
111 139
   */
112
  public String getOAIIdentifier(Object nativeItem) {
140
  public String getOAIIdentifierOld(Object nativeItem) {
113 141
    String localIdentifier = getLocalIdentifier(nativeItem);
114 142
    StringBuffer sb = new StringBuffer();
115 143
    
......
238 266
    return headerArray;
239 267
  }
240 268
  
269
  
270
  private String[] splitLocalIdentifier(String s) {
271
    StringTokenizer tokenizer = new StringTokenizer(s, ".");
272
    String[] tokens = new String[tokenizer.countTokens()];
273
    for (int i=0; i<tokens.length; ++i) {
274
        tokens[i] = tokenizer.nextToken();
275
    }
276
    return tokens;
277
  }
278
  
279
  private String[] splitOAIIdentifier(String s) {
280
    StringTokenizer tokenizer = new StringTokenizer(s, ":");
281
    String[] tokens = new String[tokenizer.countTokens()];
282
    for (int i=0; i<tokens.length; ++i) {
283
        tokens[i] = tokenizer.nextToken();
284
    }
285
    return tokens;
286
  }
287
  
241 288
}
src/edu/ucsb/nceas/metacat/oaipmh/provider/server/catalog/MetacatCatalog.java
85 85
  private HashMap resumptionResults = new HashMap();
86 86
  private int maxListSize;
87 87
  
88
  /*
89
   * QUERY string to find all eml-2.x.y documents in the Metacat database
90
   * that are publicly accessible
91
   */
92
  private final String QUERY =
93
  "SELECT docid, doctype, date_updated " +
94
  "FROM xml_documents " +
95
  "WHERE doctype like 'eml://ecoinformatics.org/eml-2%' AND " + 
96
  "  (docid IN " +
97
  "     (SELECT docid " +
98
  "      FROM xml_access " +
99
  "      WHERE( (lower(principal_name) = 'public') AND " +
100
  "             perm_type = 'allow' AND " +
101
  "             permission > 3" +
102
  "           )" +
103
  "     )" +
104
  "   AND " +
105
  "   docid NOT IN " +
106
  "     (SELECT docid " +
107
  "      FROM xml_access " +
108
  "      WHERE( (lower(principal_name) = 'public') AND " +
109
  "             perm_type = 'deny' AND " +
110
  "             perm_order ='allowFirst' AND " +
111
  "             permission > 3" +
112
  "           )" +
113
  "     )" +
114
  "  )";
88 115
  
89
  /* Constructors */
90 116
  
117
/* Constructors */
118
  
91 119
  public MetacatCatalog(Properties properties) {
92 120
    String errorStr;
93 121
    String temp;
......
111 139
        metacatURL = SystemUtil.getServletURL();
112 140
      }
113 141
      else {
114
        metacatURL = "http://localhost:8080/knb/metacat";
142
        metacatURL = properties.getProperty("test.metacatUrl");
115 143
      }
116 144
      
117 145
      logger.warn("metacatURL: " + metacatURL);
......
612 640
   * values.
613 641
   */
614 642
  public void loadCatalog() {
615
    String query = 
616
      "SELECT docid, doctype, date_updated " +
617
      "FROM xml_documents " +
618
      "WHERE doctype like 'eml://ecoinformatics.org/eml-2%'";
619 643
    Statement stmt;
620 644

  
621 645
    try {
......
623 647
      
624 648
      if (conn != null) {
625 649
        stmt = conn.createStatement();                          
626
        ResultSet rs = stmt.executeQuery(query);
650
        ResultSet rs = stmt.executeQuery(QUERY);
651
        
652
        int documentCount = 0;
627 653

  
628 654
        while (rs.next()) {
655
          documentCount++;
629 656
          String docid = rs.getString("docid");
630 657
          String doctype = rs.getString("doctype");
631 658
          String dateUpdated = rs.getDate("date_updated").toString();
632 659
          docTypeMap.put(docid, doctype);
633 660
          dateMap.put(docid, dateUpdated);
634 661
        }
662
        
663
        logger.info("Number of documents in catalog: " + documentCount);
635 664

  
636 665
        stmt.close();   
637 666
        conn.close();

Also available in: Unified diff