Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that searches a relational DB for elements and 
4
 *             attributes that have free text matches a query string,
5
 *             or structured query matches to a path specified node in the 
6
 *             XML hierarchy.  It returns a result set consisting of the 
7
 *             document ID for each document that satisfies the query
8
 *  Copyright: 2000 Regents of the University of California and the
9
 *             National Center for Ecological Analysis and Synthesis
10
 *    Authors: Matt Jones
11
 *    Release: @release@
12
 *
13
 *   '$Author: berkley $'
14
 *     '$Date: 2000-12-13 09:41:50 -0800 (Wed, 13 Dec 2000) $'
15
 * '$Revision: 602 $'
16
 */
17

    
18
package edu.ucsb.nceas.metacat;
19

    
20
import java.io.*;
21
import java.util.Vector;
22
import java.net.URL;
23
import java.net.MalformedURLException;
24
import java.sql.*;
25
import java.util.Stack;
26
import java.util.Hashtable;
27
import java.util.Enumeration;
28

    
29
/** 
30
 * A Class that searches a relational DB for elements and 
31
 * attributes that have free text matches a query string,
32
 * or structured query matches to a path specified node in the 
33
 * XML hierarchy.  It returns a result set consisting of the 
34
 * document ID for each document that satisfies the query
35
 */
36
public class DBQuery {
37

    
38
  static final int ALL = 1;
39
  static final int WRITE = 2;
40
  static final int READ = 4;
41

    
42
  private Connection  conn = null;
43
  private String  parserName = null;
44
  private MetaCatUtil util = new MetaCatUtil();
45
  /**
46
   * the main routine used to test the DBQuery utility.
47
   * <p>
48
   * Usage: java DBQuery <xmlfile>
49
   *
50
   * @param xmlfile the filename of the xml file containing the query
51
   */
52
  static public void main(String[] args) {
53
     
54
     if (args.length < 1)
55
     {
56
        System.err.println("Wrong number of arguments!!!");
57
        System.err.println("USAGE: java DBQuery <xmlfile>");
58
        return;
59
     } else {
60
        try {
61
                    
62
          String xmlfile  = args[0];
63

    
64
          // Open a connection to the database
65
          MetaCatUtil   util = new MetaCatUtil();
66
          Connection dbconn = util.openDBConnection();
67

    
68
          // Execute the query
69
          DBQuery queryobj = new DBQuery(dbconn, util.getOption("saxparser"));
70
          FileReader xml = new FileReader(new File(xmlfile));
71
          Hashtable nodelist = null;
72
          nodelist = queryobj.findDocuments(xml, null, null);
73

    
74
          // Print the reulting document listing
75
          StringBuffer result = new StringBuffer();
76
          String document = null;
77
          String docid = null;
78
          result.append("<?xml version=\"1.0\"?>\n");
79
          result.append("<resultset>\n"); 
80
  // following line removed by Dan Higgins to avoid insertion of query XML inside returned XML doc
81
  //        result.append("  <query>" + xmlfile + "</query>\n");
82
          Enumeration doclist = nodelist.keys(); 
83
          while (doclist.hasMoreElements()) {
84
            docid = (String)doclist.nextElement();
85
            document = (String)nodelist.get(docid);
86
            result.append("  <document>\n    " + document + 
87
                          "\n  </document>\n");
88
          }
89
          result.append("</resultset>\n");
90

    
91
          System.out.println(result);
92

    
93
        } catch (Exception e) {
94
          System.err.println("EXCEPTION HANDLING REQUIRED");
95
          System.err.println(e.getMessage());
96
          e.printStackTrace(System.err);
97
        }
98
     }
99
  }
100
  
101
  /**
102
   * construct an instance of the DBQuery class 
103
   *
104
   * <p>Generally, one would call the findDocuments() routine after creating 
105
   * an instance to specify the search query</p>
106
   *
107
   * @param conn the JDBC connection that we use for the query
108
   * @param parserName the fully qualified name of a Java class implementing
109
   *                   the org.xml.sax.XMLReader interface
110
   */
111
  public DBQuery( Connection conn, String parserName ) 
112
                  throws IOException, 
113
                         SQLException, 
114
                         ClassNotFoundException {
115
    this.conn = conn;
116
    this.parserName = parserName;
117
  }
118
  
119
  public Hashtable findDocuments(Reader xmlquery, String user, String group)
120
  {
121
    return findDocuments(xmlquery, user, group, null);
122
  }
123
  
124
  /** 
125
   * routine to search the elements and attributes looking to match query
126
   *
127
   * @param xmlquery the xml serialization of the query (@see pathquery.dtd)
128
   * @param user the username of the user
129
   * @param group the group of the user
130
   * @param returndoc an array of document types to backtrack against.
131
   */
132
  public Hashtable findDocuments(Reader xmlquery, String user, String group,
133
                                 String[] returndoc)
134
  {
135
    //System.out.println("in finddocuments");
136
      Hashtable   docListResult = new Hashtable();
137
      PreparedStatement pstmt;
138
      String docid = null;
139
      String docname = null;
140
      String doctype = null;
141
      String doctitle = null;
142
      String createDate = null;
143
      String updateDate = null;
144
      String fieldname = null;
145
      String fielddata = null;
146
      String relation = null;
147
      StringBuffer document = null; 
148
      Vector returndocVec = new Vector();
149
      
150
      if(returndoc != null)
151
      {//add the returndoc elements to a vector for easier manipulation
152
        for(int i=0; i<returndoc.length; i++)
153
        {
154
          returndocVec.add(new String((String)returndoc[i]));
155
        }
156
      }
157
      
158
      try {
159
        // Get the XML query and covert it into a SQL statment
160
        QuerySpecification qspec = new QuerySpecification(xmlquery, 
161
                                   parserName, 
162
                                   util.getOption("accNumberSeparator"));
163
        //System.out.println(qspec.printSQL());
164
        pstmt = conn.prepareStatement( qspec.printSQL() );
165

    
166
        // Execute the SQL query using the JDBC connection
167
        pstmt.execute();
168
        ResultSet rs = pstmt.getResultSet();
169
        boolean tableHasRows = rs.next();
170
        while (tableHasRows) {
171
          docid = rs.getString(1);
172
          if ( !hasPermission(conn, user, group, docid) ) {continue;}
173
          docname = rs.getString(2);
174
          doctype = rs.getString(3);
175
          doctitle = rs.getString(4);
176
          createDate = rs.getString(5);
177
          updateDate = rs.getString(6);
178
          //System.out.println("vec.size = " + returndocVec.size());
179
          if(returndocVec.size() != 0 && !returndocVec.contains(doctype))
180
          { //there are returndocs to match (backtracking can now be performed). 
181
            //System.out.println("olddoctype: " + doctype);
182
            StringBuffer btBuf = new StringBuffer();
183
            btBuf.append("select object from xml_relation where ");
184
            btBuf.append("objdoctype in (");
185
            //build the doctype list for the backtracking sql statement
186
            for(int i=0; i<returndocVec.size(); i++)
187
            {
188
              btBuf.append("'").append((String)returndocVec.get(i)).append("'");
189
              if(i != (returndocVec.size() - 1))
190
              {
191
                btBuf.append(", ");
192
              } 
193
            }
194
            btBuf.append(") ");
195
            btBuf.append("and subject like '");
196
            //btBuf.append("metacat://").append(util.getOption("server"));
197
            //btBuf.append("?docid=").append(docid).append("'");
198
            btBuf.append("%docid=").append(docid).append("'");
199
            System.out.println("sql: " + btBuf.toString());
200
            
201
            pstmt = conn.prepareStatement(btBuf.toString());
202
            pstmt.execute();
203
            ResultSet btrs = pstmt.getResultSet();
204
            boolean hasBtRows = btrs.next();
205
            if(hasBtRows)
206
            { //there was a backtrackable document found
207
              DocumentImpl xmldoc = null;
208
              System.out.println("document found is: " + btrs.getString(1));
209
              MetacatURL objURL = new MetacatURL(btrs.getString(1));
210
              try
211
              {
212
                xmldoc = new DocumentImpl(conn, objURL.getParam(0)[1]);
213
              }
214
              catch(Exception e)
215
              {
216
                System.out.println("Error getting document: " + e.getMessage());
217
              }
218
              
219
              docid   = xmldoc.getDocID();
220
              docname = xmldoc.getDocname();
221
              doctype = xmldoc.getDoctype();
222
              doctitle = xmldoc.getDocTitle();
223
              createDate = xmldoc.getCreateDate();
224
              updateDate = xmldoc.getUpdateDate();
225
              //System.out.println("docname: " + docname + " doctype: " + doctype + 
226
              //                   " doctitle: " + doctitle + " createdate: " +
227
              //                   createDate + " updatedate: " + updateDate);
228
            }
229
            btrs.close();
230
          }
231
          
232
          document = new StringBuffer();
233
          //System.out.println("packagdoctype: " + util.getOption("packagedoctype"));
234
          if(!doctype.equals(util.getOption("packagedoctype")))
235
          {
236
            document.append("<docid>").append(docid).append("</docid>");
237
            if (docname != null) {
238
              document.append("<docname>" + docname + "</docname>");
239
            }
240
            if (doctype != null) {
241
              document.append("<doctype>" + doctype + "</doctype>");
242
            }
243
            if (doctitle != null) {
244
              document.append("<doctitle>" + doctitle + "</doctitle>");
245
            }
246
            if(createDate != null) {
247
              document.append("<createdate>" + createDate + "</createdate>");
248
            }
249
            if(updateDate != null) {
250
              document.append("<updatedate>" + updateDate + "</updatedate>");
251
            }
252
            // Store the document id and the root node id
253
            docListResult.put(docid,(String)document.toString());
254
          }
255

    
256
          // Advance to the next record in the cursor
257
          tableHasRows = rs.next();
258
        }
259
        
260
        if(qspec.containsExtendedSQL())
261
        {
262
          Vector extendedFields = new Vector(qspec.getReturnFieldList());
263
          Vector results = new Vector();
264
          Enumeration keylist = docListResult.keys();
265
          StringBuffer doclist = new StringBuffer();
266
          while(keylist.hasMoreElements())
267
          {
268
            doclist.append("'");
269
            doclist.append((String)keylist.nextElement());
270
            doclist.append("',");
271
          }
272
          doclist.deleteCharAt(doclist.length()-1); //remove the last comma
273
          pstmt = conn.prepareStatement(qspec.printExtendedSQL(
274
                                        doclist.toString()));
275
          pstmt.execute();
276
          rs = pstmt.getResultSet();
277
          tableHasRows = rs.next();
278
          while(tableHasRows) 
279
          {
280
            docid = rs.getString(1);
281
            if ( !hasPermission(conn, user, group, docid) ) {continue;}
282
            fieldname = rs.getString(2);
283
            fielddata = rs.getString(3);
284
            
285
            document = new StringBuffer();
286

    
287
            document.append("<param name=\"");
288
            document.append(fieldname);
289
            document.append("\">");
290
            document.append(fielddata);
291
            document.append("</param>");
292

    
293
            tableHasRows = rs.next();
294
            if(docListResult.containsKey(docid))
295
            {
296
              String removedelement = (String)docListResult.remove(docid);
297
              docListResult.put(docid, removedelement + document.toString());
298
            }
299
            else
300
            {
301
              docListResult.put(docid, document.toString()); 
302
            }
303
          }
304
        }
305

    
306
        //this loop adds the relation data to the resultdoc
307
        //this code might be able to be added to the backtracking code above
308
        Enumeration docidkeys = docListResult.keys();
309
        while(docidkeys.hasMoreElements())
310
        {
311
          //String connstring = "metacat://"+util.getOption("server")+"?docid=";
312
          String connstring = "%docid=";
313
          String docidkey = (String)docidkeys.nextElement();
314
          System.out.println("relationsql: " + qspec.printRelationSQL(
315
                                                     connstring + docidkey));
316
          pstmt = conn.prepareStatement(
317
                  qspec.printRelationSQL(connstring + docidkey));
318
          pstmt.execute();
319
          rs = pstmt.getResultSet();
320
          tableHasRows = rs.next();
321
          while(tableHasRows)
322
          {
323
            String sub = rs.getString(1);
324
            String rel = rs.getString(2);
325
            String obj = rs.getString(3);
326
            String subDT = rs.getString(4);
327
            String objDT = rs.getString(5);
328
            
329
            MetacatURL murl = new MetacatURL(sub);
330
            if(murl.getProtocol().equals("metacat"))
331
            {//we only want to process metacat urls here.
332
              String[] tempparam = murl.getParam(0);
333
              if(tempparam[0].equals("docid") && tempparam[1].equals(docidkey))
334
              {
335
                document = new StringBuffer();
336
                document.append("<relation>");
337
                document.append("<relationtype>").append(rel);
338
                document.append("</relationtype>");
339
                document.append("<relationdoc>").append(obj);
340
                document.append("</relationdoc>");
341
                document.append("<relationdoctype>").append(objDT);
342
                document.append("</relationdoctype>");
343
                document.append("</relation>");
344
                
345
                String removedelement = (String)docListResult.remove(docidkey);
346
                docListResult.put(docidkey, removedelement + document.toString());
347
                
348
              }
349
            }
350
            tableHasRows = rs.next();
351
          }
352
        }
353
        pstmt.close();
354
      } catch (SQLException e) {
355
        System.err.println("Error getting id: " + e.getMessage());
356
      } catch (IOException ioe) {
357
        System.err.println("Error printing qspec:");
358
        System.err.println(ioe.getMessage());
359
      }
360
    //System.out.println("docListResult: ");
361
    //System.out.println(docListResult.toString());
362
    return docListResult;
363
  }
364
  
365
  /**
366
   * returns a string array of the contents of a particular node. 
367
   * If the node appears more than once, the contents are returned 
368
   * in the order in which they appearred in the document.
369
   * @param nodename the name or path of the particular node.
370
   * @param docid the docid of the document you want the node from.
371
   * @param conn a database connection-this allows this method to be static
372
   */
373
  public static Object[] getNodeContent(String nodename, String docid, 
374
                                        Connection conn)
375
  {
376
    StringBuffer query = new StringBuffer();
377
    Vector result = new Vector();
378
    PreparedStatement pstmt;
379
    query.append("select nodedata from xml_nodes where parentnodeid in ");
380
    query.append("(select nodeid from xml_index where path like '");
381
    query.append(nodename);
382
    query.append("' and docid like '").append(docid).append("')");
383
    try
384
    {
385
      pstmt = conn.prepareStatement(query.toString());
386

    
387
      // Execute the SQL query using the JDBC connection
388
      pstmt.execute();
389
      ResultSet rs = pstmt.getResultSet();
390
      boolean tableHasRows = rs.next();
391
      while (tableHasRows) 
392
      {
393
        result.add(rs.getString(1));
394
        System.out.println(rs.getString(1));
395
        tableHasRows = rs.next();
396
      }
397
    } 
398
    catch (SQLException e) 
399
    {
400
      System.err.println("Error getting id: " + e.getMessage());
401
    } 
402
    
403
    return result.toArray();
404
  }
405
  
406
  /**
407
   * format a structured query as an XML document that conforms
408
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
409
   * structured query engine
410
   *
411
   * @param params The list of parameters that  should be included in the query
412
   */
413
  public static String createSQuery(Hashtable params)
414
  { 
415
    StringBuffer query = new StringBuffer();
416
    Enumeration elements;
417
    Enumeration keys;
418
    String doctype = null;
419
    String casesensitive = null;
420
    String searchmode = null;
421
    Object nextkey;
422
    Object nextelement;
423
    //add the xml headers
424
    query.append("<?xml version=\"1.0\"?>\n");
425
    query.append("<pathquery version=\"1.0\"><meta_file_id>");
426
    
427
    if(params.containsKey("meta_file_id"))
428
    {
429
      query.append( ((String[])params.get("meta_file_id"))[0]);
430
      query.append("</meta_file_id>");
431
    }
432
    else
433
    {
434
      query.append("unspecified</meta_file_id>");
435
    }
436
    
437
    query.append("<querytitle>");
438
    if(params.containsKey("querytitle"))
439
    {
440
      query.append(((String[])params.get("querytitle"))[0]);
441
      query.append("</querytitle>");
442
    }
443
    else
444
    {
445
      query.append("unspecified</querytitle>");
446
    }
447
    
448
    if(params.containsKey("doctype"))
449
    {
450
      doctype = ((String[])params.get("doctype"))[0]; 
451
    }
452
    else
453
    {
454
      doctype = "ANY";  
455
    }
456
    
457
    if(params.containsKey("returnfield"))
458
    {
459
      String[] returnfield = ((String[])params.get("returnfield"));
460
      for(int i=0; i<returnfield.length; i++)
461
      {
462
        query.append("<returnfield>").append(returnfield[i]);
463
        query.append("</returnfield>");
464
      }
465
    }
466
    
467
    if(params.containsKey("owner"))
468
    {
469
      String[] owner = ((String[])params.get("owner"));
470
      for(int i=0; i<owner.length; i++)
471
      {
472
        query.append("<owner>").append(owner[i]);
473
        query.append("</owner>");
474
      }
475
    }
476
    
477
    if(params.containsKey("site"))
478
    {
479
      String[] site = ((String[])params.get("site"));
480
      for(int i=0; i<site.length; i++)
481
      {
482
        query.append("<site>").append(site[i]);
483
        query.append("</site>");
484
      }
485
    }
486
    
487
    //if you don't limit the query by doctype, then it just creates
488
    //an empty returndoctype tag.
489
    if (!doctype.equals("any") && 
490
        !doctype.equals("ANY") &&
491
        !doctype.equals("") ) 
492
    {
493
       query.append("<returndoctype>");
494
       query.append(doctype).append("</returndoctype>");
495
    }
496
    else
497
    { 
498
      query.append("<returndoctype></returndoctype>");
499
    }
500
    
501
    //allows the dynamic switching of boolean operators
502
    if(params.containsKey("operator"))
503
    {
504
      query.append("<querygroup operator=\"" + 
505
                ((String[])params.get("operator"))[0] + "\">");
506
    }
507
    else
508
    { //the default operator is UNION
509
      query.append("<querygroup operator=\"UNION\">"); 
510
    }
511
        
512
    if(params.containsKey("casesensitive"))
513
    {
514
      casesensitive = ((String[])params.get("casesensitive"))[0]; 
515
    }
516
    else
517
    {
518
      casesensitive = "false"; 
519
    }
520
    
521
    if(params.containsKey("searchmode"))
522
    {
523
      searchmode = ((String[])params.get("searchmode"))[0]; 
524
    }
525
    else
526
    {
527
      searchmode = "contains"; 
528
    }
529
        
530
    //anyfield is a special case because it does a 
531
    //free text search.  It does not have a <pathexpr>
532
    //tag.  This allows for a free text search within the structured
533
    //query.  This is useful if the INTERSECT operator is used.
534
    if(params.containsKey("anyfield"))
535
    {
536
       String[] anyfield = ((String[])params.get("anyfield"));
537
       //allow for more than one value for anyfield
538
       for(int i=0; i<anyfield.length; i++)
539
       {
540
         if(!anyfield[i].equals(""))
541
         {
542
           query.append("<queryterm casesensitive=\"" + casesensitive + 
543
                        "\" " + "searchmode=\"" + searchmode + "\"><value>" +
544
                        anyfield[i] +
545
                        "</value></queryterm>"); 
546
         }
547
       }
548
    }
549
        
550
    //this while loop finds the rest of the parameters
551
    //and attempts to query for the field specified
552
    //by the parameter.
553
    elements = params.elements();
554
    keys = params.keys();
555
    while(keys.hasMoreElements() && elements.hasMoreElements())
556
    {
557
      nextkey = keys.nextElement();
558
      nextelement = elements.nextElement();
559

    
560
      //make sure we aren't querying for any of these
561
      //parameters since the are already in the query
562
      //in one form or another.
563
      if(!nextkey.toString().equals("doctype") && 
564
         !nextkey.toString().equals("action")  &&
565
         !nextkey.toString().equals("qformat") && 
566
         !nextkey.toString().equals("anyfield") &&
567
         !nextkey.toString().equals("returnfield") &&
568
         !nextkey.toString().equals("owner") &&
569
         !nextkey.toString().equals("site") &&
570
         !nextkey.toString().equals("operator") )
571
      {
572
        //allow for more than value per field name
573
        for(int i=0; i<((String[])nextelement).length; i++)
574
        {
575
          if(!((String[])nextelement)[i].equals(""))
576
          {
577
            query.append("<queryterm casesensitive=\"" + casesensitive +"\" " + 
578
                         "searchmode=\"" + searchmode + "\">" +
579
                         "<value>" +
580
                         //add the query value
581
                         ((String[])nextelement)[i] +
582
                         "</value><pathexpr>" +
583
                         //add the path to query by 
584
                         nextkey.toString() + 
585
                         "</pathexpr></queryterm>");
586
          }
587
        }
588
      }
589
    }
590
    query.append("</querygroup></pathquery>");
591
    //append on the end of the xml and return the result as a string
592
    return query.toString();
593
  }
594
  
595
  /**
596
   * format a simple free-text value query as an XML document that conforms
597
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
598
   * structured query engine
599
   *
600
   * @param value the text string to search for in the xml catalog
601
   * @param doctype the type of documents to include in the result set -- use
602
   *        "any" or "ANY" for unfiltered result sets
603
   */
604
   public static String createQuery(String value, String doctype) {
605
     StringBuffer xmlquery = new StringBuffer();
606
     xmlquery.append("<?xml version=\"1.0\"?>\n");
607
     xmlquery.append("<pathquery version=\"1.0\">");
608
     xmlquery.append("<meta_file_id>Unspecified</meta_file_id>");
609
     xmlquery.append("<querytitle>Unspecified</querytitle>");
610

    
611
     if (!doctype.equals("any") && !doctype.equals("ANY")) {
612
       xmlquery.append("<returndoctype>");
613
       xmlquery.append(doctype).append("</returndoctype>");
614
     }
615

    
616
     xmlquery.append("<querygroup operator=\"UNION\">");
617
     //chad added - 8/14
618
     //the if statement allows a query to gracefully handle a null 
619
     //query.  Without this if a nullpointerException is thrown.
620
     if(!value.equals(""))
621
     {
622
       xmlquery.append("<queryterm casesensitive=\"false\" ");
623
       xmlquery.append("searchmode=\"contains\">");
624
       xmlquery.append("<value>").append(value).append("</value>");
625
       xmlquery.append("</queryterm>");
626
     }
627
     xmlquery.append("</querygroup>");
628
     xmlquery.append("</pathquery>");
629

    
630
     
631
     return (xmlquery.toString());
632
   }
633

    
634
  /**
635
   * format a simple free-text value query as an XML document that conforms
636
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
637
   * structured query engine
638
   *
639
   * @param value the text string to search for in the xml catalog
640
   */
641
   public static String createQuery(String value) {
642
     return createQuery(value, "any");
643
   }
644
   
645
  /** 
646
    * Check for "READ" permission on @docid for @user and/or @group 
647
    * from DB connection 
648
    */
649
  private boolean hasPermission ( Connection conn, String user,
650
                                  String group, String docid ) 
651
                  throws SQLException 
652
  {
653
    // b' of the command line invocation
654
    if ( (user == null) && (group == null) ) {
655
      return true;
656
    }
657
    
658
    // Check for READ permission on @docid for @user and/or @group
659
    AccessControlList aclobj = new AccessControlList(conn);
660
    boolean hasPermission = aclobj.hasPermission("READ",user,docid);
661
    if ( !hasPermission && group != null ) {
662
      hasPermission = aclobj.hasPermission("READ",group,docid);
663
    }
664
    
665
    return hasPermission;
666
  }
667
   
668
}
(14-14/38)