Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that searches a relational DB for elements and 
4
 *             attributes that have free text matches a query string,
5
 *             or structured query matches to a path specified node in the 
6
 *             XML hierarchy.  It returns a result set consisting of the 
7
 *             document ID for each document that satisfies the query
8
 *  Copyright: 2000 Regents of the University of California and the
9
 *             National Center for Ecological Analysis and Synthesis
10
 *    Authors: Matt Jones
11
 *    Release: @release@
12
 *
13
 *   '$Author: bojilova $'
14
 *     '$Date: 2000-09-12 10:37:07 -0700 (Tue, 12 Sep 2000) $'
15
 * '$Revision: 441 $'
16
 */
17

    
18
package edu.ucsb.nceas.metacat;
19

    
20
import java.io.*;
21
import java.util.Vector;
22
import java.net.URL;
23
import java.net.MalformedURLException;
24
import java.sql.*;
25
import java.util.Stack;
26
import java.util.Hashtable;
27
import java.util.Enumeration;
28

    
29
/** 
30
 * A Class that searches a relational DB for elements and 
31
 * attributes that have free text matches a query string,
32
 * or structured query matches to a path specified node in the 
33
 * XML hierarchy.  It returns a result set consisting of the 
34
 * document ID for each document that satisfies the query
35
 */
36
public class DBQuery {
37

    
38
  static final int ALL = 1;
39
  static final int WRITE = 2;
40
  static final int READ = 4;
41

    
42
  private Connection	conn = null;
43
  private String	parserName = null;
44

    
45
  /**
46
   * the main routine used to test the DBQuery utility.
47
   * <p>
48
   * Usage: java DBQuery <xmlfile>
49
   *
50
   * @param xmlfile the filename of the xml file containing the query
51
   */
52
  static public void main(String[] args) {
53
     
54
     if (args.length < 1)
55
     {
56
        System.err.println("Wrong number of arguments!!!");
57
        System.err.println("USAGE: java DBQuery <xmlfile>");
58
        return;
59
     } else {
60
        try {
61
                    
62
          String xmlfile  = args[0];
63

    
64
          // Open a connection to the database
65
          MetaCatUtil   util = new MetaCatUtil();
66
          Connection dbconn = util.openDBConnection();
67

    
68
          // Execute the query
69
          DBQuery queryobj = new DBQuery(dbconn, util.getOption("saxparser"));
70
          FileReader xml = new FileReader(new File(xmlfile));
71
          Hashtable nodelist = null;
72
          nodelist = queryobj.findDocuments(xml, null, null);
73

    
74
          // Print the reulting document listing
75
          StringBuffer result = new StringBuffer();
76
          String document = null;
77
          String docid = null;
78
          result.append("<?xml version=\"1.0\"?>\n");
79
          result.append("<resultset>\n"); 
80
  // following line removed by Dan Higgins to avoid insertion of query XML inside returned XML doc
81
  //        result.append("  <query>" + xmlfile + "</query>\n");
82
          Enumeration doclist = nodelist.keys(); 
83
          while (doclist.hasMoreElements()) {
84
            docid = (String)doclist.nextElement();
85
            document = (String)nodelist.get(docid);
86
            result.append("  <document>\n    " + document + 
87
                          "\n  </document>\n");
88
          }
89
          result.append("</resultset>\n");
90

    
91
          System.out.println(result);
92

    
93
        } catch (Exception e) {
94
          System.err.println("EXCEPTION HANDLING REQUIRED");
95
          System.err.println(e.getMessage());
96
          e.printStackTrace(System.err);
97
        }
98
     }
99
  }
100
  
101
  /**
102
   * construct an instance of the DBQuery class 
103
   *
104
   * <p>Generally, one would call the findDocuments() routine after creating 
105
   * an instance to specify the search query</p>
106
   *
107
   * @param conn the JDBC connection that we use for the query
108
   * @param parserName the fully qualified name of a Java class implementing
109
   *                   the org.xml.sax.XMLReader interface
110
   */
111
  public DBQuery( Connection conn, String parserName ) 
112
                  throws IOException, 
113
                         SQLException, 
114
                         ClassNotFoundException {
115
    this.conn = conn;
116
    this.parserName = parserName;
117
  }
118
  
119
  /** 
120
   * routine to search the elements and attributes looking to match query
121
   *
122
   * @param xmlquery the xml serialization of the query (@see pathquery.dtd)
123
   */
124
  public Hashtable findDocuments(Reader xmlquery, String user, String group) {
125
      Hashtable	 docListResult = new Hashtable();
126
      PreparedStatement pstmt;
127
      String docid = null;
128
      String docname = null;
129
      String doctype = null;
130
      String doctitle = null;
131
      String createDate = null;
132
      String updateDate = null;
133
      String fieldname = null;
134
      String fielddata = null;
135
      StringBuffer document = null; 
136

    
137
      try {
138
        // Get the XML query and covert it into a SQL statment
139
        QuerySpecification qspec = new QuerySpecification(xmlquery, 
140
                                   parserName);
141
        //System.out.println(qspec.printSQL());
142
        pstmt = conn.prepareStatement( qspec.printSQL() );
143

    
144
        // Execute the SQL query using the JDBC connection
145
        pstmt.execute();
146
        ResultSet rs = pstmt.getResultSet();
147
        boolean tableHasRows = rs.next();
148
        while (tableHasRows) {
149
          docid = rs.getString(1);
150
          if ( !hasReadPermission(conn, docid, user, group) ) {continue;}
151
          docname = rs.getString(2);
152
          doctype = rs.getString(3);
153
          doctitle = rs.getString(4);
154
          createDate = rs.getString(5);
155
          updateDate = rs.getString(6);
156
          
157
          document = new StringBuffer();
158
          
159
          document.append("<docid>").append(docid).append("</docid>");
160
          if (docname != null) {
161
            document.append("<docname>" + docname + "</docname>");
162
          }
163
          if (doctype != null) {
164
            document.append("<doctype>" + doctype + "</doctype>");
165
          }
166
          if (doctitle != null) {
167
            document.append("<doctitle>" + doctitle + "</doctitle>");
168
          }
169
          if(createDate != null) {
170
            document.append("<createdate>" + createDate + "</createdate>");
171
          }
172
          if(updateDate != null) {
173
            document.append("<updatedate>" + updateDate + "</updatedate>");
174
          }
175

    
176
          // Store the document id and the root node id
177
          docListResult.put(docid,(String)document.toString());
178

    
179
          // Advance to the next record in the cursor
180
          tableHasRows = rs.next();
181
        }
182
        
183
        if(qspec.containsExtendedSQL())
184
        {
185
          Vector extendedFields = new Vector(qspec.getReturnFieldList());
186
          Vector results = new Vector();
187
          pstmt = conn.prepareStatement(qspec.printExtendedSQL());
188
          pstmt.execute();
189
          rs = pstmt.getResultSet();
190
          tableHasRows = rs.next();
191
          while(tableHasRows) 
192
          {
193
            docid = rs.getString(1);
194
            if ( !hasReadPermission(conn, docid, user, group) ) {continue;}
195
            fieldname = rs.getString(2);
196
            fielddata = rs.getString(3);
197
            
198
            document = new StringBuffer();
199

    
200
            document.append("<param name=\"");
201
            document.append(fieldname);
202
            document.append("\">");
203
            document.append(fielddata);
204
            document.append("</param>");
205

    
206
            tableHasRows = rs.next();
207
            if(docListResult.containsKey(docid))
208
            {
209
              String removedelement = (String)docListResult.remove(docid);
210
              docListResult.put(docid, removedelement + document.toString());
211
            }
212
            else
213
            {
214
              docListResult.put(docid, document.toString()); 
215
            }
216
          }
217
        }
218
        pstmt.close();
219
      } catch (SQLException e) {
220
        System.err.println("Error getting id: " + e.getMessage());
221
      } catch (IOException ioe) {
222
        System.err.println("Error printing qspec:");
223
        System.err.println(ioe.getMessage());
224
      }
225
    //System.out.println("docListResult: ");
226
    //System.out.println(docListResult.toString());
227
    return docListResult;
228
  }
229
  
230
  /**
231
   * returns a string array of the contents of a particular node. 
232
   * If the node appears more than once, the contents are returned 
233
   * in the order in which they appearred in the document.
234
   * @param nodename the name or path of the particular node.
235
   * @param docid the docid of the document you want the node from.
236
   * @param conn a database connection-this allows this method to be static
237
   */
238
  public static Object[] getNodeContent(String nodename, String docid, 
239
                                        Connection conn)
240
  {
241
    StringBuffer query = new StringBuffer();
242
    Vector result = new Vector();
243
    PreparedStatement pstmt;
244
    query.append("select nodedata from xml_nodes where parentnodeid in ");
245
    query.append("(select nodeid from xml_index where path like '");
246
    query.append(nodename);
247
    query.append("' and docid like '").append(docid).append("')");
248
    try
249
    {
250
      pstmt = conn.prepareStatement(query.toString());
251

    
252
      // Execute the SQL query using the JDBC connection
253
      pstmt.execute();
254
      ResultSet rs = pstmt.getResultSet();
255
      boolean tableHasRows = rs.next();
256
      while (tableHasRows) 
257
      {
258
        result.add(rs.getString(1));
259
        System.out.println(rs.getString(1));
260
        tableHasRows = rs.next();
261
      }
262
    } 
263
    catch (SQLException e) 
264
    {
265
      System.err.println("Error getting id: " + e.getMessage());
266
    } 
267
    
268
    return result.toArray();
269
  }
270
  
271
  /**
272
   * format a structured query as an XML document that conforms
273
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
274
   * structured query engine
275
   *
276
   * @param params The list of parameters that  should be included in the query
277
   */
278
  public static String createSQuery(Hashtable params)
279
  { 
280
    StringBuffer query = new StringBuffer();
281
    Enumeration elements;
282
    Enumeration keys;
283
    String doctype = null;
284
    String casesensitive = null;
285
    String searchmode = null;
286
    Object nextkey;
287
    Object nextelement;
288
    //add the xml headers
289
    query.append("<?xml version=\"1.0\"?>\n");
290
    query.append("<pathquery version=\"1.0\"><meta_file_id>");
291
    
292
    if(params.containsKey("meta_file_id"))
293
    {
294
      query.append( ((String[])params.get("meta_file_id"))[0]);
295
    	query.append("</meta_file_id>");
296
    }
297
    else
298
    {
299
      query.append("unspecified</meta_file_id>");
300
    }
301
    
302
    query.append("<querytitle>");
303
    if(params.containsKey("querytitle"))
304
    {
305
      query.append(((String[])params.get("querytitle"))[0]);
306
    	query.append("</querytitle>");
307
    }
308
    else
309
    {
310
    	query.append("unspecified</querytitle>");
311
    }
312
    
313
    if(params.containsKey("doctype"))
314
    {
315
      doctype = ((String[])params.get("doctype"))[0]; 
316
    }
317
    else
318
    {
319
      doctype = "ANY";  
320
    }
321
    
322
    if(params.containsKey("returnfield"))
323
    {
324
      String[] returnfield = ((String[])params.get("returnfield"));
325
      for(int i=0; i<returnfield.length; i++)
326
      {
327
        query.append("<returnfield>").append(returnfield[i]);
328
        query.append("</returnfield>");
329
      }
330
    }
331
    
332
    //if you don't limit the query by doctype, then it just creates
333
    //an empty returndoctype tag.
334
    if (!doctype.equals("any") && 
335
        !doctype.equals("ANY") &&
336
        !doctype.equals("") ) 
337
    {
338
       query.append("<returndoctype>");
339
       query.append(doctype).append("</returndoctype>");
340
    }
341
    else
342
    { 
343
      query.append("<returndoctype></returndoctype>");
344
    }
345
    
346
    //allows the dynamic switching of boolean operators
347
    if(params.containsKey("operator"))
348
    {
349
      query.append("<querygroup operator=\"" + 
350
    		        ((String[])params.get("operator"))[0] + "\">");
351
    }
352
    else
353
    { //the default operator is UNION
354
      query.append("<querygroup operator=\"UNION\">"); 
355
    }
356
    		
357
    if(params.containsKey("casesensitive"))
358
    {
359
      casesensitive = ((String[])params.get("casesensitive"))[0]; 
360
    }
361
    else
362
    {
363
      casesensitive = "false"; 
364
    }
365
    
366
    if(params.containsKey("searchmode"))
367
    {
368
      searchmode = ((String[])params.get("searchmode"))[0]; 
369
    }
370
    else
371
    {
372
      searchmode = "contains"; 
373
    }
374
    		
375
    //anyfield is a special case because it does a 
376
    //free text search.  It does not have a <pathexpr>
377
    //tag.  This allows for a free text search within the structured
378
    //query.  This is useful if the INTERSECT operator is used.
379
    if(params.containsKey("anyfield"))
380
    {
381
       String[] anyfield = ((String[])params.get("anyfield"));
382
       //allow for more than one value for anyfield
383
       for(int i=0; i<anyfield.length; i++)
384
       {
385
         if(!anyfield[i].equals(""))
386
         {
387
           query.append("<queryterm casesensitive=\"" + casesensitive + 
388
                        "\" " + "searchmode=\"" + searchmode + "\"><value>" +
389
    			              anyfield[i] +
390
    			              "</value></queryterm>"); 
391
         }
392
       }
393
    }
394
    		
395
    //this while loop finds the rest of the parameters
396
    //and attempts to query for the field specified
397
    //by the parameter.
398
    elements = params.elements();
399
    keys = params.keys();
400
    while(keys.hasMoreElements() && elements.hasMoreElements())
401
    {
402
      nextkey = keys.nextElement();
403
    	nextelement = elements.nextElement();
404

    
405
    	//make sure we aren't querying for any of these
406
    	//parameters since the are already in the query
407
      //in one form or another.
408
    	if(!nextkey.toString().equals("doctype") && 
409
    		 !nextkey.toString().equals("action")  &&
410
    		 !nextkey.toString().equals("qformat") && 
411
    		 !nextkey.toString().equals("anyfield") &&
412
         !nextkey.toString().equals("returnfield") &&
413
    		 !nextkey.toString().equals("operator") )
414
    	{
415
        //allow for more than value per field name
416
        for(int i=0; i<((String[])nextelement).length; i++)
417
        {
418
          if(!((String[])nextelement)[i].equals(""))
419
          {
420
            query.append("<queryterm casesensitive=\"" + casesensitive +"\" " + 
421
    				             "searchmode=\"" + searchmode + "\">" +
422
    		                 "<value>" +
423
                         //add the query value
424
    		                 ((String[])nextelement)[i] +
425
    		                 "</value><pathexpr>" +
426
    		                 //add the path to query by 
427
                         nextkey.toString() + 
428
                         "</pathexpr></queryterm>");
429
          }
430
        }
431
    	}
432
    }
433
    query.append("</querygroup></pathquery>");
434
    //append on the end of the xml and return the result as a string
435
    return query.toString();
436
  }
437
  
438
  /**
439
   * format a simple free-text value query as an XML document that conforms
440
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
441
   * structured query engine
442
   *
443
   * @param value the text string to search for in the xml catalog
444
   * @param doctype the type of documents to include in the result set -- use
445
   *        "any" or "ANY" for unfiltered result sets
446
   */
447
   public static String createQuery(String value, String doctype) {
448
     StringBuffer xmlquery = new StringBuffer();
449
     xmlquery.append("<?xml version=\"1.0\"?>\n");
450
     xmlquery.append("<pathquery version=\"1.0\">");
451
     xmlquery.append("<meta_file_id>Unspecified</meta_file_id>");
452
     xmlquery.append("<querytitle>Unspecified</querytitle>");
453

    
454
     if (!doctype.equals("any") && !doctype.equals("ANY")) {
455
       xmlquery.append("<returndoctype>");
456
       xmlquery.append(doctype).append("</returndoctype>");
457
     }
458

    
459
     xmlquery.append("<querygroup operator=\"UNION\">");
460
     //chad added - 8/14
461
     //the if statement allows a query to gracefully handle a null 
462
     //query.  Without this if a nullpointerException is thrown.
463
     if(!value.equals(""))
464
     {
465
       xmlquery.append("<queryterm casesensitive=\"false\" ");
466
       xmlquery.append("searchmode=\"contains\">");
467
       xmlquery.append("<value>").append(value).append("</value>");
468
       xmlquery.append("</queryterm>");
469
     }
470
     xmlquery.append("</querygroup>");
471
     xmlquery.append("</pathquery>");
472

    
473
     
474
     return (xmlquery.toString());
475
   }
476

    
477
  /**
478
   * format a simple free-text value query as an XML document that conforms
479
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
480
   * structured query engine
481
   *
482
   * @param value the text string to search for in the xml catalog
483
   */
484
   public static String createQuery(String value) {
485
     return createQuery(value, "any");
486
   }
487
   
488
  /** Check for "read" permissions from DB connection */
489
  private boolean hasReadPermission(Connection conn, String docid, 
490
                                     String user, String group) 
491
                                     throws SQLException {
492
    // b' of the command line invocation
493
    if ( (user == null) && (group == null) ) {
494
      return true;
495
    }
496
    
497
    PreparedStatement pstmt;
498
    // checking if user is owner of docid or if docid has public access
499
    try {
500
      pstmt = conn.prepareStatement(
501
                   "SELECT 'x' FROM xml_documents " +
502
                   "WHERE docid LIKE ? AND user_owner LIKE ? " + 
503
                   "UNION " +
504
                   "SELECT 'x' FROM xml_documents " +
505
                   "WHERE docid LIKE ? AND public_access = 1");
506
      // Bind the values to the query
507
      pstmt.setString(1, docid);
508
      pstmt.setString(2, user);
509
      pstmt.setString(3, docid);
510

    
511
      pstmt.execute();
512
      ResultSet rs = pstmt.getResultSet();
513
      boolean hasRow = rs.next();
514
      pstmt.close();
515
      if (hasRow) {
516
        return true;
517
      }
518
      
519
    } catch (SQLException e) {
520
      throw new 
521
        SQLException("Error checking document's owner or public access: "
522
                      + e.getMessage());
523
    }
524

    
525
    // checking if docid has public access at this time
526
    try {
527
      pstmt = conn.prepareStatement(
528
                   "SELECT 'x' FROM xml_access " +
529
                   "WHERE docid LIKE ? " +
530
                   "AND principal_name = 'public' " +
531
                   "AND principal_type = 'user' " +
532
                   "AND sysdate BETWEEN nvl(begin_time,sysdate) " +
533
                                   "AND nvl(end_time,sysdate)");
534
      // Bind the values to the query
535
      pstmt.setString(1, docid);
536

    
537
      pstmt.execute();
538
      ResultSet rs = pstmt.getResultSet();
539
      boolean hasRow = rs.next();
540
      pstmt.close();
541
      if (hasRow) {
542
        return true;
543
      }
544
      
545
    } catch (SQLException e) {
546
      throw new 
547
        SQLException("Error checking doc's public access: " + e.getMessage());
548
    }
549

    
550
    // checking access type from xml_access table
551
    int accesstype = 0;
552
    try {
553
      pstmt = conn.prepareStatement(
554
                   "SELECT access_type FROM xml_access " +
555
                   "WHERE docid LIKE ? " + 
556
                   "AND principal_name LIKE ? " +
557
                   "AND principal_type = 'user' " +
558
                   "AND sysdate BETWEEN nvl(begin_time,sysdate) " +
559
                                   "AND nvl(end_time,sysdate) " +
560
                   "UNION " +
561
                   "SELECT access_type FROM xml_access " +
562
                   "WHERE docid LIKE ? " + 
563
                   "AND principal_name LIKE ? " +
564
                   "AND principal_type = 'group' " +
565
                   "AND sysdate BETWEEN nvl(begin_time,sysdate) " +
566
                                   "AND nvl(end_time,sysdate)");
567
      // Bind the values to the query
568
      pstmt.setString(1, docid);
569
      pstmt.setString(2, user);
570
      pstmt.setString(3, docid);
571
      pstmt.setString(2, group);
572

    
573
      pstmt.execute();
574
      ResultSet rs = pstmt.getResultSet();
575
      boolean hasRows = rs.next();
576
      while ( hasRows ) {
577
        accesstype = rs.getInt(1);
578
        if ( (accesstype & READ) == READ ) {
579
          pstmt.close();
580
          return true;
581
        }
582
        hasRows = rs.next();
583
      }
584

    
585
      pstmt.close();
586
      return false;
587
      
588
    } catch (SQLException e) {
589
      throw new 
590
      SQLException("Error getting document's permissions: " + e.getMessage());
591
    }
592
  }
593
   
594
}
595

    
596
/**
597
 * '$Log$
598
 * 'Revision 1.18  2000/09/05 20:50:56  berkley
599
 * 'Added a method called getNodeContent which retrieves the content of a node in a document.  If there are more than one nodes with the same name returned, it returns an array with all of the data.
600
 * '
601
 * 'Revision 1.17  2000/08/31 21:20:39  berkley
602
 * 'changed xslf for new returnfield scheme.  the returnfields are now returned as <param name="<returnfield>"> tags.
603
 * 'hThe sql for the returnfield query was redone to fix a previous problem with slow queries
604
 * '
605
 * 'Revision 1.16  2000/08/23 22:55:25  berkley
606
 * 'changed the field names to be case-sensitive in the returnfields
607
 * '
608
 * 'Revision 1.15  2000/08/23 17:22:07  berkley
609
 * 'added support for the returnfield parameter
610
 * '-added the dynamic parameters to the returned hash table of documents
611
 * '
612
 * 'Revision 1.14  2000/08/17 16:02:34  berkley
613
 * 'Made changes to createSQuery to allow for multiple parameters of the same name.  Also changed the param list to include only "Hashtable params" without a "String doctype" since the doctype is already contained in the params.
614
 * '
615
 * 'Revision 1.13  2000/08/14 21:26:12  berkley
616
 * 'Added createSQuery() to handle structured queries of an arbitrary number of parameters.  Also modified createQuery() to handle a null query in a graceful manner.
617
 * '
618
 * 'Revision 1.12  2000/08/14 20:53:33  jones
619
 * 'Added "release" keyword to all metacat source files so that the release
620
 * 'number will be evident in software distributions.
621
 * '
622
 * 'Revision 1.11  2000/08/11 18:26:07  berkley
623
 * 'added createSQuery
624
 * '
625
 * 'Revision 1.10  2000/07/26 20:40:41  higgins
626
 * 'no message
627
 * '
628
 * 'Revision 1.9  2000/06/26 10:35:04  jones
629
 * 'Merged in substantial changes to DBWriter and associated classes and to
630
 * 'the MetaCatServlet in order to accomodate the new UPDATE and DELETE
631
 * 'functions.  The command line tools and the parameters for the
632
 * 'servlet have changed substantially.
633
 * '
634
 * 'Revision 1.8.2.2  2000/06/25 23:38:16  jones
635
 * 'Added RCSfile keyword
636
 * '
637
 * 'Revision 1.8.2.1  2000/06/25 23:34:17  jones
638
 * 'Changed documentation formatting, added log entries at bottom of source files
639
 * ''
640
 */
(8-8/27)