Project

General

Profile

1 155 jones
/**
2 203 jones
 *  '$RCSfile$'
3
 *    Purpose: A Class that searches a relational DB for elements and
4
 *             attributes that have free text matches a query string,
5
 *             or structured query matches to a path specified node in the
6
 *             XML hierarchy.  It returns a result set consisting of the
7
 *             document ID for each document that satisfies the query
8
 *  Copyright: 2000 Regents of the University of California and the
9
 *             National Center for Ecological Analysis and Synthesis
10
 *    Authors: Matt Jones
11 349 jones
 *    Release: @release@
12 155 jones
 *
13 203 jones
 *   '$Author$'
14
 *     '$Date$'
15
 * '$Revision$'
16 155 jones
 */
17
18
package edu.ucsb.nceas.metacat;
19
20
import java.io.*;
21 401 berkley
import java.util.Vector;
22 155 jones
import java.net.URL;
23
import java.net.MalformedURLException;
24
import java.sql.*;
25
import java.util.Stack;
26
import java.util.Hashtable;
27
import java.util.Enumeration;
28
29
/**
30 172 jones
 * A Class that searches a relational DB for elements and
31
 * attributes that have free text matches a query string,
32
 * or structured query matches to a path specified node in the
33
 * XML hierarchy.  It returns a result set consisting of the
34
 * document ID for each document that satisfies the query
35 155 jones
 */
36
public class DBQuery {
37
38 441 bojilova
  static final int ALL = 1;
39
  static final int WRITE = 2;
40
  static final int READ = 4;
41
42 155 jones
  private Connection	conn = null;
43 172 jones
  private String	parserName = null;
44 155 jones
45
  /**
46
   * the main routine used to test the DBQuery utility.
47 184 jones
   * <p>
48
   * Usage: java DBQuery <xmlfile>
49 155 jones
   *
50 170 jones
   * @param xmlfile the filename of the xml file containing the query
51 155 jones
   */
52
  static public void main(String[] args) {
53
54 184 jones
     if (args.length < 1)
55 155 jones
     {
56
        System.err.println("Wrong number of arguments!!!");
57 184 jones
        System.err.println("USAGE: java DBQuery <xmlfile>");
58 155 jones
        return;
59
     } else {
60
        try {
61
62 170 jones
          String xmlfile  = args[0];
63 155 jones
64
          // Open a connection to the database
65 184 jones
          MetaCatUtil   util = new MetaCatUtil();
66
          Connection dbconn = util.openDBConnection();
67 172 jones
68 170 jones
          // Execute the query
69 184 jones
          DBQuery queryobj = new DBQuery(dbconn, util.getOption("saxparser"));
70 170 jones
          FileReader xml = new FileReader(new File(xmlfile));
71 155 jones
          Hashtable nodelist = null;
72 441 bojilova
          nodelist = queryobj.findDocuments(xml, null, null);
73 155 jones
74 172 jones
          // Print the reulting document listing
75 155 jones
          StringBuffer result = new StringBuffer();
76
          String document = null;
77 170 jones
          String docid = null;
78 155 jones
          result.append("<?xml version=\"1.0\"?>\n");
79 296 higgins
          result.append("<resultset>\n");
80
  // following line removed by Dan Higgins to avoid insertion of query XML inside returned XML doc
81
  //        result.append("  <query>" + xmlfile + "</query>\n");
82 155 jones
          Enumeration doclist = nodelist.keys();
83
          while (doclist.hasMoreElements()) {
84 170 jones
            docid = (String)doclist.nextElement();
85 155 jones
            document = (String)nodelist.get(docid);
86
            result.append("  <document>\n    " + document +
87
                          "\n  </document>\n");
88
          }
89
          result.append("</resultset>\n");
90
91
          System.out.println(result);
92
93
        } catch (Exception e) {
94
          System.err.println("EXCEPTION HANDLING REQUIRED");
95
          System.err.println(e.getMessage());
96
          e.printStackTrace(System.err);
97
        }
98
     }
99
  }
100
101
  /**
102
   * construct an instance of the DBQuery class
103
   *
104
   * <p>Generally, one would call the findDocuments() routine after creating
105
   * an instance to specify the search query</p>
106
   *
107
   * @param conn the JDBC connection that we use for the query
108 172 jones
   * @param parserName the fully qualified name of a Java class implementing
109 185 jones
   *                   the org.xml.sax.XMLReader interface
110 155 jones
   */
111 172 jones
  public DBQuery( Connection conn, String parserName )
112 155 jones
                  throws IOException,
113
                         SQLException,
114 172 jones
                         ClassNotFoundException {
115 155 jones
    this.conn = conn;
116 172 jones
    this.parserName = parserName;
117 155 jones
  }
118
119
  /**
120
   * routine to search the elements and attributes looking to match query
121
   *
122 178 jones
   * @param xmlquery the xml serialization of the query (@see pathquery.dtd)
123 155 jones
   */
124 441 bojilova
  public Hashtable findDocuments(Reader xmlquery, String user, String group) {
125 155 jones
      Hashtable	 docListResult = new Hashtable();
126
      PreparedStatement pstmt;
127 170 jones
      String docid = null;
128 155 jones
      String docname = null;
129
      String doctype = null;
130
      String doctitle = null;
131 401 berkley
      String createDate = null;
132
      String updateDate = null;
133
      String fieldname = null;
134
      String fielddata = null;
135 155 jones
      StringBuffer document = null;
136
137
      try {
138 172 jones
        // Get the XML query and covert it into a SQL statment
139 178 jones
        QuerySpecification qspec = new QuerySpecification(xmlquery,
140 172 jones
                                   parserName);
141 180 jones
        //System.out.println(qspec.printSQL());
142 172 jones
        pstmt = conn.prepareStatement( qspec.printSQL() );
143 155 jones
144 172 jones
        // Execute the SQL query using the JDBC connection
145 155 jones
        pstmt.execute();
146
        ResultSet rs = pstmt.getResultSet();
147
        boolean tableHasRows = rs.next();
148
        while (tableHasRows) {
149 170 jones
          docid = rs.getString(1);
150 441 bojilova
          if ( !hasReadPermission(conn, docid, user, group) ) {continue;}
151 155 jones
          docname = rs.getString(2);
152
          doctype = rs.getString(3);
153
          doctitle = rs.getString(4);
154 401 berkley
          createDate = rs.getString(5);
155
          updateDate = rs.getString(6);
156
157 155 jones
          document = new StringBuffer();
158 401 berkley
159 155 jones
          document.append("<docid>").append(docid).append("</docid>");
160
          if (docname != null) {
161
            document.append("<docname>" + docname + "</docname>");
162
          }
163
          if (doctype != null) {
164
            document.append("<doctype>" + doctype + "</doctype>");
165
          }
166
          if (doctitle != null) {
167
            document.append("<doctitle>" + doctitle + "</doctitle>");
168
          }
169 401 berkley
          if(createDate != null) {
170
            document.append("<createdate>" + createDate + "</createdate>");
171
          }
172
          if(updateDate != null) {
173
            document.append("<updatedate>" + updateDate + "</updatedate>");
174
          }
175 155 jones
176
          // Store the document id and the root node id
177 170 jones
          docListResult.put(docid,(String)document.toString());
178 155 jones
179
          // Advance to the next record in the cursor
180
          tableHasRows = rs.next();
181
        }
182 401 berkley
183
        if(qspec.containsExtendedSQL())
184
        {
185
          Vector extendedFields = new Vector(qspec.getReturnFieldList());
186
          Vector results = new Vector();
187
          pstmt = conn.prepareStatement(qspec.printExtendedSQL());
188
          pstmt.execute();
189
          rs = pstmt.getResultSet();
190
          tableHasRows = rs.next();
191
          while(tableHasRows)
192
          {
193
            docid = rs.getString(1);
194 441 bojilova
            if ( !hasReadPermission(conn, docid, user, group) ) {continue;}
195 401 berkley
            fieldname = rs.getString(2);
196
            fielddata = rs.getString(3);
197
198
            document = new StringBuffer();
199
200 423 berkley
            document.append("<param name=\"");
201 405 berkley
            document.append(fieldname);
202 423 berkley
            document.append("\">");
203 401 berkley
            document.append(fielddata);
204 423 berkley
            document.append("</param>");
205 401 berkley
206
            tableHasRows = rs.next();
207
            if(docListResult.containsKey(docid))
208
            {
209
              String removedelement = (String)docListResult.remove(docid);
210
              docListResult.put(docid, removedelement + document.toString());
211
            }
212
            else
213
            {
214
              docListResult.put(docid, document.toString());
215
            }
216
          }
217
        }
218 155 jones
        pstmt.close();
219
      } catch (SQLException e) {
220 180 jones
        System.err.println("Error getting id: " + e.getMessage());
221 170 jones
      } catch (IOException ioe) {
222
        System.err.println("Error printing qspec:");
223
        System.err.println(ioe.getMessage());
224 155 jones
      }
225 423 berkley
    //System.out.println("docListResult: ");
226
    //System.out.println(docListResult.toString());
227 155 jones
    return docListResult;
228
  }
229 342 berkley
230
  /**
231 436 berkley
   * returns a string array of the contents of a particular node.
232
   * If the node appears more than once, the contents are returned
233
   * in the order in which they appearred in the document.
234
   * @param nodename the name or path of the particular node.
235
   * @param docid the docid of the document you want the node from.
236
   * @param conn a database connection-this allows this method to be static
237
   */
238
  public static Object[] getNodeContent(String nodename, String docid,
239
                                        Connection conn)
240
  {
241
    StringBuffer query = new StringBuffer();
242
    Vector result = new Vector();
243
    PreparedStatement pstmt;
244
    query.append("select nodedata from xml_nodes where parentnodeid in ");
245
    query.append("(select nodeid from xml_index where path like '");
246
    query.append(nodename);
247
    query.append("' and docid like '").append(docid).append("')");
248
    try
249
    {
250
      pstmt = conn.prepareStatement(query.toString());
251
252
      // Execute the SQL query using the JDBC connection
253
      pstmt.execute();
254
      ResultSet rs = pstmt.getResultSet();
255
      boolean tableHasRows = rs.next();
256
      while (tableHasRows)
257
      {
258
        result.add(rs.getString(1));
259
        System.out.println(rs.getString(1));
260
        tableHasRows = rs.next();
261
      }
262
    }
263
    catch (SQLException e)
264
    {
265
      System.err.println("Error getting id: " + e.getMessage());
266
    }
267
268
    return result.toArray();
269
  }
270
271
  /**
272 342 berkley
   * format a structured query as an XML document that conforms
273
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
274
   * structured query engine
275
   *
276
   * @param params The list of parameters that  should be included in the query
277
   */
278 372 berkley
  public static String createSQuery(Hashtable params)
279 350 berkley
  {
280
    StringBuffer query = new StringBuffer();
281 342 berkley
    Enumeration elements;
282
    Enumeration keys;
283 372 berkley
    String doctype = null;
284
    String casesensitive = null;
285
    String searchmode = null;
286 342 berkley
    Object nextkey;
287
    Object nextelement;
288 350 berkley
    //add the xml headers
289
    query.append("<?xml version=\"1.0\"?>\n");
290 342 berkley
    query.append("<pathquery version=\"1.0\"><meta_file_id>");
291 350 berkley
292 342 berkley
    if(params.containsKey("meta_file_id"))
293
    {
294
      query.append( ((String[])params.get("meta_file_id"))[0]);
295
    	query.append("</meta_file_id>");
296
    }
297
    else
298
    {
299
      query.append("unspecified</meta_file_id>");
300
    }
301 350 berkley
302 342 berkley
    query.append("<querytitle>");
303
    if(params.containsKey("querytitle"))
304
    {
305
      query.append(((String[])params.get("querytitle"))[0]);
306
    	query.append("</querytitle>");
307
    }
308
    else
309
    {
310
    	query.append("unspecified</querytitle>");
311
    }
312 372 berkley
313
    if(params.containsKey("doctype"))
314
    {
315
      doctype = ((String[])params.get("doctype"))[0];
316
    }
317
    else
318
    {
319
      doctype = "ANY";
320
    }
321
322 401 berkley
    if(params.containsKey("returnfield"))
323
    {
324
      String[] returnfield = ((String[])params.get("returnfield"));
325
      for(int i=0; i<returnfield.length; i++)
326
      {
327
        query.append("<returnfield>").append(returnfield[i]);
328
        query.append("</returnfield>");
329
      }
330
    }
331
332 350 berkley
    //if you don't limit the query by doctype, then it just creates
333
    //an empty returndoctype tag.
334 342 berkley
    if (!doctype.equals("any") &&
335
        !doctype.equals("ANY") &&
336
        !doctype.equals("") )
337
    {
338
       query.append("<returndoctype>");
339
       query.append(doctype).append("</returndoctype>");
340
    }
341 350 berkley
    else
342
    {
343
      query.append("<returndoctype></returndoctype>");
344
    }
345
346
    //allows the dynamic switching of boolean operators
347
    if(params.containsKey("operator"))
348
    {
349
      query.append("<querygroup operator=\"" +
350 342 berkley
    		        ((String[])params.get("operator"))[0] + "\">");
351 350 berkley
    }
352
    else
353
    { //the default operator is UNION
354
      query.append("<querygroup operator=\"UNION\">");
355
    }
356 342 berkley
357 372 berkley
    if(params.containsKey("casesensitive"))
358
    {
359
      casesensitive = ((String[])params.get("casesensitive"))[0];
360
    }
361
    else
362
    {
363
      casesensitive = "false";
364
    }
365
366
    if(params.containsKey("searchmode"))
367
    {
368
      searchmode = ((String[])params.get("searchmode"))[0];
369
    }
370
    else
371
    {
372
      searchmode = "contains";
373
    }
374 342 berkley
375
    //anyfield is a special case because it does a
376
    //free text search.  It does not have a <pathexpr>
377 350 berkley
    //tag.  This allows for a free text search within the structured
378
    //query.  This is useful if the INTERSECT operator is used.
379
    if(params.containsKey("anyfield"))
380 342 berkley
    {
381 372 berkley
       String[] anyfield = ((String[])params.get("anyfield"));
382
       //allow for more than one value for anyfield
383
       for(int i=0; i<anyfield.length; i++)
384 350 berkley
       {
385 372 berkley
         if(!anyfield[i].equals(""))
386
         {
387
           query.append("<queryterm casesensitive=\"" + casesensitive +
388
                        "\" " + "searchmode=\"" + searchmode + "\"><value>" +
389
    			              anyfield[i] +
390
    			              "</value></queryterm>");
391
         }
392 350 berkley
       }
393 342 berkley
    }
394
395
    //this while loop finds the rest of the parameters
396
    //and attempts to query for the field specified
397
    //by the parameter.
398
    elements = params.elements();
399
    keys = params.keys();
400
    while(keys.hasMoreElements() && elements.hasMoreElements())
401
    {
402
      nextkey = keys.nextElement();
403
    	nextelement = elements.nextElement();
404 372 berkley
405 342 berkley
    	//make sure we aren't querying for any of these
406
    	//parameters since the are already in the query
407
      //in one form or another.
408
    	if(!nextkey.toString().equals("doctype") &&
409
    		 !nextkey.toString().equals("action")  &&
410
    		 !nextkey.toString().equals("qformat") &&
411
    		 !nextkey.toString().equals("anyfield") &&
412 401 berkley
         !nextkey.toString().equals("returnfield") &&
413 372 berkley
    		 !nextkey.toString().equals("operator") )
414 342 berkley
    	{
415 372 berkley
        //allow for more than value per field name
416
        for(int i=0; i<((String[])nextelement).length; i++)
417
        {
418
          if(!((String[])nextelement)[i].equals(""))
419
          {
420
            query.append("<queryterm casesensitive=\"" + casesensitive +"\" " +
421
    				             "searchmode=\"" + searchmode + "\">" +
422
    		                 "<value>" +
423
                         //add the query value
424
    		                 ((String[])nextelement)[i] +
425
    		                 "</value><pathexpr>" +
426
    		                 //add the path to query by
427
                         nextkey.toString() +
428
                         "</pathexpr></queryterm>");
429
          }
430
        }
431 342 berkley
    	}
432
    }
433
    query.append("</querygroup></pathquery>");
434 350 berkley
    //append on the end of the xml and return the result as a string
435 342 berkley
    return query.toString();
436
  }
437
438 181 jones
  /**
439
   * format a simple free-text value query as an XML document that conforms
440
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
441
   * structured query engine
442
   *
443
   * @param value the text string to search for in the xml catalog
444
   * @param doctype the type of documents to include in the result set -- use
445
   *        "any" or "ANY" for unfiltered result sets
446
   */
447
   public static String createQuery(String value, String doctype) {
448
     StringBuffer xmlquery = new StringBuffer();
449
     xmlquery.append("<?xml version=\"1.0\"?>\n");
450
     xmlquery.append("<pathquery version=\"1.0\">");
451
     xmlquery.append("<meta_file_id>Unspecified</meta_file_id>");
452
     xmlquery.append("<querytitle>Unspecified</querytitle>");
453
454
     if (!doctype.equals("any") && !doctype.equals("ANY")) {
455
       xmlquery.append("<returndoctype>");
456
       xmlquery.append(doctype).append("</returndoctype>");
457
     }
458
459
     xmlquery.append("<querygroup operator=\"UNION\">");
460 350 berkley
     //chad added - 8/14
461
     //the if statement allows a query to gracefully handle a null
462
     //query.  Without this if a nullpointerException is thrown.
463
     if(!value.equals(""))
464
     {
465
       xmlquery.append("<queryterm casesensitive=\"false\" ");
466
       xmlquery.append("searchmode=\"contains\">");
467
       xmlquery.append("<value>").append(value).append("</value>");
468
       xmlquery.append("</queryterm>");
469
     }
470 181 jones
     xmlquery.append("</querygroup>");
471
     xmlquery.append("</pathquery>");
472
473
474
     return (xmlquery.toString());
475
   }
476
477
  /**
478
   * format a simple free-text value query as an XML document that conforms
479
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
480
   * structured query engine
481
   *
482
   * @param value the text string to search for in the xml catalog
483
   */
484
   public static String createQuery(String value) {
485
     return createQuery(value, "any");
486
   }
487 441 bojilova
488
  /** Check for "read" permissions from DB connection */
489
  private boolean hasReadPermission(Connection conn, String docid,
490
                                     String user, String group)
491
                                     throws SQLException {
492
    // b' of the command line invocation
493
    if ( (user == null) && (group == null) ) {
494
      return true;
495
    }
496
497
    PreparedStatement pstmt;
498
    // checking if user is owner of docid or if docid has public access
499
    try {
500
      pstmt = conn.prepareStatement(
501
                   "SELECT 'x' FROM xml_documents " +
502
                   "WHERE docid LIKE ? AND user_owner LIKE ? " +
503
                   "UNION " +
504
                   "SELECT 'x' FROM xml_documents " +
505
                   "WHERE docid LIKE ? AND public_access = 1");
506
      // Bind the values to the query
507
      pstmt.setString(1, docid);
508
      pstmt.setString(2, user);
509
      pstmt.setString(3, docid);
510
511
      pstmt.execute();
512
      ResultSet rs = pstmt.getResultSet();
513
      boolean hasRow = rs.next();
514
      pstmt.close();
515
      if (hasRow) {
516
        return true;
517
      }
518
519
    } catch (SQLException e) {
520
      throw new
521
        SQLException("Error checking document's owner or public access: "
522
                      + e.getMessage());
523
    }
524
525
    // checking if docid has public access at this time
526
    try {
527
      pstmt = conn.prepareStatement(
528
                   "SELECT 'x' FROM xml_access " +
529
                   "WHERE docid LIKE ? " +
530
                   "AND principal_name = 'public' " +
531
                   "AND principal_type = 'user' " +
532
                   "AND sysdate BETWEEN nvl(begin_time,sysdate) " +
533
                                   "AND nvl(end_time,sysdate)");
534
      // Bind the values to the query
535
      pstmt.setString(1, docid);
536
537
      pstmt.execute();
538
      ResultSet rs = pstmt.getResultSet();
539
      boolean hasRow = rs.next();
540
      pstmt.close();
541
      if (hasRow) {
542
        return true;
543
      }
544
545
    } catch (SQLException e) {
546
      throw new
547
        SQLException("Error checking doc's public access: " + e.getMessage());
548
    }
549
550
    // checking access type from xml_access table
551
    int accesstype = 0;
552
    try {
553
      pstmt = conn.prepareStatement(
554
                   "SELECT access_type FROM xml_access " +
555
                   "WHERE docid LIKE ? " +
556
                   "AND principal_name LIKE ? " +
557
                   "AND principal_type = 'user' " +
558
                   "AND sysdate BETWEEN nvl(begin_time,sysdate) " +
559
                                   "AND nvl(end_time,sysdate) " +
560
                   "UNION " +
561
                   "SELECT access_type FROM xml_access " +
562
                   "WHERE docid LIKE ? " +
563
                   "AND principal_name LIKE ? " +
564
                   "AND principal_type = 'group' " +
565
                   "AND sysdate BETWEEN nvl(begin_time,sysdate) " +
566
                                   "AND nvl(end_time,sysdate)");
567
      // Bind the values to the query
568
      pstmt.setString(1, docid);
569
      pstmt.setString(2, user);
570
      pstmt.setString(3, docid);
571
      pstmt.setString(2, group);
572
573
      pstmt.execute();
574
      ResultSet rs = pstmt.getResultSet();
575
      boolean hasRows = rs.next();
576
      while ( hasRows ) {
577
        accesstype = rs.getInt(1);
578
        if ( (accesstype & READ) == READ ) {
579
          pstmt.close();
580
          return true;
581
        }
582
        hasRows = rs.next();
583
      }
584
585
      pstmt.close();
586
      return false;
587
588
    } catch (SQLException e) {
589
      throw new
590
      SQLException("Error getting document's permissions: " + e.getMessage());
591
    }
592
  }
593
594 155 jones
}
595 203 jones
596
/**
597
 * '$Log$
598 441 bojilova
 * 'Revision 1.18  2000/09/05 20:50:56  berkley
599
 * 'Added a method called getNodeContent which retrieves the content of a node in a document.  If there are more than one nodes with the same name returned, it returns an array with all of the data.
600
 * '
601 436 berkley
 * 'Revision 1.17  2000/08/31 21:20:39  berkley
602
 * 'changed xslf for new returnfield scheme.  the returnfields are now returned as <param name="<returnfield>"> tags.
603
 * 'hThe sql for the returnfield query was redone to fix a previous problem with slow queries
604
 * '
605 423 berkley
 * 'Revision 1.16  2000/08/23 22:55:25  berkley
606
 * 'changed the field names to be case-sensitive in the returnfields
607
 * '
608 405 berkley
 * 'Revision 1.15  2000/08/23 17:22:07  berkley
609
 * 'added support for the returnfield parameter
610
 * '-added the dynamic parameters to the returned hash table of documents
611
 * '
612 401 berkley
 * 'Revision 1.14  2000/08/17 16:02:34  berkley
613
 * 'Made changes to createSQuery to allow for multiple parameters of the same name.  Also changed the param list to include only "Hashtable params" without a "String doctype" since the doctype is already contained in the params.
614
 * '
615 372 berkley
 * 'Revision 1.13  2000/08/14 21:26:12  berkley
616
 * 'Added createSQuery() to handle structured queries of an arbitrary number of parameters.  Also modified createQuery() to handle a null query in a graceful manner.
617
 * '
618 350 berkley
 * 'Revision 1.12  2000/08/14 20:53:33  jones
619
 * 'Added "release" keyword to all metacat source files so that the release
620
 * 'number will be evident in software distributions.
621
 * '
622 349 jones
 * 'Revision 1.11  2000/08/11 18:26:07  berkley
623
 * 'added createSQuery
624
 * '
625 342 berkley
 * 'Revision 1.10  2000/07/26 20:40:41  higgins
626
 * 'no message
627
 * '
628 296 higgins
 * 'Revision 1.9  2000/06/26 10:35:04  jones
629
 * 'Merged in substantial changes to DBWriter and associated classes and to
630
 * 'the MetaCatServlet in order to accomodate the new UPDATE and DELETE
631
 * 'functions.  The command line tools and the parameters for the
632
 * 'servlet have changed substantially.
633
 * '
634 203 jones
 * 'Revision 1.8.2.2  2000/06/25 23:38:16  jones
635
 * 'Added RCSfile keyword
636
 * '
637
 * 'Revision 1.8.2.1  2000/06/25 23:34:17  jones
638
 * 'Changed documentation formatting, added log entries at bottom of source files
639
 * ''
640
 */