Project

General

Profile

1 155 jones
/**
2 203 jones
 *  '$RCSfile$'
3
 *    Purpose: A Class that searches a relational DB for elements and
4
 *             attributes that have free text matches a query string,
5
 *             or structured query matches to a path specified node in the
6
 *             XML hierarchy.  It returns a result set consisting of the
7
 *             document ID for each document that satisfies the query
8
 *  Copyright: 2000 Regents of the University of California and the
9
 *             National Center for Ecological Analysis and Synthesis
10
 *    Authors: Matt Jones
11 349 jones
 *    Release: @release@
12 155 jones
 *
13 203 jones
 *   '$Author$'
14
 *     '$Date$'
15
 * '$Revision$'
16 155 jones
 */
17
18
package edu.ucsb.nceas.metacat;
19
20
import java.io.*;
21 401 berkley
import java.util.Vector;
22 155 jones
import java.net.URL;
23
import java.net.MalformedURLException;
24
import java.sql.*;
25
import java.util.Stack;
26
import java.util.Hashtable;
27
import java.util.Enumeration;
28
29
/**
30 172 jones
 * A Class that searches a relational DB for elements and
31
 * attributes that have free text matches a query string,
32
 * or structured query matches to a path specified node in the
33
 * XML hierarchy.  It returns a result set consisting of the
34
 * document ID for each document that satisfies the query
35 155 jones
 */
36
public class DBQuery {
37
38
  private Connection	conn = null;
39 172 jones
  private String	parserName = null;
40 155 jones
41
  /**
42
   * the main routine used to test the DBQuery utility.
43 184 jones
   * <p>
44
   * Usage: java DBQuery <xmlfile>
45 155 jones
   *
46 170 jones
   * @param xmlfile the filename of the xml file containing the query
47 155 jones
   */
48
  static public void main(String[] args) {
49
50 184 jones
     if (args.length < 1)
51 155 jones
     {
52
        System.err.println("Wrong number of arguments!!!");
53 184 jones
        System.err.println("USAGE: java DBQuery <xmlfile>");
54 155 jones
        return;
55
     } else {
56
        try {
57
58 170 jones
          String xmlfile  = args[0];
59 155 jones
60
          // Open a connection to the database
61 184 jones
          MetaCatUtil   util = new MetaCatUtil();
62
          Connection dbconn = util.openDBConnection();
63 172 jones
64 170 jones
          // Execute the query
65 184 jones
          DBQuery queryobj = new DBQuery(dbconn, util.getOption("saxparser"));
66 170 jones
          FileReader xml = new FileReader(new File(xmlfile));
67 155 jones
          Hashtable nodelist = null;
68 181 jones
          nodelist = queryobj.findDocuments(xml);
69 155 jones
70 172 jones
          // Print the reulting document listing
71 155 jones
          StringBuffer result = new StringBuffer();
72
          String document = null;
73 170 jones
          String docid = null;
74 155 jones
          result.append("<?xml version=\"1.0\"?>\n");
75 296 higgins
          result.append("<resultset>\n");
76
  // following line removed by Dan Higgins to avoid insertion of query XML inside returned XML doc
77
  //        result.append("  <query>" + xmlfile + "</query>\n");
78 155 jones
          Enumeration doclist = nodelist.keys();
79
          while (doclist.hasMoreElements()) {
80 170 jones
            docid = (String)doclist.nextElement();
81 155 jones
            document = (String)nodelist.get(docid);
82
            result.append("  <document>\n    " + document +
83
                          "\n  </document>\n");
84
          }
85
          result.append("</resultset>\n");
86
87
          System.out.println(result);
88
89
        } catch (Exception e) {
90
          System.err.println("EXCEPTION HANDLING REQUIRED");
91
          System.err.println(e.getMessage());
92
          e.printStackTrace(System.err);
93
        }
94
     }
95
  }
96
97
  /**
98
   * construct an instance of the DBQuery class
99
   *
100
   * <p>Generally, one would call the findDocuments() routine after creating
101
   * an instance to specify the search query</p>
102
   *
103
   * @param conn the JDBC connection that we use for the query
104 172 jones
   * @param parserName the fully qualified name of a Java class implementing
105 185 jones
   *                   the org.xml.sax.XMLReader interface
106 155 jones
   */
107 172 jones
  public DBQuery( Connection conn, String parserName )
108 155 jones
                  throws IOException,
109
                         SQLException,
110 172 jones
                         ClassNotFoundException {
111 155 jones
    this.conn = conn;
112 172 jones
    this.parserName = parserName;
113 155 jones
  }
114
115
  /**
116
   * routine to search the elements and attributes looking to match query
117
   *
118 178 jones
   * @param xmlquery the xml serialization of the query (@see pathquery.dtd)
119 155 jones
   */
120 178 jones
  public Hashtable findDocuments(Reader xmlquery) {
121 155 jones
      Hashtable	 docListResult = new Hashtable();
122
      PreparedStatement pstmt;
123 170 jones
      String docid = null;
124 155 jones
      String docname = null;
125
      String doctype = null;
126
      String doctitle = null;
127 401 berkley
      String createDate = null;
128
      String updateDate = null;
129
      String fieldname = null;
130
      String fielddata = null;
131 155 jones
      StringBuffer document = null;
132
133
      try {
134 172 jones
        // Get the XML query and covert it into a SQL statment
135 178 jones
        QuerySpecification qspec = new QuerySpecification(xmlquery,
136 172 jones
                                   parserName);
137 180 jones
        //System.out.println(qspec.printSQL());
138 172 jones
        pstmt = conn.prepareStatement( qspec.printSQL() );
139 155 jones
140 172 jones
        // Execute the SQL query using the JDBC connection
141 155 jones
        pstmt.execute();
142
        ResultSet rs = pstmt.getResultSet();
143
        boolean tableHasRows = rs.next();
144
        while (tableHasRows) {
145 170 jones
          docid = rs.getString(1);
146 155 jones
          docname = rs.getString(2);
147
          doctype = rs.getString(3);
148
          doctitle = rs.getString(4);
149 401 berkley
          createDate = rs.getString(5);
150
          updateDate = rs.getString(6);
151
152 155 jones
          document = new StringBuffer();
153 401 berkley
154 155 jones
          document.append("<docid>").append(docid).append("</docid>");
155
          if (docname != null) {
156
            document.append("<docname>" + docname + "</docname>");
157
          }
158
          if (doctype != null) {
159
            document.append("<doctype>" + doctype + "</doctype>");
160
          }
161
          if (doctitle != null) {
162
            document.append("<doctitle>" + doctitle + "</doctitle>");
163
          }
164 401 berkley
          if(createDate != null) {
165
            document.append("<createdate>" + createDate + "</createdate>");
166
          }
167
          if(updateDate != null) {
168
            document.append("<updatedate>" + updateDate + "</updatedate>");
169
          }
170 155 jones
171
          // Store the document id and the root node id
172 170 jones
          docListResult.put(docid,(String)document.toString());
173 155 jones
174
          // Advance to the next record in the cursor
175
          tableHasRows = rs.next();
176
        }
177 401 berkley
178
        if(qspec.containsExtendedSQL())
179
        {
180
          Vector extendedFields = new Vector(qspec.getReturnFieldList());
181
          Vector results = new Vector();
182
          pstmt = conn.prepareStatement(qspec.printExtendedSQL());
183
          pstmt.execute();
184
          rs = pstmt.getResultSet();
185
          tableHasRows = rs.next();
186
          while(tableHasRows)
187
          {
188
            docid = rs.getString(1);
189
            fieldname = rs.getString(2);
190
            fielddata = rs.getString(3);
191
192
            document = new StringBuffer();
193
194 423 berkley
            document.append("<param name=\"");
195 405 berkley
            document.append(fieldname);
196 423 berkley
            document.append("\">");
197 401 berkley
            document.append(fielddata);
198 423 berkley
            document.append("</param>");
199 401 berkley
200
            tableHasRows = rs.next();
201
            if(docListResult.containsKey(docid))
202
            {
203
              String removedelement = (String)docListResult.remove(docid);
204
              docListResult.put(docid, removedelement + document.toString());
205
            }
206
            else
207
            {
208
              docListResult.put(docid, document.toString());
209
            }
210
          }
211
        }
212 155 jones
        pstmt.close();
213
      } catch (SQLException e) {
214 180 jones
        System.err.println("Error getting id: " + e.getMessage());
215 170 jones
      } catch (IOException ioe) {
216
        System.err.println("Error printing qspec:");
217
        System.err.println(ioe.getMessage());
218 155 jones
      }
219 423 berkley
    //System.out.println("docListResult: ");
220
    //System.out.println(docListResult.toString());
221 155 jones
    return docListResult;
222
  }
223 342 berkley
224
  /**
225
   * format a structured query as an XML document that conforms
226
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
227
   * structured query engine
228
   *
229
   * @param params The list of parameters that  should be included in the query
230
   */
231 372 berkley
  public static String createSQuery(Hashtable params)
232 350 berkley
  {
233
    StringBuffer query = new StringBuffer();
234 342 berkley
    Enumeration elements;
235
    Enumeration keys;
236 372 berkley
    String doctype = null;
237
    String casesensitive = null;
238
    String searchmode = null;
239 342 berkley
    Object nextkey;
240
    Object nextelement;
241 350 berkley
    //add the xml headers
242
    query.append("<?xml version=\"1.0\"?>\n");
243 342 berkley
    query.append("<pathquery version=\"1.0\"><meta_file_id>");
244 350 berkley
245 342 berkley
    if(params.containsKey("meta_file_id"))
246
    {
247
      query.append( ((String[])params.get("meta_file_id"))[0]);
248
    	query.append("</meta_file_id>");
249
    }
250
    else
251
    {
252
      query.append("unspecified</meta_file_id>");
253
    }
254 350 berkley
255 342 berkley
    query.append("<querytitle>");
256
    if(params.containsKey("querytitle"))
257
    {
258
      query.append(((String[])params.get("querytitle"))[0]);
259
    	query.append("</querytitle>");
260
    }
261
    else
262
    {
263
    	query.append("unspecified</querytitle>");
264
    }
265 372 berkley
266
    if(params.containsKey("doctype"))
267
    {
268
      doctype = ((String[])params.get("doctype"))[0];
269
    }
270
    else
271
    {
272
      doctype = "ANY";
273
    }
274
275 401 berkley
    if(params.containsKey("returnfield"))
276
    {
277
      String[] returnfield = ((String[])params.get("returnfield"));
278
      for(int i=0; i<returnfield.length; i++)
279
      {
280
        query.append("<returnfield>").append(returnfield[i]);
281
        query.append("</returnfield>");
282
      }
283
    }
284
285 350 berkley
    //if you don't limit the query by doctype, then it just creates
286
    //an empty returndoctype tag.
287 342 berkley
    if (!doctype.equals("any") &&
288
        !doctype.equals("ANY") &&
289
        !doctype.equals("") )
290
    {
291
       query.append("<returndoctype>");
292
       query.append(doctype).append("</returndoctype>");
293
    }
294 350 berkley
    else
295
    {
296
      query.append("<returndoctype></returndoctype>");
297
    }
298
299
    //allows the dynamic switching of boolean operators
300
    if(params.containsKey("operator"))
301
    {
302
      query.append("<querygroup operator=\"" +
303 342 berkley
    		        ((String[])params.get("operator"))[0] + "\">");
304 350 berkley
    }
305
    else
306
    { //the default operator is UNION
307
      query.append("<querygroup operator=\"UNION\">");
308
    }
309 342 berkley
310 372 berkley
    if(params.containsKey("casesensitive"))
311
    {
312
      casesensitive = ((String[])params.get("casesensitive"))[0];
313
    }
314
    else
315
    {
316
      casesensitive = "false";
317
    }
318
319
    if(params.containsKey("searchmode"))
320
    {
321
      searchmode = ((String[])params.get("searchmode"))[0];
322
    }
323
    else
324
    {
325
      searchmode = "contains";
326
    }
327 342 berkley
328
    //anyfield is a special case because it does a
329
    //free text search.  It does not have a <pathexpr>
330 350 berkley
    //tag.  This allows for a free text search within the structured
331
    //query.  This is useful if the INTERSECT operator is used.
332
    if(params.containsKey("anyfield"))
333 342 berkley
    {
334 372 berkley
       String[] anyfield = ((String[])params.get("anyfield"));
335
       //allow for more than one value for anyfield
336
       for(int i=0; i<anyfield.length; i++)
337 350 berkley
       {
338 372 berkley
         if(!anyfield[i].equals(""))
339
         {
340
           query.append("<queryterm casesensitive=\"" + casesensitive +
341
                        "\" " + "searchmode=\"" + searchmode + "\"><value>" +
342
    			              anyfield[i] +
343
    			              "</value></queryterm>");
344
         }
345 350 berkley
       }
346 342 berkley
    }
347
348
    //this while loop finds the rest of the parameters
349
    //and attempts to query for the field specified
350
    //by the parameter.
351
    elements = params.elements();
352
    keys = params.keys();
353
    while(keys.hasMoreElements() && elements.hasMoreElements())
354
    {
355
      nextkey = keys.nextElement();
356
    	nextelement = elements.nextElement();
357 372 berkley
358 342 berkley
    	//make sure we aren't querying for any of these
359
    	//parameters since the are already in the query
360
      //in one form or another.
361
    	if(!nextkey.toString().equals("doctype") &&
362
    		 !nextkey.toString().equals("action")  &&
363
    		 !nextkey.toString().equals("qformat") &&
364
    		 !nextkey.toString().equals("anyfield") &&
365 401 berkley
         !nextkey.toString().equals("returnfield") &&
366 372 berkley
    		 !nextkey.toString().equals("operator") )
367 342 berkley
    	{
368 372 berkley
        //allow for more than value per field name
369
        for(int i=0; i<((String[])nextelement).length; i++)
370
        {
371
          if(!((String[])nextelement)[i].equals(""))
372
          {
373
            query.append("<queryterm casesensitive=\"" + casesensitive +"\" " +
374
    				             "searchmode=\"" + searchmode + "\">" +
375
    		                 "<value>" +
376
                         //add the query value
377
    		                 ((String[])nextelement)[i] +
378
    		                 "</value><pathexpr>" +
379
    		                 //add the path to query by
380
                         nextkey.toString() +
381
                         "</pathexpr></queryterm>");
382
          }
383
        }
384 342 berkley
    	}
385
    }
386
    query.append("</querygroup></pathquery>");
387 350 berkley
    //append on the end of the xml and return the result as a string
388 342 berkley
    return query.toString();
389
  }
390
391 181 jones
  /**
392
   * format a simple free-text value query as an XML document that conforms
393
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
394
   * structured query engine
395
   *
396
   * @param value the text string to search for in the xml catalog
397
   * @param doctype the type of documents to include in the result set -- use
398
   *        "any" or "ANY" for unfiltered result sets
399
   */
400
   public static String createQuery(String value, String doctype) {
401
     StringBuffer xmlquery = new StringBuffer();
402
     xmlquery.append("<?xml version=\"1.0\"?>\n");
403
     xmlquery.append("<pathquery version=\"1.0\">");
404
     xmlquery.append("<meta_file_id>Unspecified</meta_file_id>");
405
     xmlquery.append("<querytitle>Unspecified</querytitle>");
406
407
     if (!doctype.equals("any") && !doctype.equals("ANY")) {
408
       xmlquery.append("<returndoctype>");
409
       xmlquery.append(doctype).append("</returndoctype>");
410
     }
411
412
     xmlquery.append("<querygroup operator=\"UNION\">");
413 350 berkley
     //chad added - 8/14
414
     //the if statement allows a query to gracefully handle a null
415
     //query.  Without this if a nullpointerException is thrown.
416
     if(!value.equals(""))
417
     {
418
       xmlquery.append("<queryterm casesensitive=\"false\" ");
419
       xmlquery.append("searchmode=\"contains\">");
420
       xmlquery.append("<value>").append(value).append("</value>");
421
       xmlquery.append("</queryterm>");
422
     }
423 181 jones
     xmlquery.append("</querygroup>");
424
     xmlquery.append("</pathquery>");
425
426
427
     return (xmlquery.toString());
428
   }
429
430
  /**
431
   * format a simple free-text value query as an XML document that conforms
432
   * to the pathquery.dtd and is appropriate for submission to the DBQuery
433
   * structured query engine
434
   *
435
   * @param value the text string to search for in the xml catalog
436
   */
437
   public static String createQuery(String value) {
438
     return createQuery(value, "any");
439
   }
440 155 jones
}
441 203 jones
442
/**
443
 * '$Log$
444 423 berkley
 * 'Revision 1.16  2000/08/23 22:55:25  berkley
445
 * 'changed the field names to be case-sensitive in the returnfields
446
 * '
447 405 berkley
 * 'Revision 1.15  2000/08/23 17:22:07  berkley
448
 * 'added support for the returnfield parameter
449
 * '-added the dynamic parameters to the returned hash table of documents
450
 * '
451 401 berkley
 * 'Revision 1.14  2000/08/17 16:02:34  berkley
452
 * 'Made changes to createSQuery to allow for multiple parameters of the same name.  Also changed the param list to include only "Hashtable params" without a "String doctype" since the doctype is already contained in the params.
453
 * '
454 372 berkley
 * 'Revision 1.13  2000/08/14 21:26:12  berkley
455
 * 'Added createSQuery() to handle structured queries of an arbitrary number of parameters.  Also modified createQuery() to handle a null query in a graceful manner.
456
 * '
457 350 berkley
 * 'Revision 1.12  2000/08/14 20:53:33  jones
458
 * 'Added "release" keyword to all metacat source files so that the release
459
 * 'number will be evident in software distributions.
460
 * '
461 349 jones
 * 'Revision 1.11  2000/08/11 18:26:07  berkley
462
 * 'added createSQuery
463
 * '
464 342 berkley
 * 'Revision 1.10  2000/07/26 20:40:41  higgins
465
 * 'no message
466
 * '
467 296 higgins
 * 'Revision 1.9  2000/06/26 10:35:04  jones
468
 * 'Merged in substantial changes to DBWriter and associated classes and to
469
 * 'the MetaCatServlet in order to accomodate the new UPDATE and DELETE
470
 * 'functions.  The command line tools and the parameters for the
471
 * 'servlet have changed substantially.
472
 * '
473 203 jones
 * 'Revision 1.8.2.2  2000/06/25 23:38:16  jones
474
 * 'Added RCSfile keyword
475
 * '
476
 * 'Revision 1.8.2.1  2000/06/25 23:34:17  jones
477
 * 'Changed documentation formatting, added log entries at bottom of source files
478
 * ''
479
 */