Project

General

Profile

1 155 jones
/**
2 203 jones
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be
4
 *             constructed from an XML serialization conforming to
5
 *             pathquery.dtd. The printSQL() method can be used to print
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10 349 jones
 *    Release: @release@
11 155 jones
 *
12 203 jones
 *   '$Author$'
13
 *     '$Date$'
14
 * '$Revision$'
15 155 jones
 */
16
17
package edu.ucsb.nceas.metacat;
18
19
import java.io.*;
20
import java.util.Stack;
21 158 jones
import java.util.Vector;
22 159 jones
import java.util.Enumeration;
23 155 jones
24 185 jones
import org.xml.sax.Attributes;
25 158 jones
import org.xml.sax.InputSource;
26
import org.xml.sax.SAXException;
27
import org.xml.sax.SAXParseException;
28 185 jones
import org.xml.sax.XMLReader;
29
import org.xml.sax.helpers.XMLReaderFactory;
30
import org.xml.sax.helpers.DefaultHandler;
31 155 jones
32 402 berkley
/**
33 172 jones
 * A Class that represents a structured query, and can be
34
 * constructed from an XML serialization conforming to pathquery.dtd.
35
 * The printSQL() method can be used to print a SQL serialization of the query.
36 155 jones
 */
37 185 jones
public class QuerySpecification extends DefaultHandler {
38 155 jones
39 402 berkley
  private boolean containsExtendedSQL=false;
40
41 158 jones
  // Query data structures
42
  private String meta_file_id;
43
  private String querytitle;
44 172 jones
  private Vector doctypeList;
45 402 berkley
  private Vector returnFieldList;
46 158 jones
  private QueryGroup query = null;
47
48
  private Stack elementStack;
49
  private Stack queryStack;
50 159 jones
  private String currentValue;
51
  private String currentPathexpr;
52 172 jones
  private String parserName = null;
53 158 jones
54 155 jones
  /**
55
   * construct an instance of the QuerySpecification class
56
   *
57 172 jones
   * @param queryspec the XML representation of the query (should conform
58
   *                  to pathquery.dtd) as a Reader
59
   * @param parserName the fully qualified name of a Java Class implementing
60 185 jones
   *                  the org.xml.sax.XMLReader interface
61 155 jones
   */
62 172 jones
  public QuerySpecification( Reader queryspec, String parserName )
63
         throws IOException {
64 155 jones
    super();
65 402 berkley
66 172 jones
    // Initialize the class variables
67
    doctypeList = new Vector();
68 158 jones
    elementStack = new Stack();
69
    queryStack   = new Stack();
70 402 berkley
    returnFieldList = new Vector();
71 172 jones
    this.parserName = parserName;
72 158 jones
73
    // Initialize the parser and read the queryspec
74 185 jones
    XMLReader parser = initializeParser();
75 181 jones
    if (parser == null) {
76
      System.err.println("SAX parser not instantiated properly.");
77
    }
78 155 jones
    try {
79
      parser.parse(new InputSource(queryspec));
80
    } catch (SAXException e) {
81 180 jones
      System.err.println("error parsing data");
82
      System.err.println(e.getMessage());
83 155 jones
    }
84
  }
85
86
  /**
   * construct an instance of the QuerySpecification class from a query
   * held in a String; the text is wrapped in a StringReader and handed to
   * the Reader based constructor.
   *
   * @param queryspec the XML representation of the query (should conform
   *                  to pathquery.dtd) as a String
   * @param parserName the fully qualified name of a Java Class implementing
   *                  the org.xml.sax.XMLReader interface
   * @throws IOException propagated from the Reader based constructor
   */
  public QuerySpecification( String queryspec, String parserName )
         throws IOException {
    this(new StringReader(queryspec), parserName);
  }
98
99
  /** Main routine for testing */
100
  static public void main(String[] args) {
101
102
     if (args.length < 1) {
103
       System.err.println("Wrong number of arguments!!!");
104
       System.err.println("USAGE: java QuerySpecification <xmlfile>");
105
       return;
106
     } else {
107
       String xmlfile  = args[0];
108
109
       try {
110 203 jones
         MetaCatUtil util = new MetaCatUtil();
111 155 jones
         FileReader xml = new FileReader(new File(xmlfile));
112 203 jones
         QuerySpecification qspec =
113
                 new QuerySpecification(xml, util.getOption("saxparser"));
114 172 jones
         System.out.println(qspec.printSQL());
115 181 jones
116 155 jones
       } catch (IOException e) {
117
         System.err.println(e.getMessage());
118
       }
119
120
     }
121
  }
122 402 berkley
123
  /**
124
   * Returns true if the parsed query contains and extended xml query
125
   * (i.e. there is at least one &lt;returnfield&gt; in the pathquery document)
126
   */
127
  public boolean containsExtendedSQL()
128
  {
129
    if(containsExtendedSQL)
130
    {
131
      return true;
132
    }
133
    else
134
    {
135
      return false;
136
    }
137
  }
138
139
  /**
140
   * Accessor method to return a vector of the extended return fields as
141
   * defined in the &lt;returnfield&gt; tag in the pathquery dtd.
142
   */
143
  public Vector getReturnFieldList()
144
  {
145
    return this.returnFieldList;
146
  }
147 155 jones
148 172 jones
  /**
149
   * Set up the SAX parser for reading the XML serialized query
150
   */
151 185 jones
  private XMLReader initializeParser() {
152
    XMLReader parser = null;
153 172 jones
154 155 jones
    // Set up the SAX document handlers for parsing
155
    try {
156
157
      // Get an instance of the parser
158 185 jones
      parser = XMLReaderFactory.createXMLReader(parserName);
159 155 jones
160 185 jones
      // Set the ContentHandler to this instance
161
      parser.setContentHandler(this);
162 155 jones
163 185 jones
      // Set the error Handler to this instance
164 158 jones
      parser.setErrorHandler(this);
165 155 jones
166
    } catch (Exception e) {
167
       System.err.println(e.toString());
168
    }
169
170
    return parser;
171
  }
172
173 172 jones
  /**
174
   * callback method used by the SAX Parser when the start tag of an
175
   * element is detected. Used in this context to parse and store
176
   * the query information in class variables.
177
   */
178 185 jones
  public void startElement (String uri, String localName,
179
                            String qName, Attributes atts)
180 155 jones
         throws SAXException {
181 185 jones
    BasicNode currentNode = new BasicNode(localName);
182 159 jones
    // add attributes to BasicNode here
183
    if (atts != null) {
184
      int len = atts.getLength();
185
      for (int i = 0; i < len; i++) {
186 185 jones
        currentNode.setAttribute(atts.getLocalName(i), atts.getValue(i));
187 159 jones
      }
188
    }
189
190 158 jones
    elementStack.push(currentNode);
191 159 jones
    if (currentNode.getTagName().equals("querygroup")) {
192 158 jones
      QueryGroup currentGroup = new QueryGroup(
193 178 jones
                                currentNode.getAttribute("operator"));
194 158 jones
      if (query == null) {
195
        query = currentGroup;
196 159 jones
      } else {
197
        QueryGroup parentGroup = (QueryGroup)queryStack.peek();
198
        parentGroup.addChild(currentGroup);
199 158 jones
      }
200 159 jones
      queryStack.push(currentGroup);
201 158 jones
    }
202 155 jones
  }
203
204 172 jones
  /**
205
   * callback method used by the SAX Parser when the end tag of an
206
   * element is detected. Used in this context to parse and store
207
   * the query information in class variables.
208
   */
209 185 jones
  public void endElement (String uri, String localName,
210
                          String qName) throws SAXException {
211 158 jones
    BasicNode leaving = (BasicNode)elementStack.pop();
212 159 jones
    if (leaving.getTagName().equals("queryterm")) {
213
      boolean isCaseSensitive = (new Boolean(
214
              leaving.getAttribute("casesensitive"))).booleanValue();
215
      QueryTerm currentTerm = null;
216
      if (currentPathexpr == null) {
217
        currentTerm = new QueryTerm(isCaseSensitive,
218
                      leaving.getAttribute("searchmode"),currentValue);
219
      } else {
220
        currentTerm = new QueryTerm(isCaseSensitive,
221
                      leaving.getAttribute("searchmode"),currentValue,
222
                      currentPathexpr);
223
      }
224
      QueryGroup currentGroup = (QueryGroup)queryStack.peek();
225
      currentGroup.addChild(currentTerm);
226
      currentValue = null;
227
      currentPathexpr = null;
228
    } else if (leaving.getTagName().equals("querygroup")) {
229
      QueryGroup leavingGroup = (QueryGroup)queryStack.pop();
230 158 jones
    }
231 155 jones
  }
232 158 jones
233 172 jones
  /**
234
   * callback method used by the SAX Parser when the text sequences of an
235
   * xml stream are detected. Used in this context to parse and store
236
   * the query information in class variables.
237
   */
238 158 jones
  public void characters(char ch[], int start, int length) {
239
240
    String inputString = new String(ch, start, length);
241
    BasicNode currentNode = (BasicNode)elementStack.peek();
242
    String currentTag = currentNode.getTagName();
243
    if (currentTag.equals("meta_file_id")) {
244
      meta_file_id = inputString;
245
    } else if (currentTag.equals("querytitle")) {
246
      querytitle = inputString;
247 159 jones
    } else if (currentTag.equals("value")) {
248
      currentValue = inputString;
249
    } else if (currentTag.equals("pathexpr")) {
250
      currentPathexpr = inputString;
251 172 jones
    } else if (currentTag.equals("returndoctype")) {
252
      doctypeList.add(inputString);
253 402 berkley
    } else if (currentTag.equals("returnfield")) {
254
      returnFieldList.add(inputString);
255
      containsExtendedSQL = true;
256 158 jones
    }
257
  }
258
259 172 jones
260
  /**
261
   * create a SQL serialization of the query that this instance represents
262
   */
263
  public String printSQL() {
264 170 jones
    StringBuffer self = new StringBuffer();
265
266 402 berkley
    self.append("SELECT docid,docname,doctype,doctitle,");
267
    self.append("date_created, date_updated ");
268 172 jones
    self.append("FROM xml_documents WHERE docid IN (");
269 170 jones
270 178 jones
    // This determines the documents that meet the query conditions
271 172 jones
    self.append(query.printSQL());
272
273
    self.append(") ");
274 402 berkley
275 172 jones
    // Add SQL to filter for doctypes requested in the query
276
    if (!doctypeList.isEmpty()) {
277
      boolean firstdoctype = true;
278
      self.append(" AND (");
279
      Enumeration en = doctypeList.elements();
280
      while (en.hasMoreElements()) {
281
        String currentDoctype = (String)en.nextElement();
282
        if (firstdoctype) {
283 402 berkley
           firstdoctype = false;
284
           self.append(" doctype = '" + currentDoctype + "'");
285 172 jones
        } else {
286
          self.append(" OR doctype = '" + currentDoctype + "'");
287
        }
288
      }
289
      self.append(") ");
290
    }
291 402 berkley
292 170 jones
    return self.toString();
293
  }
294 402 berkley
295
  /**
296
   * This method prints sql based upon the &lt;returnfield&gt; tag in the
297
   * pathquery document.  This allows for customization of the
298
   * returned fields
299
   */
300
  public String printExtendedSQL()
301
  {
302
    StringBuffer self = new StringBuffer();
303 423 berkley
    self.append("select xml_nodes.docid, xml_index.path, xml_nodes.nodedata ");
304
    self.append("from xml_index, xml_nodes where xml_index.nodeid=");
305
    self.append("xml_nodes.parentnodeid and (xml_index.path like '");
306 402 berkley
    boolean firstfield = true;
307
    //put the returnfields into the query
308
    //the for loop allows for multiple fields
309
    for(int i=0; i<returnFieldList.size(); i++)
310
    {
311
      if(firstfield)
312
      {
313
        firstfield = false;
314 405 berkley
        self.append((String)returnFieldList.elementAt(i));
315 402 berkley
        self.append("' ");
316
      }
317
      else
318
      {
319 423 berkley
        self.append("or xml_index.path like '");
320 405 berkley
        self.append((String)returnFieldList.elementAt(i));
321 402 berkley
        self.append("' ");
322
      }
323
    }
324 423 berkley
    self.append(") AND xml_nodes.docid in (");
325 402 berkley
    self.append(query.printSQL());
326
    self.append(")");
327 423 berkley
    self.append(" AND xml_nodes.nodetype = 'TEXT'");
328 405 berkley
329 423 berkley
    //System.out.println(self.toString());
330 402 berkley
    return self.toString();
331
  }
332 453 berkley
333 172 jones
  /**
334 453 berkley
   *
335
   */
336
  public static String printPackageSQL()
337
  {
338
    StringBuffer self = new StringBuffer();
339
    self.append("select z.nodedata, x.nodedata, y.nodedata from ");
340
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
341
    self.append("'package/relation/subject') s, (select nodeid, parentnodeid ");
342
    self.append("from xml_index where path like ");
343
    self.append("'package/relation/relationship') rel, ");
344
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
345
    self.append("'package/relation/object') o, ");
346
    self.append("xml_nodes x, xml_nodes y, xml_nodes z ");
347
    self.append("where s.parentnodeid = rel.parentnodeid ");
348
    self.append("and rel.parentnodeid = o.parentnodeid ");
349
    self.append("and x.parentnodeid in rel.nodeid ");
350
    self.append("and y.parentnodeid in o.nodeid ");
351
    self.append("and z.parentnodeid in s.nodeid ");
352
    //self.append("and z.nodedata like '%");
353
    //self.append(docid);
354
    //self.append("%'");
355
    return self.toString();
356
  }
357
358
  /**
359 172 jones
   * create a String description of the query that this instance represents.
360
   * This should become a way to get the XML serialization of the query.
361
   */
362 159 jones
  public String toString() {
363
    return "meta_file_id=" + meta_file_id + "\n" +
364
           "querytitle=" + querytitle + "\n" + query;
365
  }
366
367 158 jones
  /** a utility class that represents a group of terms in a query */
368
  private class QueryGroup {
369 178 jones
    private String operator = null;  // indicates how query terms are combined
370
    private Vector children = null;  // the list of query terms and groups
371 158 jones
372 172 jones
    /**
373
     * construct a new QueryGroup
374
     *
375 178 jones
     * @param operator the boolean conector used to connect query terms
376 172 jones
     *                    in this query group
377
     */
378 178 jones
    public QueryGroup(String operator) {
379
      this.operator = operator;
380 158 jones
      children = new Vector();
381
    }
382
383 172 jones
    /**
384
     * Add a child QueryGroup to this QueryGroup
385
     *
386
     * @param qgroup the query group to be added to the list of terms
387
     */
388 158 jones
    public void addChild(QueryGroup qgroup) {
389
      children.add((Object)qgroup);
390
    }
391
392 172 jones
    /**
393
     * Add a child QueryTerm to this QueryGroup
394
     *
395
     * @param qterm the query term to be added to the list of terms
396
     */
397 158 jones
    public void addChild(QueryTerm qterm) {
398
      children.add((Object)qterm);
399
    }
400
401 172 jones
    /**
402
     * Retrieve an Enumeration of query terms for this QueryGroup
403
     */
404 158 jones
    public Enumeration getChildren() {
405
      return children.elements();
406
    }
407 159 jones
408 172 jones
    /**
409
     * create a SQL serialization of the query that this instance represents
410
     */
411
    public String printSQL() {
412 170 jones
      StringBuffer self = new StringBuffer();
413
      boolean first = true;
414
415
      self.append("(");
416
417
      Enumeration en= getChildren();
418
      while (en.hasMoreElements()) {
419
        Object qobject = en.nextElement();
420
        if (first) {
421
          first = false;
422
        } else {
423 178 jones
          self.append(" " + operator + " ");
424 170 jones
        }
425
        if (qobject instanceof QueryGroup) {
426
          QueryGroup qg = (QueryGroup)qobject;
427 172 jones
          self.append(qg.printSQL());
428 170 jones
        } else if (qobject instanceof QueryTerm) {
429
          QueryTerm qt = (QueryTerm)qobject;
430 172 jones
          self.append(qt.printSQL());
431 170 jones
        } else {
432
          System.err.println("qobject wrong type: fatal error");
433
        }
434
      }
435
      self.append(") \n");
436
      return self.toString();
437
    }
438
439 172 jones
    /**
440
     * create a String description of the query that this instance represents.
441
     * This should become a way to get the XML serialization of the query.
442
     */
443 159 jones
    public String toString() {
444
      StringBuffer self = new StringBuffer();
445
446 178 jones
      self.append("  (Query group operator=" + operator + "\n");
447 159 jones
      Enumeration en= getChildren();
448
      while (en.hasMoreElements()) {
449
        Object qobject = en.nextElement();
450
        self.append(qobject);
451
      }
452
      self.append("  )\n");
453
      return self.toString();
454
    }
455 158 jones
  }
456
457
  /** a utility class that represents a single term in a query */
458
  private class QueryTerm {
459
    private boolean casesensitive = false;
460
    private String searchmode = null;
461
    private String value = null;
462
    private String pathexpr = null;
463
464 172 jones
    /**
465
     * Construct a new instance of a query term for a free text search
466
     * (using the value only)
467
     *
468
     * @param casesensitive flag indicating whether case is used to match
469
     * @param searchmode determines what kind of substring match is performed
470
     *        (one of starts-with|ends-with|contains|matches-exactly)
471
     * @param value the text value to match
472
     */
473 158 jones
    public QueryTerm(boolean casesensitive, String searchmode,
474
                     String value) {
475
      this.casesensitive = casesensitive;
476
      this.searchmode = searchmode;
477
      this.value = value;
478
    }
479
480 172 jones
    /**
481
     * Construct a new instance of a query term for a structured search
482
     * (matching the value only for those nodes in the pathexpr)
483
     *
484
     * @param casesensitive flag indicating whether case is used to match
485
     * @param searchmode determines what kind of substring match is performed
486
     *        (one of starts-with|ends-with|contains|matches-exactly)
487
     * @param value the text value to match
488
     * @param pathexpr the hierarchical path to the nodes to be searched
489
     */
490 158 jones
    public QueryTerm(boolean casesensitive, String searchmode,
491
                     String value, String pathexpr) {
492
      this(casesensitive, searchmode, value);
493
      this.pathexpr = pathexpr;
494
    }
495
496 172 jones
    /** determine if the QueryTerm is case sensitive */
497 158 jones
    public boolean isCaseSensitive() {
498
      return casesensitive;
499
    }
500
501 172 jones
    /** get the searchmode parameter */
502 158 jones
    public String getSearchMode() {
503
      return searchmode;
504
    }
505
506 172 jones
    /** get the Value parameter */
507 158 jones
    public String getValue() {
508
      return value;
509
    }
510
511 172 jones
    /** get the path expression parameter */
512 158 jones
    public String getPathExpression() {
513
      return pathexpr;
514
    }
515 159 jones
516 172 jones
    /**
517
     * create a SQL serialization of the query that this instance represents
518
     */
519
    public String printSQL() {
520 170 jones
      StringBuffer self = new StringBuffer();
521
522
      // Uppercase the search string if case match is not important
523
      String casevalue = null;
524
      String nodedataterm = null;
525
526
      if (casesensitive) {
527
        nodedataterm = "nodedata";
528
        casevalue = value;
529
      } else {
530
        nodedataterm = "UPPER(nodedata)";
531
        casevalue = value.toUpperCase();
532
      }
533
534
      // Add appropriate wildcards to search string
535
      String searchvalue = null;
536
      if (searchmode.equals("starts-with")) {
537
        searchvalue = casevalue + "%";
538
      } else if (searchmode.equals("ends-with")) {
539
        searchvalue = "%" + casevalue;
540
      } else if (searchmode.equals("contains")) {
541
        searchvalue = "%" + casevalue + "%";
542
      } else {
543
        searchvalue = casevalue;
544
      }
545
546 178 jones
      self.append("SELECT DISTINCT docid FROM xml_nodes WHERE \n");
547 170 jones
548
      if (pathexpr != null) {
549
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
550
        self.append("AND parentnodeid IN ");
551
        self.append("(SELECT nodeid FROM xml_index WHERE path LIKE " +
552
                    "'" +  pathexpr + "') " );
553
      } else {
554
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
555
      }
556
557
      return self.toString();
558
    }
559
560 172 jones
    /**
561
     * create a String description of the query that this instance represents.
562
     * This should become a way to get the XML serialization of the query.
563
     */
564 159 jones
    public String toString() {
565
      StringBuffer self = new StringBuffer();
566
567
      self.append("    Query Term iscasesensitive=" + casesensitive + "\n");
568
      self.append("               searchmode=" + searchmode + "\n");
569
      self.append("               value=" + value + "\n");
570
      if (pathexpr != null) {
571
        self.append("               pathexpr=" + pathexpr + "\n");
572
      }
573
574
      return self.toString();
575
    }
576 158 jones
  }
577 155 jones
}
578 203 jones
579
/**
580
 * '$Log$
581 453 berkley
 * 'Revision 1.14  2000/08/31 21:20:39  berkley
582
 * 'changed xslf for new returnfield scheme.  the returnfields are now returned as <param name="<returnfield>"> tags.
583
 * 'The sql for the returnfield query was redone to fix a previous problem with slow queries
584
 * '
585 423 berkley
 * 'Revision 1.13  2000/08/23 22:55:38  berkley
586
 * 'changed the field names to be case-sensitive in the returnfields
587
 * '
588 405 berkley
 * 'Revision 1.12  2000/08/23 17:29:05  berkley
589
 * 'added support for the returnfield parameter
590
 * '-QuerySpecification now sets a flag (containsExtendedSQL) when there are returnfield items in the pathquery document.
591
 * 'the accessor method containsExtendedSQL() can be called by other classes to check for extended return parameters
592
 * '-getReturnFields returns a Vector of the names of each specified return field.
593
 * '-printExtendedSQL returns a string of the extra SQL statements required for the query.
594
 * '
595
 * '-a calling class should first check containsExtendedSQL to make sure that there are extra fields being returned, then call printExtendedSQL to
596
 * 'insert the extra SQL into the query.  (Note that this is how DBQuery implements this.)
597
 * '
598 402 berkley
 * 'Revision 1.11  2000/08/14 20:53:34  jones
599
 * 'Added "release" keyword to all metacat source files so that the release
600
 * 'number will be evident in software distributions.
601
 * '
602 349 jones
 * 'Revision 1.10  2000/06/26 10:35:05  jones
603
 * 'Merged in substantial changes to DBWriter and associated classes and to
604
 * 'the MetaCatServlet in order to accommodate the new UPDATE and DELETE
605
 * 'functions.  The command line tools and the parameters for the
606
 * 'servlet have changed substantially.
607
 * '
608 203 jones
 * 'Revision 1.9.2.3  2000/06/25 23:38:17  jones
609
 * 'Added RCSfile keyword
610
 * '
611
 * 'Revision 1.9.2.2  2000/06/25 23:34:18  jones
612
 * 'Changed documentation formatting, added log entries at bottom of source files
613
 * ''
614
 */