Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be 
4
 *             constructed from an XML serialization conforming to 
5
 *             pathquery.dtd. The printSQL() method can be used to print 
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *    Release: @release@
11
 *
12
 *   '$Author: berkley $'
13
 *     '$Date: 2000-09-15 12:52:12 -0700 (Fri, 15 Sep 2000) $'
14
 * '$Revision: 453 $'
15
 */
16

    
17
package edu.ucsb.nceas.metacat;
18

    
19
import java.io.*;
20
import java.util.Stack;
21
import java.util.Vector;
22
import java.util.Enumeration;
23

    
24
import org.xml.sax.Attributes;
25
import org.xml.sax.InputSource;
26
import org.xml.sax.SAXException;
27
import org.xml.sax.SAXParseException;
28
import org.xml.sax.XMLReader;
29
import org.xml.sax.helpers.XMLReaderFactory;
30
import org.xml.sax.helpers.DefaultHandler;
31

    
32
/**
33
 * A Class that represents a structured query, and can be 
34
 * constructed from an XML serialization conforming to pathquery.dtd.
35
 * The printSQL() method can be used to print a SQL serialization of the query.
36
 */
37
public class QuerySpecification extends DefaultHandler {
38
 
39
  private boolean containsExtendedSQL=false;
40
 
41
  // Query data structures
42
  private String meta_file_id;
43
  private String querytitle;
44
  private Vector doctypeList;
45
  private Vector returnFieldList;
46
  private QueryGroup query = null;
47

    
48
  private Stack elementStack;
49
  private Stack queryStack;
50
  private String currentValue;
51
  private String currentPathexpr;
52
  private String parserName = null;
53

    
54
  /**
55
   * construct an instance of the QuerySpecification class 
56
   *
57
   * @param queryspec the XML representation of the query (should conform
58
   *                  to pathquery.dtd) as a Reader
59
   * @param parserName the fully qualified name of a Java Class implementing
60
   *                  the org.xml.sax.XMLReader interface
61
   */
62
  public QuerySpecification( Reader queryspec, String parserName ) 
63
         throws IOException {
64
    super();
65
    
66
    // Initialize the class variables
67
    doctypeList = new Vector();
68
    elementStack = new Stack();
69
    queryStack   = new Stack();
70
    returnFieldList = new Vector();
71
    this.parserName = parserName;
72

    
73
    // Initialize the parser and read the queryspec
74
    XMLReader parser = initializeParser();
75
    if (parser == null) {
76
      System.err.println("SAX parser not instantiated properly.");
77
    }
78
    try {
79
      parser.parse(new InputSource(queryspec));
80
    } catch (SAXException e) {
81
      System.err.println("error parsing data");
82
      System.err.println(e.getMessage());
83
    }
84
  }
85

    
86
  /**
   * Convenience constructor for a query held in memory as text: the text
   * is wrapped in a StringReader and passed to the Reader-based constructor.
   *
   * @param queryspec the XML representation of the query (should conform
   *                  to pathquery.dtd) as a String
   * @param parserName the fully qualified name of a Java Class implementing
   *                  the org.xml.sax.XMLReader interface; passed through
   *                  unchanged
   * @throws IOException propagated from the Reader-based constructor
   */
  public QuerySpecification(String queryspec, String parserName)
      throws IOException
  {
    this(new StringReader(queryspec), parserName);
  }
98

    
99
  /** Main routine for testing */
100
  static public void main(String[] args) {
101

    
102
     if (args.length < 1) {
103
       System.err.println("Wrong number of arguments!!!");
104
       System.err.println("USAGE: java QuerySpecification <xmlfile>");
105
       return;
106
     } else {
107
       String xmlfile  = args[0];
108
        
109
       try {
110
         MetaCatUtil util = new MetaCatUtil();
111
         FileReader xml = new FileReader(new File(xmlfile));
112
         QuerySpecification qspec = 
113
                 new QuerySpecification(xml, util.getOption("saxparser"));
114
         System.out.println(qspec.printSQL());
115

    
116
       } catch (IOException e) {
117
         System.err.println(e.getMessage());
118
       }
119
         
120
     }
121
  }
122
  
123
  /**
124
   * Returns true if the parsed query contains and extended xml query 
125
   * (i.e. there is at least one &lt;returnfield&gt; in the pathquery document)
126
   */
127
  public boolean containsExtendedSQL()
128
  {
129
    if(containsExtendedSQL)
130
    {
131
      return true;
132
    }
133
    else
134
    {
135
      return false;
136
    }
137
  }
138
  
139
  /**
140
   * Accessor method to return a vector of the extended return fields as
141
   * defined in the &lt;returnfield&gt; tag in the pathquery dtd.
142
   */
143
  public Vector getReturnFieldList()
144
  {
145
    return this.returnFieldList; 
146
  }
147

    
148
  /**
149
   * Set up the SAX parser for reading the XML serialized query
150
   */
151
  private XMLReader initializeParser() {
152
    XMLReader parser = null;
153

    
154
    // Set up the SAX document handlers for parsing
155
    try {
156

    
157
      // Get an instance of the parser
158
      parser = XMLReaderFactory.createXMLReader(parserName);
159

    
160
      // Set the ContentHandler to this instance
161
      parser.setContentHandler(this);
162

    
163
      // Set the error Handler to this instance
164
      parser.setErrorHandler(this);
165

    
166
    } catch (Exception e) {
167
       System.err.println(e.toString());
168
    }
169

    
170
    return parser;
171
  }
172

    
173
  /**
174
   * callback method used by the SAX Parser when the start tag of an 
175
   * element is detected. Used in this context to parse and store
176
   * the query information in class variables.
177
   */
178
  public void startElement (String uri, String localName, 
179
                            String qName, Attributes atts) 
180
         throws SAXException {
181
    BasicNode currentNode = new BasicNode(localName);
182
    // add attributes to BasicNode here
183
    if (atts != null) {
184
      int len = atts.getLength();
185
      for (int i = 0; i < len; i++) {
186
        currentNode.setAttribute(atts.getLocalName(i), atts.getValue(i));
187
      }
188
    }
189

    
190
    elementStack.push(currentNode); 
191
    if (currentNode.getTagName().equals("querygroup")) {
192
      QueryGroup currentGroup = new QueryGroup(
193
                                currentNode.getAttribute("operator"));
194
      if (query == null) {
195
        query = currentGroup;
196
      } else {
197
        QueryGroup parentGroup = (QueryGroup)queryStack.peek();
198
        parentGroup.addChild(currentGroup);
199
      }
200
      queryStack.push(currentGroup);
201
    }
202
  }
203

    
204
  /**
205
   * callback method used by the SAX Parser when the end tag of an 
206
   * element is detected. Used in this context to parse and store
207
   * the query information in class variables.
208
   */
209
  public void endElement (String uri, String localName,
210
                          String qName) throws SAXException {
211
    BasicNode leaving = (BasicNode)elementStack.pop(); 
212
    if (leaving.getTagName().equals("queryterm")) {
213
      boolean isCaseSensitive = (new Boolean(
214
              leaving.getAttribute("casesensitive"))).booleanValue();
215
      QueryTerm currentTerm = null;
216
      if (currentPathexpr == null) {
217
        currentTerm = new QueryTerm(isCaseSensitive,
218
                      leaving.getAttribute("searchmode"),currentValue);
219
      } else {
220
        currentTerm = new QueryTerm(isCaseSensitive,
221
                      leaving.getAttribute("searchmode"),currentValue,
222
                      currentPathexpr);
223
      }
224
      QueryGroup currentGroup = (QueryGroup)queryStack.peek();
225
      currentGroup.addChild(currentTerm);
226
      currentValue = null;
227
      currentPathexpr = null;
228
    } else if (leaving.getTagName().equals("querygroup")) {
229
      QueryGroup leavingGroup = (QueryGroup)queryStack.pop();
230
    }
231
  }
232

    
233
  /**
234
   * callback method used by the SAX Parser when the text sequences of an 
235
   * xml stream are detected. Used in this context to parse and store
236
   * the query information in class variables.
237
   */
238
  public void characters(char ch[], int start, int length) {
239

    
240
    String inputString = new String(ch, start, length);
241
    BasicNode currentNode = (BasicNode)elementStack.peek(); 
242
    String currentTag = currentNode.getTagName();
243
    if (currentTag.equals("meta_file_id")) {
244
      meta_file_id = inputString;
245
    } else if (currentTag.equals("querytitle")) {
246
      querytitle = inputString;
247
    } else if (currentTag.equals("value")) {
248
      currentValue = inputString;
249
    } else if (currentTag.equals("pathexpr")) {
250
      currentPathexpr = inputString;
251
    } else if (currentTag.equals("returndoctype")) {
252
      doctypeList.add(inputString);
253
    } else if (currentTag.equals("returnfield")) {
254
      returnFieldList.add(inputString);
255
      containsExtendedSQL = true;
256
    }
257
  }
258

    
259

    
260
  /**
261
   * create a SQL serialization of the query that this instance represents
262
   */
263
  public String printSQL() {
264
    StringBuffer self = new StringBuffer();
265

    
266
    self.append("SELECT docid,docname,doctype,doctitle,");
267
    self.append("date_created, date_updated ");
268
    self.append("FROM xml_documents WHERE docid IN (");
269

    
270
    // This determines the documents that meet the query conditions
271
    self.append(query.printSQL());
272

    
273
    self.append(") ");
274
 
275
    // Add SQL to filter for doctypes requested in the query
276
    if (!doctypeList.isEmpty()) {
277
      boolean firstdoctype = true;
278
      self.append(" AND ("); 
279
      Enumeration en = doctypeList.elements();
280
      while (en.hasMoreElements()) {
281
        String currentDoctype = (String)en.nextElement();
282
        if (firstdoctype) {
283
           firstdoctype = false;
284
           self.append(" doctype = '" + currentDoctype + "'"); 
285
        } else {
286
          self.append(" OR doctype = '" + currentDoctype + "'"); 
287
        }
288
      }
289
      self.append(") ");
290
    }
291
    
292
    return self.toString();
293
  }
294
  
295
  /**
296
   * This method prints sql based upon the &lt;returnfield&gt; tag in the
297
   * pathquery document.  This allows for customization of the 
298
   * returned fields
299
   */
300
  public String printExtendedSQL()
301
  {  
302
    StringBuffer self = new StringBuffer();
303
    self.append("select xml_nodes.docid, xml_index.path, xml_nodes.nodedata ");
304
    self.append("from xml_index, xml_nodes where xml_index.nodeid=");
305
    self.append("xml_nodes.parentnodeid and (xml_index.path like '");
306
    boolean firstfield = true;
307
    //put the returnfields into the query
308
    //the for loop allows for multiple fields
309
    for(int i=0; i<returnFieldList.size(); i++)
310
    {
311
      if(firstfield)
312
      {
313
        firstfield = false;
314
        self.append((String)returnFieldList.elementAt(i));
315
        self.append("' ");
316
      }
317
      else
318
      {
319
        self.append("or xml_index.path like '");
320
        self.append((String)returnFieldList.elementAt(i));
321
        self.append("' ");
322
      }
323
    }
324
    self.append(") AND xml_nodes.docid in (");
325
    self.append(query.printSQL());
326
    self.append(")");
327
    self.append(" AND xml_nodes.nodetype = 'TEXT'");
328

    
329
    //System.out.println(self.toString());
330
    return self.toString();
331
  }
332
   
333
  /**
334
   *
335
   */
336
  public static String printPackageSQL()
337
  {
338
    StringBuffer self = new StringBuffer();
339
    self.append("select z.nodedata, x.nodedata, y.nodedata from ");
340
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
341
    self.append("'package/relation/subject') s, (select nodeid, parentnodeid ");
342
    self.append("from xml_index where path like ");
343
    self.append("'package/relation/relationship') rel, ");
344
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
345
    self.append("'package/relation/object') o, ");
346
    self.append("xml_nodes x, xml_nodes y, xml_nodes z ");
347
    self.append("where s.parentnodeid = rel.parentnodeid ");
348
    self.append("and rel.parentnodeid = o.parentnodeid ");
349
    self.append("and x.parentnodeid in rel.nodeid ");
350
    self.append("and y.parentnodeid in o.nodeid ");
351
    self.append("and z.parentnodeid in s.nodeid ");
352
    //self.append("and z.nodedata like '%");
353
    //self.append(docid);
354
    //self.append("%'");
355
    return self.toString();
356
  }
357
  
358
  /**
359
   * create a String description of the query that this instance represents.
360
   * This should become a way to get the XML serialization of the query.
361
   */
362
  public String toString() {
363
    return "meta_file_id=" + meta_file_id + "\n" + 
364
           "querytitle=" + querytitle + "\n" + query;
365
  }
366

    
367
  /** a utility class that represents a group of terms in a query */
368
  private class QueryGroup {
369
    private String operator = null;  // indicates how query terms are combined
370
    private Vector children = null;  // the list of query terms and groups
371

    
372
    /** 
373
     * construct a new QueryGroup 
374
     *
375
     * @param operator the boolean conector used to connect query terms 
376
     *                    in this query group
377
     */
378
    public QueryGroup(String operator) {
379
      this.operator = operator;
380
      children = new Vector();
381
    }
382

    
383
    /** 
384
     * Add a child QueryGroup to this QueryGroup
385
     *
386
     * @param qgroup the query group to be added to the list of terms
387
     */
388
    public void addChild(QueryGroup qgroup) {
389
      children.add((Object)qgroup); 
390
    }
391

    
392
    /**
393
     * Add a child QueryTerm to this QueryGroup
394
     *
395
     * @param qterm the query term to be added to the list of terms
396
     */
397
    public void addChild(QueryTerm qterm) {
398
      children.add((Object)qterm); 
399
    }
400

    
401
    /**
402
     * Retrieve an Enumeration of query terms for this QueryGroup
403
     */
404
    public Enumeration getChildren() {
405
      return children.elements();
406
    }
407
   
408
    /**
409
     * create a SQL serialization of the query that this instance represents
410
     */
411
    public String printSQL() {
412
      StringBuffer self = new StringBuffer();
413
      boolean first = true;
414

    
415
      self.append("(");
416

    
417
      Enumeration en= getChildren();
418
      while (en.hasMoreElements()) {
419
        Object qobject = en.nextElement();
420
        if (first) {
421
          first = false;
422
        } else {
423
          self.append(" " + operator + " ");
424
        }
425
        if (qobject instanceof QueryGroup) {
426
          QueryGroup qg = (QueryGroup)qobject;
427
          self.append(qg.printSQL());
428
        } else if (qobject instanceof QueryTerm) {
429
          QueryTerm qt = (QueryTerm)qobject;
430
          self.append(qt.printSQL());
431
        } else {
432
          System.err.println("qobject wrong type: fatal error");
433
        }
434
      }
435
      self.append(") \n");
436
      return self.toString();
437
    }
438

    
439
    /**
440
     * create a String description of the query that this instance represents.
441
     * This should become a way to get the XML serialization of the query.
442
     */
443
    public String toString() {
444
      StringBuffer self = new StringBuffer();
445

    
446
      self.append("  (Query group operator=" + operator + "\n");
447
      Enumeration en= getChildren();
448
      while (en.hasMoreElements()) {
449
        Object qobject = en.nextElement();
450
        self.append(qobject);
451
      }
452
      self.append("  )\n");
453
      return self.toString();
454
    }
455
  }
456

    
457
  /** a utility class that represents a single term in a query */
458
  private class QueryTerm {
459
    private boolean casesensitive = false;
460
    private String searchmode = null;
461
    private String value = null;
462
    private String pathexpr = null;
463

    
464
    /**
465
     * Construct a new instance of a query term for a free text search
466
     * (using the value only)
467
     *
468
     * @param casesensitive flag indicating whether case is used to match
469
     * @param searchmode determines what kind of substring match is performed
470
     *        (one of starts-with|ends-with|contains|matches-exactly)
471
     * @param value the text value to match
472
     */
473
    public QueryTerm(boolean casesensitive, String searchmode, 
474
                     String value) {
475
      this.casesensitive = casesensitive;
476
      this.searchmode = searchmode;
477
      this.value = value;
478
    }
479

    
480
    /**
481
     * Construct a new instance of a query term for a structured search
482
     * (matching the value only for those nodes in the pathexpr)
483
     *
484
     * @param casesensitive flag indicating whether case is used to match
485
     * @param searchmode determines what kind of substring match is performed
486
     *        (one of starts-with|ends-with|contains|matches-exactly)
487
     * @param value the text value to match
488
     * @param pathexpr the hierarchical path to the nodes to be searched
489
     */
490
    public QueryTerm(boolean casesensitive, String searchmode, 
491
                     String value, String pathexpr) {
492
      this(casesensitive, searchmode, value);
493
      this.pathexpr = pathexpr;
494
    }
495

    
496
    /** determine if the QueryTerm is case sensitive */
497
    public boolean isCaseSensitive() {
498
      return casesensitive;
499
    }
500

    
501
    /** get the searchmode parameter */
502
    public String getSearchMode() {
503
      return searchmode;
504
    }
505
 
506
    /** get the Value parameter */
507
    public String getValue() {
508
      return value;
509
    }
510

    
511
    /** get the path expression parameter */
512
    public String getPathExpression() {
513
      return pathexpr;
514
    }
515

    
516
    /**
517
     * create a SQL serialization of the query that this instance represents
518
     */
519
    public String printSQL() {
520
      StringBuffer self = new StringBuffer();
521

    
522
      // Uppercase the search string if case match is not important
523
      String casevalue = null;
524
      String nodedataterm = null;
525

    
526
      if (casesensitive) {
527
        nodedataterm = "nodedata";
528
        casevalue = value;
529
      } else {
530
        nodedataterm = "UPPER(nodedata)";
531
        casevalue = value.toUpperCase();
532
      }
533

    
534
      // Add appropriate wildcards to search string
535
      String searchvalue = null;
536
      if (searchmode.equals("starts-with")) {
537
        searchvalue = casevalue + "%";
538
      } else if (searchmode.equals("ends-with")) {
539
        searchvalue = "%" + casevalue;
540
      } else if (searchmode.equals("contains")) {
541
        searchvalue = "%" + casevalue + "%";
542
      } else {
543
        searchvalue = casevalue;
544
      }
545

    
546
      self.append("SELECT DISTINCT docid FROM xml_nodes WHERE \n");
547

    
548
      if (pathexpr != null) {
549
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
550
        self.append("AND parentnodeid IN ");
551
        self.append("(SELECT nodeid FROM xml_index WHERE path LIKE " + 
552
                    "'" +  pathexpr + "') " );
553
      } else {
554
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
555
      }
556

    
557
      return self.toString();
558
    }
559

    
560
    /**
561
     * create a String description of the query that this instance represents.
562
     * This should become a way to get the XML serialization of the query.
563
     */
564
    public String toString() {
565
      StringBuffer self = new StringBuffer();
566

    
567
      self.append("    Query Term iscasesensitive=" + casesensitive + "\n");
568
      self.append("               searchmode=" + searchmode + "\n");
569
      self.append("               value=" + value + "\n");
570
      if (pathexpr != null) {
571
        self.append("               pathexpr=" + pathexpr + "\n");
572
      }
573

    
574
      return self.toString();
575
    }
576
  }
577
}
578

    
579
/**
580
 * '$Log$
581
 * 'Revision 1.14  2000/08/31 21:20:39  berkley
582
 * 'changed xslf for new returnfield scheme.  the returnfields are now returned as <param name="<returnfield>"> tags.
583
 * 'The sql for the returnfield query was redone to fix a previous problem with slow queries
584
 * '
585
 * 'Revision 1.13  2000/08/23 22:55:38  berkley
586
 * 'changed the field names to be case-sensitive in the returnfields
587
 * '
588
 * 'Revision 1.12  2000/08/23 17:29:05  berkley
589
 * 'added support for the returnfield parameter
590
 * '-QuerySpecification now sets a flag (containsExtendedSQL) when there are returnfield items in the pathquery document.
591
 * 'the accessor method containsExtendedSQL() can be called by other classes to check for extended return parameters
592
 * '-getReturnFields returns a Vector of the names of each specified return field.
593
 * '-printExtendedSQL returns a string of the extra SQL statements required for the query.
594
 * '
595
 * '-a calling class should first check containsExtendedSQL to make sure that there are extra fields being returned, then call printExtendedSQL to
596
 * 'insert the extra SQL into the query.  (Note that this is how DBQuery implements this.)
597
 * '
598
 * 'Revision 1.11  2000/08/14 20:53:34  jones
599
 * 'Added "release" keyword to all metacat source files so that the release
600
 * 'number will be evident in software distributions.
601
 * '
602
 * 'Revision 1.10  2000/06/26 10:35:05  jones
603
 * 'Merged in substantial changes to DBWriter and associated classes and to
604
 * 'the MetaCatServlet in order to accomodate the new UPDATE and DELETE
605
 * 'functions.  The command line tools and the parameters for the
606
 * 'servlet have changed substantially.
607
 * '
608
 * 'Revision 1.9.2.3  2000/06/25 23:38:17  jones
609
 * 'Added RCSfile keyword
610
 * '
611
 * 'Revision 1.9.2.2  2000/06/25 23:34:18  jones
612
 * 'Changed documentation formatting, added log entries at bottom of source files
613
 * ''
614
 */
(25-25/28)