Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be 
4
 *             constructed from an XML serialization conforming to 
5
 *             pathquery.dtd. The printSQL() method can be used to print 
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *    Release: @release@
11
 *
12
 *   '$Author: berkley $'
13
 *     '$Date: 2000-08-31 14:20:39 -0700 (Thu, 31 Aug 2000) $'
14
 * '$Revision: 423 $'
15
 */
16

    
17
package edu.ucsb.nceas.metacat;
18

    
19
import java.io.*;
20
import java.util.Stack;
21
import java.util.Vector;
22
import java.util.Enumeration;
23

    
24
import org.xml.sax.Attributes;
25
import org.xml.sax.InputSource;
26
import org.xml.sax.SAXException;
27
import org.xml.sax.SAXParseException;
28
import org.xml.sax.XMLReader;
29
import org.xml.sax.helpers.XMLReaderFactory;
30
import org.xml.sax.helpers.DefaultHandler;
31

    
32
/**
33
 * A Class that represents a structured query, and can be 
34
 * constructed from an XML serialization conforming to pathquery.dtd.
35
 * The printSQL() method can be used to print a SQL serialization of the query.
36
 */
37
public class QuerySpecification extends DefaultHandler {
38
 
39
  private boolean containsExtendedSQL=false;
40
 
41
  // Query data structures
42
  private String meta_file_id;
43
  private String querytitle;
44
  private Vector doctypeList;
45
  private Vector returnFieldList;
46
  private QueryGroup query = null;
47

    
48
  private Stack elementStack;
49
  private Stack queryStack;
50
  private String currentValue;
51
  private String currentPathexpr;
52
  private String parserName = null;
53

    
54
  /**
55
   * construct an instance of the QuerySpecification class 
56
   *
57
   * @param queryspec the XML representation of the query (should conform
58
   *                  to pathquery.dtd) as a Reader
59
   * @param parserName the fully qualified name of a Java Class implementing
60
   *                  the org.xml.sax.XMLReader interface
61
   */
62
  public QuerySpecification( Reader queryspec, String parserName ) 
63
         throws IOException {
64
    super();
65
    
66
    // Initialize the class variables
67
    doctypeList = new Vector();
68
    elementStack = new Stack();
69
    queryStack   = new Stack();
70
    returnFieldList = new Vector();
71
    this.parserName = parserName;
72

    
73
    // Initialize the parser and read the queryspec
74
    XMLReader parser = initializeParser();
75
    if (parser == null) {
76
      System.err.println("SAX parser not instantiated properly.");
77
    }
78
    try {
79
      parser.parse(new InputSource(queryspec));
80
    } catch (SAXException e) {
81
      System.err.println("error parsing data");
82
      System.err.println(e.getMessage());
83
    }
84
  }
85

    
86
  /**
   * Convenience constructor that accepts the serialized query as a String.
   * The text is wrapped in a StringReader and handed to the Reader based
   * constructor, which does all of the work.
   *
   * @param queryspec the XML representation of the query (should conform
   *                  to pathquery.dtd) as a String
   * @param parserName the fully qualified name of a Java Class implementing
   *                  the org.xml.sax.XMLReader interface
   * @throws IOException if the Reader based constructor throws it
   */
  public QuerySpecification(String queryspec, String parserName)
      throws IOException {
    this(new StringReader(queryspec), parserName);
  }
98

    
99
  /** Main routine for testing */
100
  static public void main(String[] args) {
101

    
102
     if (args.length < 1) {
103
       System.err.println("Wrong number of arguments!!!");
104
       System.err.println("USAGE: java QuerySpecification <xmlfile>");
105
       return;
106
     } else {
107
       String xmlfile  = args[0];
108
        
109
       try {
110
         MetaCatUtil util = new MetaCatUtil();
111
         FileReader xml = new FileReader(new File(xmlfile));
112
         QuerySpecification qspec = 
113
                 new QuerySpecification(xml, util.getOption("saxparser"));
114
         System.out.println(qspec.printSQL());
115

    
116
       } catch (IOException e) {
117
         System.err.println(e.getMessage());
118
       }
119
         
120
     }
121
  }
122
  
123
  /**
124
   * Returns true if the parsed query contains and extended xml query 
125
   * (i.e. there is at least one &lt;returnfield&gt; in the pathquery document)
126
   */
127
  public boolean containsExtendedSQL()
128
  {
129
    if(containsExtendedSQL)
130
    {
131
      return true;
132
    }
133
    else
134
    {
135
      return false;
136
    }
137
  }
138
  
139
  /**
140
   * Accessor method to return a vector of the extended return fields as
141
   * defined in the &lt;returnfield&gt; tag in the pathquery dtd.
142
   */
143
  public Vector getReturnFieldList()
144
  {
145
    return this.returnFieldList; 
146
  }
147

    
148
  /**
149
   * Set up the SAX parser for reading the XML serialized query
150
   */
151
  private XMLReader initializeParser() {
152
    XMLReader parser = null;
153

    
154
    // Set up the SAX document handlers for parsing
155
    try {
156

    
157
      // Get an instance of the parser
158
      parser = XMLReaderFactory.createXMLReader(parserName);
159

    
160
      // Set the ContentHandler to this instance
161
      parser.setContentHandler(this);
162

    
163
      // Set the error Handler to this instance
164
      parser.setErrorHandler(this);
165

    
166
    } catch (Exception e) {
167
       System.err.println(e.toString());
168
    }
169

    
170
    return parser;
171
  }
172

    
173
  /**
174
   * callback method used by the SAX Parser when the start tag of an 
175
   * element is detected. Used in this context to parse and store
176
   * the query information in class variables.
177
   */
178
  public void startElement (String uri, String localName, 
179
                            String qName, Attributes atts) 
180
         throws SAXException {
181
    BasicNode currentNode = new BasicNode(localName);
182
    // add attributes to BasicNode here
183
    if (atts != null) {
184
      int len = atts.getLength();
185
      for (int i = 0; i < len; i++) {
186
        currentNode.setAttribute(atts.getLocalName(i), atts.getValue(i));
187
      }
188
    }
189

    
190
    elementStack.push(currentNode); 
191
    if (currentNode.getTagName().equals("querygroup")) {
192
      QueryGroup currentGroup = new QueryGroup(
193
                                currentNode.getAttribute("operator"));
194
      if (query == null) {
195
        query = currentGroup;
196
      } else {
197
        QueryGroup parentGroup = (QueryGroup)queryStack.peek();
198
        parentGroup.addChild(currentGroup);
199
      }
200
      queryStack.push(currentGroup);
201
    }
202
  }
203

    
204
  /**
205
   * callback method used by the SAX Parser when the end tag of an 
206
   * element is detected. Used in this context to parse and store
207
   * the query information in class variables.
208
   */
209
  public void endElement (String uri, String localName,
210
                          String qName) throws SAXException {
211
    BasicNode leaving = (BasicNode)elementStack.pop(); 
212
    if (leaving.getTagName().equals("queryterm")) {
213
      boolean isCaseSensitive = (new Boolean(
214
              leaving.getAttribute("casesensitive"))).booleanValue();
215
      QueryTerm currentTerm = null;
216
      if (currentPathexpr == null) {
217
        currentTerm = new QueryTerm(isCaseSensitive,
218
                      leaving.getAttribute("searchmode"),currentValue);
219
      } else {
220
        currentTerm = new QueryTerm(isCaseSensitive,
221
                      leaving.getAttribute("searchmode"),currentValue,
222
                      currentPathexpr);
223
      }
224
      QueryGroup currentGroup = (QueryGroup)queryStack.peek();
225
      currentGroup.addChild(currentTerm);
226
      currentValue = null;
227
      currentPathexpr = null;
228
    } else if (leaving.getTagName().equals("querygroup")) {
229
      QueryGroup leavingGroup = (QueryGroup)queryStack.pop();
230
    }
231
  }
232

    
233
  /**
234
   * callback method used by the SAX Parser when the text sequences of an 
235
   * xml stream are detected. Used in this context to parse and store
236
   * the query information in class variables.
237
   */
238
  public void characters(char ch[], int start, int length) {
239

    
240
    String inputString = new String(ch, start, length);
241
    BasicNode currentNode = (BasicNode)elementStack.peek(); 
242
    String currentTag = currentNode.getTagName();
243
    if (currentTag.equals("meta_file_id")) {
244
      meta_file_id = inputString;
245
    } else if (currentTag.equals("querytitle")) {
246
      querytitle = inputString;
247
    } else if (currentTag.equals("value")) {
248
      currentValue = inputString;
249
    } else if (currentTag.equals("pathexpr")) {
250
      currentPathexpr = inputString;
251
    } else if (currentTag.equals("returndoctype")) {
252
      doctypeList.add(inputString);
253
    } else if (currentTag.equals("returnfield")) {
254
      returnFieldList.add(inputString);
255
      containsExtendedSQL = true;
256
    }
257
  }
258

    
259

    
260
  /**
261
   * create a SQL serialization of the query that this instance represents
262
   */
263
  public String printSQL() {
264
    StringBuffer self = new StringBuffer();
265

    
266
    self.append("SELECT docid,docname,doctype,doctitle,");
267
    self.append("date_created, date_updated ");
268
    self.append("FROM xml_documents WHERE docid IN (");
269

    
270
    // This determines the documents that meet the query conditions
271
    self.append(query.printSQL());
272

    
273
    self.append(") ");
274
 
275
    // Add SQL to filter for doctypes requested in the query
276
    if (!doctypeList.isEmpty()) {
277
      boolean firstdoctype = true;
278
      self.append(" AND ("); 
279
      Enumeration en = doctypeList.elements();
280
      while (en.hasMoreElements()) {
281
        String currentDoctype = (String)en.nextElement();
282
        if (firstdoctype) {
283
           firstdoctype = false;
284
           self.append(" doctype = '" + currentDoctype + "'"); 
285
        } else {
286
          self.append(" OR doctype = '" + currentDoctype + "'"); 
287
        }
288
      }
289
      self.append(") ");
290
    }
291
    
292
    return self.toString();
293
  }
294
  
295
  /**
296
   * This method prints sql based upon the &lt;returnfield&gt; tag in the
297
   * pathquery document.  This allows for customization of the 
298
   * returned fields
299
   * The parameters of the query are changed to upper case before the query
300
   * so that givenName is queryied the same as givenname.
301
   */
302
  public String printExtendedSQL()
303
  {  
304
    StringBuffer self = new StringBuffer();
305
    self.append("select xml_nodes.docid, xml_index.path, xml_nodes.nodedata ");
306
    self.append("from xml_index, xml_nodes where xml_index.nodeid=");
307
    self.append("xml_nodes.parentnodeid and (xml_index.path like '");
308
    boolean firstfield = true;
309
    //put the returnfields into the query
310
    //the for loop allows for multiple fields
311
    for(int i=0; i<returnFieldList.size(); i++)
312
    {
313
      if(firstfield)
314
      {
315
        firstfield = false;
316
        self.append((String)returnFieldList.elementAt(i));
317
        self.append("' ");
318
      }
319
      else
320
      {
321
        self.append("or xml_index.path like '");
322
        self.append((String)returnFieldList.elementAt(i));
323
        self.append("' ");
324
      }
325
    }
326
    self.append(") AND xml_nodes.docid in (");
327
    self.append(query.printSQL());
328
    self.append(")");
329
    self.append(" AND xml_nodes.nodetype = 'TEXT'");
330

    
331
    //System.out.println(self.toString());
332
    return self.toString();
333
  }
334

    
335
  /**
336
   * create a String description of the query that this instance represents.
337
   * This should become a way to get the XML serialization of the query.
338
   */
339
  public String toString() {
340
    return "meta_file_id=" + meta_file_id + "\n" + 
341
           "querytitle=" + querytitle + "\n" + query;
342
  }
343

    
344
  /** a utility class that represents a group of terms in a query */
345
  private class QueryGroup {
346
    private String operator = null;  // indicates how query terms are combined
347
    private Vector children = null;  // the list of query terms and groups
348

    
349
    /** 
350
     * construct a new QueryGroup 
351
     *
352
     * @param operator the boolean conector used to connect query terms 
353
     *                    in this query group
354
     */
355
    public QueryGroup(String operator) {
356
      this.operator = operator;
357
      children = new Vector();
358
    }
359

    
360
    /** 
361
     * Add a child QueryGroup to this QueryGroup
362
     *
363
     * @param qgroup the query group to be added to the list of terms
364
     */
365
    public void addChild(QueryGroup qgroup) {
366
      children.add((Object)qgroup); 
367
    }
368

    
369
    /**
370
     * Add a child QueryTerm to this QueryGroup
371
     *
372
     * @param qterm the query term to be added to the list of terms
373
     */
374
    public void addChild(QueryTerm qterm) {
375
      children.add((Object)qterm); 
376
    }
377

    
378
    /**
379
     * Retrieve an Enumeration of query terms for this QueryGroup
380
     */
381
    public Enumeration getChildren() {
382
      return children.elements();
383
    }
384
   
385
    /**
386
     * create a SQL serialization of the query that this instance represents
387
     */
388
    public String printSQL() {
389
      StringBuffer self = new StringBuffer();
390
      boolean first = true;
391

    
392
      self.append("(");
393

    
394
      Enumeration en= getChildren();
395
      while (en.hasMoreElements()) {
396
        Object qobject = en.nextElement();
397
        if (first) {
398
          first = false;
399
        } else {
400
          self.append(" " + operator + " ");
401
        }
402
        if (qobject instanceof QueryGroup) {
403
          QueryGroup qg = (QueryGroup)qobject;
404
          self.append(qg.printSQL());
405
        } else if (qobject instanceof QueryTerm) {
406
          QueryTerm qt = (QueryTerm)qobject;
407
          self.append(qt.printSQL());
408
        } else {
409
          System.err.println("qobject wrong type: fatal error");
410
        }
411
      }
412
      self.append(") \n");
413
      return self.toString();
414
    }
415

    
416
    /**
417
     * create a String description of the query that this instance represents.
418
     * This should become a way to get the XML serialization of the query.
419
     */
420
    public String toString() {
421
      StringBuffer self = new StringBuffer();
422

    
423
      self.append("  (Query group operator=" + operator + "\n");
424
      Enumeration en= getChildren();
425
      while (en.hasMoreElements()) {
426
        Object qobject = en.nextElement();
427
        self.append(qobject);
428
      }
429
      self.append("  )\n");
430
      return self.toString();
431
    }
432
  }
433

    
434
  /** a utility class that represents a single term in a query */
435
  private class QueryTerm {
436
    private boolean casesensitive = false;
437
    private String searchmode = null;
438
    private String value = null;
439
    private String pathexpr = null;
440

    
441
    /**
442
     * Construct a new instance of a query term for a free text search
443
     * (using the value only)
444
     *
445
     * @param casesensitive flag indicating whether case is used to match
446
     * @param searchmode determines what kind of substring match is performed
447
     *        (one of starts-with|ends-with|contains|matches-exactly)
448
     * @param value the text value to match
449
     */
450
    public QueryTerm(boolean casesensitive, String searchmode, 
451
                     String value) {
452
      this.casesensitive = casesensitive;
453
      this.searchmode = searchmode;
454
      this.value = value;
455
    }
456

    
457
    /**
458
     * Construct a new instance of a query term for a structured search
459
     * (matching the value only for those nodes in the pathexpr)
460
     *
461
     * @param casesensitive flag indicating whether case is used to match
462
     * @param searchmode determines what kind of substring match is performed
463
     *        (one of starts-with|ends-with|contains|matches-exactly)
464
     * @param value the text value to match
465
     * @param pathexpr the hierarchical path to the nodes to be searched
466
     */
467
    public QueryTerm(boolean casesensitive, String searchmode, 
468
                     String value, String pathexpr) {
469
      this(casesensitive, searchmode, value);
470
      this.pathexpr = pathexpr;
471
    }
472

    
473
    /** determine if the QueryTerm is case sensitive */
474
    public boolean isCaseSensitive() {
475
      return casesensitive;
476
    }
477

    
478
    /** get the searchmode parameter */
479
    public String getSearchMode() {
480
      return searchmode;
481
    }
482
 
483
    /** get the Value parameter */
484
    public String getValue() {
485
      return value;
486
    }
487

    
488
    /** get the path expression parameter */
489
    public String getPathExpression() {
490
      return pathexpr;
491
    }
492

    
493
    /**
494
     * create a SQL serialization of the query that this instance represents
495
     */
496
    public String printSQL() {
497
      StringBuffer self = new StringBuffer();
498

    
499
      // Uppercase the search string if case match is not important
500
      String casevalue = null;
501
      String nodedataterm = null;
502

    
503
      if (casesensitive) {
504
        nodedataterm = "nodedata";
505
        casevalue = value;
506
      } else {
507
        nodedataterm = "UPPER(nodedata)";
508
        casevalue = value.toUpperCase();
509
      }
510

    
511
      // Add appropriate wildcards to search string
512
      String searchvalue = null;
513
      if (searchmode.equals("starts-with")) {
514
        searchvalue = casevalue + "%";
515
      } else if (searchmode.equals("ends-with")) {
516
        searchvalue = "%" + casevalue;
517
      } else if (searchmode.equals("contains")) {
518
        searchvalue = "%" + casevalue + "%";
519
      } else {
520
        searchvalue = casevalue;
521
      }
522

    
523
      self.append("SELECT DISTINCT docid FROM xml_nodes WHERE \n");
524

    
525
      if (pathexpr != null) {
526
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
527
        self.append("AND parentnodeid IN ");
528
        self.append("(SELECT nodeid FROM xml_index WHERE path LIKE " + 
529
                    "'" +  pathexpr + "') " );
530
      } else {
531
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
532
      }
533

    
534
      return self.toString();
535
    }
536

    
537
    /**
538
     * create a String description of the query that this instance represents.
539
     * This should become a way to get the XML serialization of the query.
540
     */
541
    public String toString() {
542
      StringBuffer self = new StringBuffer();
543

    
544
      self.append("    Query Term iscasesensitive=" + casesensitive + "\n");
545
      self.append("               searchmode=" + searchmode + "\n");
546
      self.append("               value=" + value + "\n");
547
      if (pathexpr != null) {
548
        self.append("               pathexpr=" + pathexpr + "\n");
549
      }
550

    
551
      return self.toString();
552
    }
553
  }
554
}
555

    
556
/**
557
 * '$Log$
558
 * 'Revision 1.13  2000/08/23 22:55:38  berkley
559
 * 'changed the field names to be case-sensitive in the returnfields
560
 * '
561
 * 'Revision 1.12  2000/08/23 17:29:05  berkley
562
 * 'added support for the returnfield parameter
563
 * '-QuerySpecification now sets a flag (containsExtendedSQL) when there are returnfield items in the pathquery document.
564
 * 'the accessor method containsExtendedSQL() can be called by other classes to check for extended return parameters
565
 * '-getReturnFields returns a Vector of the names of each specified return field.
566
 * '-printExtendedSQL returns a string of the extra SQL statements required for the query.
567
 * '
568
 * '-a calling class should first check containsExtendedSQL to make sure that there are extra fields being returned, then call printExtendedSQL to
569
 * 'insert the extra SQL into the query.  (Note that this is how DBQuery implements this.)
570
 * '
571
 * 'Revision 1.11  2000/08/14 20:53:34  jones
572
 * 'Added "release" keyword to all metacat source files so that the release
573
 * 'number will be evident in software distributions.
574
 * '
575
 * 'Revision 1.10  2000/06/26 10:35:05  jones
576
 * 'Merged in substantial changes to DBWriter and associated classes and to
577
 * 'the MetaCatServlet in order to accomodate the new UPDATE and DELETE
578
 * 'functions.  The command line tools and the parameters for the
579
 * 'servlet have changed substantially.
580
 * '
581
 * 'Revision 1.9.2.3  2000/06/25 23:38:17  jones
582
 * 'Added RCSfile keyword
583
 * '
584
 * 'Revision 1.9.2.2  2000/06/25 23:34:18  jones
585
 * 'Changed documentation formatting, added log entries at bottom of source files
586
 * ''
587
 */
(25-25/27)