Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be 
4
 *             constructed from an XML serialization conforming to 
5
 *             pathquery.dtd. The printSQL() method can be used to print 
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *    Release: @release@
11
 *
12
 *   '$Author: berkley $'
13
 *     '$Date: 2000-08-23 15:55:38 -0700 (Wed, 23 Aug 2000) $'
14
 * '$Revision: 405 $'
15
 */
16

    
17
package edu.ucsb.nceas.metacat;
18

    
19
import java.io.*;
20
import java.util.Stack;
21
import java.util.Vector;
22
import java.util.Enumeration;
23

    
24
import org.xml.sax.Attributes;
25
import org.xml.sax.InputSource;
26
import org.xml.sax.SAXException;
27
import org.xml.sax.SAXParseException;
28
import org.xml.sax.XMLReader;
29
import org.xml.sax.helpers.XMLReaderFactory;
30
import org.xml.sax.helpers.DefaultHandler;
31

    
32
/**
 * A Class that represents a structured query, and can be 
 * constructed from an XML serialization conforming to pathquery.dtd. 
 * The printSQL() method can be used to print a SQL serialization of the query.
 */
37
public class QuerySpecification extends DefaultHandler {
38
 
39
  private boolean containsExtendedSQL=false;
40
 
41
  // Query data structures
42
  private String meta_file_id;
43
  private String querytitle;
44
  private Vector doctypeList;
45
  private Vector returnFieldList;
46
  private QueryGroup query = null;
47

    
48
  private Stack elementStack;
49
  private Stack queryStack;
50
  private String currentValue;
51
  private String currentPathexpr;
52
  private String parserName = null;
53

    
54
  /**
55
   * construct an instance of the QuerySpecification class 
56
   *
57
   * @param queryspec the XML representation of the query (should conform
58
   *                  to pathquery.dtd) as a Reader
59
   * @param parserName the fully qualified name of a Java Class implementing
60
   *                  the org.xml.sax.XMLReader interface
61
   */
62
  public QuerySpecification( Reader queryspec, String parserName ) 
63
         throws IOException {
64
    super();
65
    
66
    // Initialize the class variables
67
    doctypeList = new Vector();
68
    elementStack = new Stack();
69
    queryStack   = new Stack();
70
    returnFieldList = new Vector();
71
    this.parserName = parserName;
72

    
73
    // Initialize the parser and read the queryspec
74
    XMLReader parser = initializeParser();
75
    if (parser == null) {
76
      System.err.println("SAX parser not instantiated properly.");
77
    }
78
    try {
79
      parser.parse(new InputSource(queryspec));
80
    } catch (SAXException e) {
81
      System.err.println("error parsing data");
82
      System.err.println(e.getMessage());
83
    }
84
  }
85

    
86
  /**
87
   * construct an instance of the QuerySpecification class 
88
   *
89
   * @param queryspec the XML representation of the query (should conform
90
   *                  to pathquery.dtd) as a String
91
   * @param parserName the fully qualified name of a Java Class implementing
92
   *                  the org.xml.sax.Parser interface
93
   */
94
  public QuerySpecification( String queryspec, String parserName ) 
95
         throws IOException {
96
    this(new StringReader(queryspec), parserName);
97
  }
98

    
99
  /** Main routine for testing */
100
  static public void main(String[] args) {
101

    
102
     if (args.length < 1) {
103
       System.err.println("Wrong number of arguments!!!");
104
       System.err.println("USAGE: java QuerySpecification <xmlfile>");
105
       return;
106
     } else {
107
       String xmlfile  = args[0];
108
        
109
       try {
110
         MetaCatUtil util = new MetaCatUtil();
111
         FileReader xml = new FileReader(new File(xmlfile));
112
         QuerySpecification qspec = 
113
                 new QuerySpecification(xml, util.getOption("saxparser"));
114
         System.out.println(qspec.printSQL());
115

    
116
       } catch (IOException e) {
117
         System.err.println(e.getMessage());
118
       }
119
         
120
     }
121
  }
122
  
123
  /**
124
   * Returns true if the parsed query contains and extended xml query 
125
   * (i.e. there is at least one &lt;returnfield&gt; in the pathquery document)
126
   */
127
  public boolean containsExtendedSQL()
128
  {
129
    if(containsExtendedSQL)
130
    {
131
      return true;
132
    }
133
    else
134
    {
135
      return false;
136
    }
137
  }
138
  
139
  /**
140
   * Accessor method to return a vector of the extended return fields as
141
   * defined in the &lt;returnfield&gt; tag in the pathquery dtd.
142
   */
143
  public Vector getReturnFieldList()
144
  {
145
    return this.returnFieldList; 
146
  }
147

    
148
  /**
149
   * Set up the SAX parser for reading the XML serialized query
150
   */
151
  private XMLReader initializeParser() {
152
    XMLReader parser = null;
153

    
154
    // Set up the SAX document handlers for parsing
155
    try {
156

    
157
      // Get an instance of the parser
158
      parser = XMLReaderFactory.createXMLReader(parserName);
159

    
160
      // Set the ContentHandler to this instance
161
      parser.setContentHandler(this);
162

    
163
      // Set the error Handler to this instance
164
      parser.setErrorHandler(this);
165

    
166
    } catch (Exception e) {
167
       System.err.println(e.toString());
168
    }
169

    
170
    return parser;
171
  }
172

    
173
  /**
174
   * callback method used by the SAX Parser when the start tag of an 
175
   * element is detected. Used in this context to parse and store
176
   * the query information in class variables.
177
   */
178
  public void startElement (String uri, String localName, 
179
                            String qName, Attributes atts) 
180
         throws SAXException {
181
    BasicNode currentNode = new BasicNode(localName);
182
    // add attributes to BasicNode here
183
    if (atts != null) {
184
      int len = atts.getLength();
185
      for (int i = 0; i < len; i++) {
186
        currentNode.setAttribute(atts.getLocalName(i), atts.getValue(i));
187
      }
188
    }
189

    
190
    elementStack.push(currentNode); 
191
    if (currentNode.getTagName().equals("querygroup")) {
192
      QueryGroup currentGroup = new QueryGroup(
193
                                currentNode.getAttribute("operator"));
194
      if (query == null) {
195
        query = currentGroup;
196
      } else {
197
        QueryGroup parentGroup = (QueryGroup)queryStack.peek();
198
        parentGroup.addChild(currentGroup);
199
      }
200
      queryStack.push(currentGroup);
201
    }
202
  }
203

    
204
  /**
205
   * callback method used by the SAX Parser when the end tag of an 
206
   * element is detected. Used in this context to parse and store
207
   * the query information in class variables.
208
   */
209
  public void endElement (String uri, String localName,
210
                          String qName) throws SAXException {
211
    BasicNode leaving = (BasicNode)elementStack.pop(); 
212
    if (leaving.getTagName().equals("queryterm")) {
213
      boolean isCaseSensitive = (new Boolean(
214
              leaving.getAttribute("casesensitive"))).booleanValue();
215
      QueryTerm currentTerm = null;
216
      if (currentPathexpr == null) {
217
        currentTerm = new QueryTerm(isCaseSensitive,
218
                      leaving.getAttribute("searchmode"),currentValue);
219
      } else {
220
        currentTerm = new QueryTerm(isCaseSensitive,
221
                      leaving.getAttribute("searchmode"),currentValue,
222
                      currentPathexpr);
223
      }
224
      QueryGroup currentGroup = (QueryGroup)queryStack.peek();
225
      currentGroup.addChild(currentTerm);
226
      currentValue = null;
227
      currentPathexpr = null;
228
    } else if (leaving.getTagName().equals("querygroup")) {
229
      QueryGroup leavingGroup = (QueryGroup)queryStack.pop();
230
    }
231
  }
232

    
233
  /**
234
   * callback method used by the SAX Parser when the text sequences of an 
235
   * xml stream are detected. Used in this context to parse and store
236
   * the query information in class variables.
237
   */
238
  public void characters(char ch[], int start, int length) {
239

    
240
    String inputString = new String(ch, start, length);
241
    BasicNode currentNode = (BasicNode)elementStack.peek(); 
242
    String currentTag = currentNode.getTagName();
243
    if (currentTag.equals("meta_file_id")) {
244
      meta_file_id = inputString;
245
    } else if (currentTag.equals("querytitle")) {
246
      querytitle = inputString;
247
    } else if (currentTag.equals("value")) {
248
      currentValue = inputString;
249
    } else if (currentTag.equals("pathexpr")) {
250
      currentPathexpr = inputString;
251
    } else if (currentTag.equals("returndoctype")) {
252
      doctypeList.add(inputString);
253
    } else if (currentTag.equals("returnfield")) {
254
      returnFieldList.add(inputString);
255
      containsExtendedSQL = true;
256
    }
257
  }
258

    
259

    
260
  /**
261
   * create a SQL serialization of the query that this instance represents
262
   */
263
  public String printSQL() {
264
    StringBuffer self = new StringBuffer();
265

    
266
    self.append("SELECT docid,docname,doctype,doctitle,");
267
    self.append("date_created, date_updated ");
268
    self.append("FROM xml_documents WHERE docid IN (");
269

    
270
    // This determines the documents that meet the query conditions
271
    self.append(query.printSQL());
272

    
273
    self.append(") ");
274
 
275
    // Add SQL to filter for doctypes requested in the query
276
    if (!doctypeList.isEmpty()) {
277
      boolean firstdoctype = true;
278
      self.append(" AND ("); 
279
      Enumeration en = doctypeList.elements();
280
      while (en.hasMoreElements()) {
281
        String currentDoctype = (String)en.nextElement();
282
        if (firstdoctype) {
283
           firstdoctype = false;
284
           self.append(" doctype = '" + currentDoctype + "'"); 
285
        } else {
286
          self.append(" OR doctype = '" + currentDoctype + "'"); 
287
        }
288
      }
289
      self.append(") ");
290
    }
291
    
292
    return self.toString();
293
  }
294
  
295
  /**
296
   * This method prints sql based upon the &lt;returnfield&gt; tag in the
297
   * pathquery document.  This allows for customization of the 
298
   * returned fields
299
   * The parameters of the query are changed to upper case before the query
300
   * so that givenName is queryied the same as givenname.
301
   */
302
  public String printExtendedSQL()
303
  {  
304
    StringBuffer self = new StringBuffer();
305
    self.append("select A.docid, A.nodename, B.nodedata from xml_nodes A, ");
306
    self.append("xml_nodes B where A.nodeid = B.parentnodeid ");
307
    self.append("and B.nodeid in ");
308
    self.append("(select distinct nodeid from xml_nodes where parentnodeid in ");
309
    self.append("(select nodeid from xml_index where path like '");
310
    boolean firstfield = true;
311
    //put the returnfields into the query
312
    //the for loop allows for multiple fields
313
    for(int i=0; i<returnFieldList.size(); i++)
314
    {
315
      if(firstfield)
316
      {
317
        firstfield = false;
318
        self.append((String)returnFieldList.elementAt(i));
319
        self.append("' ");
320
      }
321
      else
322
      {
323
        self.append("or path like '");
324
        self.append((String)returnFieldList.elementAt(i));
325
        self.append("' ");
326
      }
327
    }
328
    self.append("))");
329
    self.append(" AND B.docid in (");
330
    self.append(query.printSQL());
331
    self.append(")");
332
    self.append(" AND B.nodetype = 'TEXT'");
333

    
334
    return self.toString();
335
  }
336

    
337
  /**
338
   * create a String description of the query that this instance represents.
339
   * This should become a way to get the XML serialization of the query.
340
   */
341
  public String toString() {
342
    return "meta_file_id=" + meta_file_id + "\n" + 
343
           "querytitle=" + querytitle + "\n" + query;
344
  }
345

    
346
  /** a utility class that represents a group of terms in a query */
347
  private class QueryGroup {
348
    private String operator = null;  // indicates how query terms are combined
349
    private Vector children = null;  // the list of query terms and groups
350

    
351
    /** 
352
     * construct a new QueryGroup 
353
     *
354
     * @param operator the boolean conector used to connect query terms 
355
     *                    in this query group
356
     */
357
    public QueryGroup(String operator) {
358
      this.operator = operator;
359
      children = new Vector();
360
    }
361

    
362
    /** 
363
     * Add a child QueryGroup to this QueryGroup
364
     *
365
     * @param qgroup the query group to be added to the list of terms
366
     */
367
    public void addChild(QueryGroup qgroup) {
368
      children.add((Object)qgroup); 
369
    }
370

    
371
    /**
372
     * Add a child QueryTerm to this QueryGroup
373
     *
374
     * @param qterm the query term to be added to the list of terms
375
     */
376
    public void addChild(QueryTerm qterm) {
377
      children.add((Object)qterm); 
378
    }
379

    
380
    /**
381
     * Retrieve an Enumeration of query terms for this QueryGroup
382
     */
383
    public Enumeration getChildren() {
384
      return children.elements();
385
    }
386
   
387
    /**
388
     * create a SQL serialization of the query that this instance represents
389
     */
390
    public String printSQL() {
391
      StringBuffer self = new StringBuffer();
392
      boolean first = true;
393

    
394
      self.append("(");
395

    
396
      Enumeration en= getChildren();
397
      while (en.hasMoreElements()) {
398
        Object qobject = en.nextElement();
399
        if (first) {
400
          first = false;
401
        } else {
402
          self.append(" " + operator + " ");
403
        }
404
        if (qobject instanceof QueryGroup) {
405
          QueryGroup qg = (QueryGroup)qobject;
406
          self.append(qg.printSQL());
407
        } else if (qobject instanceof QueryTerm) {
408
          QueryTerm qt = (QueryTerm)qobject;
409
          self.append(qt.printSQL());
410
        } else {
411
          System.err.println("qobject wrong type: fatal error");
412
        }
413
      }
414
      self.append(") \n");
415
      return self.toString();
416
    }
417

    
418
    /**
419
     * create a String description of the query that this instance represents.
420
     * This should become a way to get the XML serialization of the query.
421
     */
422
    public String toString() {
423
      StringBuffer self = new StringBuffer();
424

    
425
      self.append("  (Query group operator=" + operator + "\n");
426
      Enumeration en= getChildren();
427
      while (en.hasMoreElements()) {
428
        Object qobject = en.nextElement();
429
        self.append(qobject);
430
      }
431
      self.append("  )\n");
432
      return self.toString();
433
    }
434
  }
435

    
436
  /** a utility class that represents a single term in a query */
437
  private class QueryTerm {
438
    private boolean casesensitive = false;
439
    private String searchmode = null;
440
    private String value = null;
441
    private String pathexpr = null;
442

    
443
    /**
444
     * Construct a new instance of a query term for a free text search
445
     * (using the value only)
446
     *
447
     * @param casesensitive flag indicating whether case is used to match
448
     * @param searchmode determines what kind of substring match is performed
449
     *        (one of starts-with|ends-with|contains|matches-exactly)
450
     * @param value the text value to match
451
     */
452
    public QueryTerm(boolean casesensitive, String searchmode, 
453
                     String value) {
454
      this.casesensitive = casesensitive;
455
      this.searchmode = searchmode;
456
      this.value = value;
457
    }
458

    
459
    /**
460
     * Construct a new instance of a query term for a structured search
461
     * (matching the value only for those nodes in the pathexpr)
462
     *
463
     * @param casesensitive flag indicating whether case is used to match
464
     * @param searchmode determines what kind of substring match is performed
465
     *        (one of starts-with|ends-with|contains|matches-exactly)
466
     * @param value the text value to match
467
     * @param pathexpr the hierarchical path to the nodes to be searched
468
     */
469
    public QueryTerm(boolean casesensitive, String searchmode, 
470
                     String value, String pathexpr) {
471
      this(casesensitive, searchmode, value);
472
      this.pathexpr = pathexpr;
473
    }
474

    
475
    /** determine if the QueryTerm is case sensitive */
476
    public boolean isCaseSensitive() {
477
      return casesensitive;
478
    }
479

    
480
    /** get the searchmode parameter */
481
    public String getSearchMode() {
482
      return searchmode;
483
    }
484
 
485
    /** get the Value parameter */
486
    public String getValue() {
487
      return value;
488
    }
489

    
490
    /** get the path expression parameter */
491
    public String getPathExpression() {
492
      return pathexpr;
493
    }
494

    
495
    /**
496
     * create a SQL serialization of the query that this instance represents
497
     */
498
    public String printSQL() {
499
      StringBuffer self = new StringBuffer();
500

    
501
      // Uppercase the search string if case match is not important
502
      String casevalue = null;
503
      String nodedataterm = null;
504

    
505
      if (casesensitive) {
506
        nodedataterm = "nodedata";
507
        casevalue = value;
508
      } else {
509
        nodedataterm = "UPPER(nodedata)";
510
        casevalue = value.toUpperCase();
511
      }
512

    
513
      // Add appropriate wildcards to search string
514
      String searchvalue = null;
515
      if (searchmode.equals("starts-with")) {
516
        searchvalue = casevalue + "%";
517
      } else if (searchmode.equals("ends-with")) {
518
        searchvalue = "%" + casevalue;
519
      } else if (searchmode.equals("contains")) {
520
        searchvalue = "%" + casevalue + "%";
521
      } else {
522
        searchvalue = casevalue;
523
      }
524

    
525
      self.append("SELECT DISTINCT docid FROM xml_nodes WHERE \n");
526

    
527
      if (pathexpr != null) {
528
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
529
        self.append("AND parentnodeid IN ");
530
        self.append("(SELECT nodeid FROM xml_index WHERE path LIKE " + 
531
                    "'" +  pathexpr + "') " );
532
      } else {
533
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
534
      }
535

    
536
      return self.toString();
537
    }
538

    
539
    /**
540
     * create a String description of the query that this instance represents.
541
     * This should become a way to get the XML serialization of the query.
542
     */
543
    public String toString() {
544
      StringBuffer self = new StringBuffer();
545

    
546
      self.append("    Query Term iscasesensitive=" + casesensitive + "\n");
547
      self.append("               searchmode=" + searchmode + "\n");
548
      self.append("               value=" + value + "\n");
549
      if (pathexpr != null) {
550
        self.append("               pathexpr=" + pathexpr + "\n");
551
      }
552

    
553
      return self.toString();
554
    }
555
  }
556
}
557

    
558
/**
559
 * '$Log$
560
 * 'Revision 1.12  2000/08/23 17:29:05  berkley
561
 * 'added support for the returnfield parameter
562
 * '-QuerySpecification now sets a flag (containsExtendedSQL) when there are returnfield items in the pathquery document.
563
 * 'the accessor method containsExtendedSQL() can be called by other classes to check for extended return parameters
564
 * '-getReturnFields returns a Vector of the names of each specified return field.
565
 * '-printExtendedSQL returns a string of the extra SQL statements required for the query.
566
 * '
567
 * '-a calling class should first check containsExtendedSQL to make sure that there are extra fields being returned, then call printExtendedSQL to
568
 * 'insert the extra SQL into the query.  (Note that this is how DBQuery implements this.)
569
 * '
570
 * 'Revision 1.11  2000/08/14 20:53:34  jones
571
 * 'Added "release" keyword to all metacat source files so that the release
572
 * 'number will be evident in software distributions.
573
 * '
574
 * 'Revision 1.10  2000/06/26 10:35:05  jones
575
 * 'Merged in substantial changes to DBWriter and associated classes and to
576
 * 'the MetaCatServlet in order to accomodate the new UPDATE and DELETE
577
 * 'functions.  The command line tools and the parameters for the
578
 * 'servlet have changed substantially.
579
 * '
580
 * 'Revision 1.9.2.3  2000/06/25 23:38:17  jones
581
 * 'Added RCSfile keyword
582
 * '
583
 * 'Revision 1.9.2.2  2000/06/25 23:34:18  jones
584
 * 'Changed documentation formatting, added log entries at bottom of source files
585
 * ''
586
 */
(26-26/28)