Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be 
4
 *             constructed from an XML serialization conforming to 
5
 *             pathquery.dtd. The printSQL() method can be used to print 
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *    Release: @release@
11
 *
12
 *   '$Author: jones $'
13
 *     '$Date: 2000-11-13 17:04:48 -0800 (Mon, 13 Nov 2000) $'
14
 * '$Revision: 535 $'
15
 */
16

    
17
package edu.ucsb.nceas.metacat;
18

    
19
import java.io.*;
20
import java.util.Stack;
21
import java.util.Vector;
22
import java.util.Enumeration;
23

    
24
import org.xml.sax.Attributes;
25
import org.xml.sax.InputSource;
26
import org.xml.sax.SAXException;
27
import org.xml.sax.SAXParseException;
28
import org.xml.sax.XMLReader;
29
import org.xml.sax.helpers.XMLReaderFactory;
30
import org.xml.sax.helpers.DefaultHandler;
31

    
32
/**
33
 * A Class that represents a structured query, and can be 
34
 * constructed from an XML serialization conforming to @see pathquery.dtd. 
35
 * The printSQL() method can be used to print a SQL serialization of the query.
36
 */
37
public class QuerySpecification extends DefaultHandler {
38
 
39
  private boolean containsExtendedSQL=false;
40
 
41
  // Query data structures
42
  private String meta_file_id;
43
  private String querytitle;
44
  private Vector doctypeList;
45
  private Vector returnFieldList;
46
  private Vector ownerList;
47
  private Vector siteList;
48
  private QueryGroup query = null;
49

    
50
  private Stack elementStack;
51
  private Stack queryStack;
52
  private String currentValue;
53
  private String currentPathexpr;
54
  private String parserName = null;
55
  private String accNumberSeparator = null;
56

    
57
  /**
58
   * construct an instance of the QuerySpecification class 
59
   *
60
   * @param queryspec the XML representation of the query (should conform
61
   *                  to pathquery.dtd) as a Reader
62
   * @param parserName the fully qualified name of a Java Class implementing
63
   *                  the org.xml.sax.XMLReader interface
64
   */
65
  public QuerySpecification( Reader queryspec, String parserName,
66
         String accNumberSeparator ) throws IOException {
67
    super();
68
    
69
    // Initialize the class variables
70
    doctypeList = new Vector();
71
    elementStack = new Stack();
72
    queryStack   = new Stack();
73
    returnFieldList = new Vector();
74
    ownerList = new Vector();
75
    siteList = new Vector();
76
    this.parserName = parserName;
77
    this.accNumberSeparator = accNumberSeparator;
78

    
79
    // Initialize the parser and read the queryspec
80
    XMLReader parser = initializeParser();
81
    if (parser == null) {
82
      System.err.println("SAX parser not instantiated properly.");
83
    }
84
    try {
85
      parser.parse(new InputSource(queryspec));
86
    } catch (SAXException e) {
87
      System.err.println("error parsing data");
88
      System.err.println(e.getMessage());
89
    }
90
  }
91

    
92
  /**
93
   * construct an instance of the QuerySpecification class 
94
   *
95
   * @param queryspec the XML representation of the query (should conform
96
   *                  to pathquery.dtd) as a String
97
   * @param parserName the fully qualified name of a Java Class implementing
98
   *                  the org.xml.sax.Parser interface
99
   */
100
  public QuerySpecification( String queryspec, String parserName,
101
         String accNumberSeparator) throws IOException {
102
    this(new StringReader(queryspec), parserName, accNumberSeparator);
103
  }
104

    
105
  /** Main routine for testing */
106
  static public void main(String[] args) {
107

    
108
     if (args.length < 1) {
109
       System.err.println("Wrong number of arguments!!!");
110
       System.err.println("USAGE: java QuerySpecification <xmlfile>");
111
       return;
112
     } else {
113
       String xmlfile  = args[0];
114
        
115
       try {
116
         MetaCatUtil util = new MetaCatUtil();
117
         FileReader xml = new FileReader(new File(xmlfile));
118
         QuerySpecification qspec = 
119
                 new QuerySpecification(xml, util.getOption("saxparser"),
120
                                        util.getOption("accNumberSeparator"));
121
         System.out.println(qspec.printSQL());
122

    
123
       } catch (IOException e) {
124
         System.err.println(e.getMessage());
125
       }
126
         
127
     }
128
  }
129
  
130
  /**
131
   * Returns true if the parsed query contains and extended xml query 
132
   * (i.e. there is at least one &lt;returnfield&gt; in the pathquery document)
133
   */
134
  public boolean containsExtendedSQL()
135
  {
136
    if(containsExtendedSQL)
137
    {
138
      return true;
139
    }
140
    else
141
    {
142
      return false;
143
    }
144
  }
145
  
146
  /**
147
   * Accessor method to return a vector of the extended return fields as
148
   * defined in the &lt;returnfield&gt; tag in the pathquery dtd.
149
   */
150
  public Vector getReturnFieldList()
151
  {
152
    return this.returnFieldList; 
153
  }
154

    
155
  /**
156
   * Set up the SAX parser for reading the XML serialized query
157
   */
158
  private XMLReader initializeParser() {
159
    XMLReader parser = null;
160

    
161
    // Set up the SAX document handlers for parsing
162
    try {
163

    
164
      // Get an instance of the parser
165
      parser = XMLReaderFactory.createXMLReader(parserName);
166

    
167
      // Set the ContentHandler to this instance
168
      parser.setContentHandler(this);
169

    
170
      // Set the error Handler to this instance
171
      parser.setErrorHandler(this);
172

    
173
    } catch (Exception e) {
174
       System.err.println(e.toString());
175
    }
176

    
177
    return parser;
178
  }
179

    
180
  /**
181
   * callback method used by the SAX Parser when the start tag of an 
182
   * element is detected. Used in this context to parse and store
183
   * the query information in class variables.
184
   */
185
  public void startElement (String uri, String localName, 
186
                            String qName, Attributes atts) 
187
         throws SAXException {
188
    BasicNode currentNode = new BasicNode(localName);
189
    // add attributes to BasicNode here
190
    if (atts != null) {
191
      int len = atts.getLength();
192
      for (int i = 0; i < len; i++) {
193
        currentNode.setAttribute(atts.getLocalName(i), atts.getValue(i));
194
      }
195
    }
196

    
197
    elementStack.push(currentNode); 
198
    if (currentNode.getTagName().equals("querygroup")) {
199
      QueryGroup currentGroup = new QueryGroup(
200
                                currentNode.getAttribute("operator"));
201
      if (query == null) {
202
        query = currentGroup;
203
      } else {
204
        QueryGroup parentGroup = (QueryGroup)queryStack.peek();
205
        parentGroup.addChild(currentGroup);
206
      }
207
      queryStack.push(currentGroup);
208
    }
209
  }
210

    
211
  /**
212
   * callback method used by the SAX Parser when the end tag of an 
213
   * element is detected. Used in this context to parse and store
214
   * the query information in class variables.
215
   */
216
  public void endElement (String uri, String localName,
217
                          String qName) throws SAXException {
218
    BasicNode leaving = (BasicNode)elementStack.pop(); 
219
    if (leaving.getTagName().equals("queryterm")) {
220
      boolean isCaseSensitive = (new Boolean(
221
              leaving.getAttribute("casesensitive"))).booleanValue();
222
      QueryTerm currentTerm = null;
223
      if (currentPathexpr == null) {
224
        currentTerm = new QueryTerm(isCaseSensitive,
225
                      leaving.getAttribute("searchmode"),currentValue);
226
      } else {
227
        currentTerm = new QueryTerm(isCaseSensitive,
228
                      leaving.getAttribute("searchmode"),currentValue,
229
                      currentPathexpr);
230
      }
231
      QueryGroup currentGroup = (QueryGroup)queryStack.peek();
232
      currentGroup.addChild(currentTerm);
233
      currentValue = null;
234
      currentPathexpr = null;
235
    } else if (leaving.getTagName().equals("querygroup")) {
236
      QueryGroup leavingGroup = (QueryGroup)queryStack.pop();
237
    }
238
  }
239

    
240
  /**
241
   * callback method used by the SAX Parser when the text sequences of an 
242
   * xml stream are detected. Used in this context to parse and store
243
   * the query information in class variables.
244
   */
245
  public void characters(char ch[], int start, int length) {
246

    
247
    String inputString = new String(ch, start, length);
248
    BasicNode currentNode = (BasicNode)elementStack.peek(); 
249
    String currentTag = currentNode.getTagName();
250
    if (currentTag.equals("meta_file_id")) {
251
      meta_file_id = inputString;
252
    } else if (currentTag.equals("querytitle")) {
253
      querytitle = inputString;
254
    } else if (currentTag.equals("value")) {
255
      currentValue = inputString;
256
    } else if (currentTag.equals("pathexpr")) {
257
      currentPathexpr = inputString;
258
    } else if (currentTag.equals("returndoctype")) {
259
      doctypeList.add(inputString);
260
    } else if (currentTag.equals("returnfield")) {
261
      returnFieldList.add(inputString);
262
      containsExtendedSQL = true;
263
    } else if (currentTag.equals("owner")) {
264
      ownerList.add(inputString);
265
    } else if (currentTag.equals("site")) {
266
      siteList.add(inputString);
267
    }
268
  }
269

    
270

    
271
  /**
272
   * create a SQL serialization of the query that this instance represents
273
   */
274
  public String printSQL() {
275
    StringBuffer self = new StringBuffer();
276

    
277
    self.append("SELECT docid,docname,doctype,doctitle,");
278
    self.append("date_created, date_updated ");
279
    self.append("FROM xml_documents WHERE docid IN (");
280

    
281
    // This determines the documents that meet the query conditions
282
    self.append(query.printSQL());
283

    
284
    self.append(") ");
285
 
286
    // Add SQL to filter for doctypes requested in the query
287
    // This is an implicit OR for the list of doctypes
288
    if (!doctypeList.isEmpty()) {
289
      boolean firstdoctype = true;
290
      self.append(" AND ("); 
291
      Enumeration en = doctypeList.elements();
292
      while (en.hasMoreElements()) {
293
        String currentDoctype = (String)en.nextElement();
294
        if (firstdoctype) {
295
           firstdoctype = false;
296
           self.append(" doctype = '" + currentDoctype + "'"); 
297
        } else {
298
          self.append(" OR doctype = '" + currentDoctype + "'"); 
299
        }
300
      }
301
      self.append(") ");
302
    }
303
    
304
    // Add SQL to filter for owners requested in the query
305
    // This is an implicit OR for the list of owners
306
    if (!ownerList.isEmpty()) {
307
      boolean first = true;
308
      self.append(" AND ("); 
309
      Enumeration en = ownerList.elements();
310
      while (en.hasMoreElements()) {
311
        String current = (String)en.nextElement();
312
        if (first) {
313
           first = false;
314
           self.append(" user_owner = '" + current + "'"); 
315
        } else {
316
          self.append(" OR user_owner = '" + current + "'"); 
317
        }
318
      }
319
      self.append(") ");
320
    }
321

    
322
    // Add SQL to filter for sites requested in the query
323
    // This is an implicit OR for the list of sites
324
    if (!siteList.isEmpty()) {
325
      boolean first = true;
326
      self.append(" AND ("); 
327
      Enumeration en = siteList.elements();
328
      while (en.hasMoreElements()) {
329
        String current = (String)en.nextElement();
330
        if (first) {
331
           first = false;
332
           self.append(" SUBSTR(docid, 1, INSTR(docid, '" +
333
               accNumberSeparator + "')-1) = '" + current + "'"); 
334
        } else {
335
          self.append(" OR SUBSTR(docid, 1, INSTR(docid, '" +
336
               accNumberSeparator + "')-1) = '" + current + "'"); 
337
        }
338
      }
339
      self.append(") ");
340
    }
341

    
342
    return self.toString();
343
  }
344
  
345
  /**
346
   * This method prints sql based upon the &lt;returnfield&gt; tag in the
347
   * pathquery document.  This allows for customization of the 
348
   * returned fields
349
   * @param doclist the list of document ids to search by
350
   */
351
  public String printExtendedSQL(String doclist)
352
  {  
353
    StringBuffer self = new StringBuffer();
354
    self.append("select xml_nodes.docid, xml_index.path, xml_nodes.nodedata ");
355
    self.append("from xml_index, xml_nodes where xml_index.nodeid=");
356
    self.append("xml_nodes.parentnodeid and (xml_index.path like '");
357
    boolean firstfield = true;
358
    //put the returnfields into the query
359
    //the for loop allows for multiple fields
360
    for(int i=0; i<returnFieldList.size(); i++)
361
    {
362
      if(firstfield)
363
      {
364
        firstfield = false;
365
        self.append((String)returnFieldList.elementAt(i));
366
        self.append("' ");
367
      }
368
      else
369
      {
370
        self.append("or xml_index.path like '");
371
        self.append((String)returnFieldList.elementAt(i));
372
        self.append("' ");
373
      }
374
    }
375
    self.append(") AND xml_nodes.docid in (");
376
    //self.append(query.printSQL());
377
    self.append(doclist);
378
    self.append(")");
379
    self.append(" AND xml_nodes.nodetype = 'TEXT'");
380

    
381
    //System.out.println(self.toString());
382
    return self.toString();
383
  }
384
  
385
  public static String printRelationSQL(String docid)
386
  {
387
    StringBuffer self = new StringBuffer();
388
    self.append("select subject, relationship, object, subdoctype, ");
389
    self.append("objdoctype from xml_relation ");
390
    self.append("where subject like '").append(docid).append("'");
391
    return self.toString();
392
  }
393
   
394
  /**
395
   * Prints sql that returns all relations in the database.
396
   */
397
  public static String printPackageSQL()
398
  {
399
    StringBuffer self = new StringBuffer();
400
    self.append("select z.nodedata, x.nodedata, y.nodedata from ");
401
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
402
    self.append("'package/relation/subject') s, (select nodeid, parentnodeid ");
403
    self.append("from xml_index where path like ");
404
    self.append("'package/relation/relationship') rel, ");
405
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
406
    self.append("'package/relation/object') o, ");
407
    self.append("xml_nodes x, xml_nodes y, xml_nodes z ");
408
    self.append("where s.parentnodeid = rel.parentnodeid ");
409
    self.append("and rel.parentnodeid = o.parentnodeid ");
410
    self.append("and x.parentnodeid in rel.nodeid ");
411
    self.append("and y.parentnodeid in o.nodeid ");
412
    self.append("and z.parentnodeid in s.nodeid ");
413
    //self.append("and z.nodedata like '%");
414
    //self.append(docid);
415
    //self.append("%'");
416
    return self.toString();
417
  }
418
  
419
  /**
420
   * Prints sql that returns all relations in the database that were input
421
   * under a specific docid
422
   * @param docid the docid to search for.
423
   */
424
  public static String printPackageSQL(String docid)
425
  {
426
    StringBuffer self = new StringBuffer();
427
    self.append("select z.nodedata, x.nodedata, y.nodedata from ");
428
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
429
    self.append("'package/relation/subject') s, (select nodeid, parentnodeid ");
430
    self.append("from xml_index where path like ");
431
    self.append("'package/relation/relationship') rel, ");
432
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
433
    self.append("'package/relation/object') o, ");
434
    self.append("xml_nodes x, xml_nodes y, xml_nodes z ");
435
    self.append("where s.parentnodeid = rel.parentnodeid ");
436
    self.append("and rel.parentnodeid = o.parentnodeid ");
437
    self.append("and x.parentnodeid in rel.nodeid ");
438
    self.append("and y.parentnodeid in o.nodeid ");
439
    self.append("and z.parentnodeid in s.nodeid ");
440
    self.append("and z.docid like '").append(docid).append("'");
441
    
442
    return self.toString();
443
  }
444
  
445
  /**
446
   * Returns all of the relations that has a certain docid in the subject
447
   * or the object.
448
   * 
449
   * @param docid the docid to search for
450
   */
451
  public static String printPackageSQL(String subDocidURL, String objDocidURL)
452
  {
453
    StringBuffer self = new StringBuffer();
454
    self.append("select z.nodedata, x.nodedata, y.nodedata from ");
455
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
456
    self.append("'package/relation/subject') s, (select nodeid, parentnodeid ");
457
    self.append("from xml_index where path like ");
458
    self.append("'package/relation/relationship') rel, ");
459
    self.append("(select nodeid, parentnodeid from xml_index where path like ");
460
    self.append("'package/relation/object') o, ");
461
    self.append("xml_nodes x, xml_nodes y, xml_nodes z ");
462
    self.append("where s.parentnodeid = rel.parentnodeid ");
463
    self.append("and rel.parentnodeid = o.parentnodeid ");
464
    self.append("and x.parentnodeid in rel.nodeid ");
465
    self.append("and y.parentnodeid in o.nodeid ");
466
    self.append("and z.parentnodeid in s.nodeid ");
467
    self.append("and (z.nodedata like '");
468
    self.append(subDocidURL);
469
    self.append("' or y.nodedata like '");
470
    self.append(objDocidURL);
471
    self.append("')");
472
    return self.toString();
473
  }
474
  
475
  public static String printGetDocByDoctypeSQL(String docid)
476
  {
477
    StringBuffer self = new StringBuffer();
478

    
479
    self.append("SELECT docid,docname,doctype,doctitle,");
480
    self.append("date_created, date_updated ");
481
    self.append("FROM xml_documents WHERE docid IN (");
482
    self.append(docid).append(")");
483
    return self.toString();
484
  }
485
  
486
  /**
487
   * create a String description of the query that this instance represents.
488
   * This should become a way to get the XML serialization of the query.
489
   */
490
  public String toString() {
491
    return "meta_file_id=" + meta_file_id + "\n" + 
492
           "querytitle=" + querytitle + "\n" + query;
493
  }
494

    
495
  /** a utility class that represents a group of terms in a query */
496
  private class QueryGroup {
497
    private String operator = null;  // indicates how query terms are combined
498
    private Vector children = null;  // the list of query terms and groups
499

    
500
    /** 
501
     * construct a new QueryGroup 
502
     *
503
     * @param operator the boolean conector used to connect query terms 
504
     *                    in this query group
505
     */
506
    public QueryGroup(String operator) {
507
      this.operator = operator;
508
      children = new Vector();
509
    }
510

    
511
    /** 
512
     * Add a child QueryGroup to this QueryGroup
513
     *
514
     * @param qgroup the query group to be added to the list of terms
515
     */
516
    public void addChild(QueryGroup qgroup) {
517
      children.add((Object)qgroup); 
518
    }
519

    
520
    /**
521
     * Add a child QueryTerm to this QueryGroup
522
     *
523
     * @param qterm the query term to be added to the list of terms
524
     */
525
    public void addChild(QueryTerm qterm) {
526
      children.add((Object)qterm); 
527
    }
528

    
529
    /**
530
     * Retrieve an Enumeration of query terms for this QueryGroup
531
     */
532
    public Enumeration getChildren() {
533
      return children.elements();
534
    }
535
   
536
    /**
537
     * create a SQL serialization of the query that this instance represents
538
     */
539
    public String printSQL() {
540
      StringBuffer self = new StringBuffer();
541
      boolean first = true;
542

    
543
      self.append("(");
544

    
545
      Enumeration en= getChildren();
546
      while (en.hasMoreElements()) {
547
        Object qobject = en.nextElement();
548
        if (first) {
549
          first = false;
550
        } else {
551
          self.append(" " + operator + " ");
552
        }
553
        if (qobject instanceof QueryGroup) {
554
          QueryGroup qg = (QueryGroup)qobject;
555
          self.append(qg.printSQL());
556
        } else if (qobject instanceof QueryTerm) {
557
          QueryTerm qt = (QueryTerm)qobject;
558
          self.append(qt.printSQL());
559
        } else {
560
          System.err.println("qobject wrong type: fatal error");
561
        }
562
      }
563
      self.append(") \n");
564
      return self.toString();
565
    }
566

    
567
    /**
568
     * create a String description of the query that this instance represents.
569
     * This should become a way to get the XML serialization of the query.
570
     */
571
    public String toString() {
572
      StringBuffer self = new StringBuffer();
573

    
574
      self.append("  (Query group operator=" + operator + "\n");
575
      Enumeration en= getChildren();
576
      while (en.hasMoreElements()) {
577
        Object qobject = en.nextElement();
578
        self.append(qobject);
579
      }
580
      self.append("  )\n");
581
      return self.toString();
582
    }
583
  }
584

    
585
  /** a utility class that represents a single term in a query */
586
  private class QueryTerm {
587
    private boolean casesensitive = false;
588
    private String searchmode = null;
589
    private String value = null;
590
    private String pathexpr = null;
591

    
592
    /**
593
     * Construct a new instance of a query term for a free text search
594
     * (using the value only)
595
     *
596
     * @param casesensitive flag indicating whether case is used to match
597
     * @param searchmode determines what kind of substring match is performed
598
     *        (one of starts-with|ends-with|contains|matches-exactly)
599
     * @param value the text value to match
600
     */
601
    public QueryTerm(boolean casesensitive, String searchmode, 
602
                     String value) {
603
      this.casesensitive = casesensitive;
604
      this.searchmode = searchmode;
605
      this.value = value;
606
    }
607

    
608
    /**
609
     * Construct a new instance of a query term for a structured search
610
     * (matching the value only for those nodes in the pathexpr)
611
     *
612
     * @param casesensitive flag indicating whether case is used to match
613
     * @param searchmode determines what kind of substring match is performed
614
     *        (one of starts-with|ends-with|contains|matches-exactly)
615
     * @param value the text value to match
616
     * @param pathexpr the hierarchical path to the nodes to be searched
617
     */
618
    public QueryTerm(boolean casesensitive, String searchmode, 
619
                     String value, String pathexpr) {
620
      this(casesensitive, searchmode, value);
621
      this.pathexpr = pathexpr;
622
    }
623

    
624
    /** determine if the QueryTerm is case sensitive */
625
    public boolean isCaseSensitive() {
626
      return casesensitive;
627
    }
628

    
629
    /** get the searchmode parameter */
630
    public String getSearchMode() {
631
      return searchmode;
632
    }
633
 
634
    /** get the Value parameter */
635
    public String getValue() {
636
      return value;
637
    }
638

    
639
    /** get the path expression parameter */
640
    public String getPathExpression() {
641
      return pathexpr;
642
    }
643

    
644
    /**
645
     * create a SQL serialization of the query that this instance represents
646
     */
647
    public String printSQL() {
648
      StringBuffer self = new StringBuffer();
649

    
650
      // Uppercase the search string if case match is not important
651
      String casevalue = null;
652
      String nodedataterm = null;
653

    
654
      if (casesensitive) {
655
        nodedataterm = "nodedata";
656
        casevalue = value;
657
      } else {
658
        nodedataterm = "UPPER(nodedata)";
659
        casevalue = value.toUpperCase();
660
      }
661

    
662
      // Add appropriate wildcards to search string
663
      String searchvalue = null;
664
      if (searchmode.equals("starts-with")) {
665
        searchvalue = casevalue + "%";
666
      } else if (searchmode.equals("ends-with")) {
667
        searchvalue = "%" + casevalue;
668
      } else if (searchmode.equals("contains")) {
669
        searchvalue = "%" + casevalue + "%";
670
      } else {
671
        searchvalue = casevalue;
672
      }
673

    
674
      self.append("SELECT DISTINCT docid FROM xml_nodes WHERE \n");
675

    
676
      if (pathexpr != null) {
677
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
678
        self.append("AND parentnodeid IN ");
679
        self.append("(SELECT nodeid FROM xml_index WHERE path LIKE " + 
680
                    "'" +  pathexpr + "') " );
681
      } else {
682
        self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
683
      }
684

    
685
      return self.toString();
686
    }
687

    
688
    /**
689
     * create a String description of the query that this instance represents.
690
     * This should become a way to get the XML serialization of the query.
691
     */
692
    public String toString() {
693

    
694
      return this.printSQL();
695
    }
696
  }
697
}
(32-32/36)