1
|
/**
 *  '$RCSfile$'
 *    Purpose: A Class that represents a structured query, and can be
 *             constructed from an XML serialization conforming to
 *             pathquery.dtd. The printSQL() method can be used to print
 *             a SQL serialization of the query.
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Matt Jones
 *    Release: @release@
 *
 *   '$Author: berkley $'
 *     '$Date: 2000-08-31 14:20:39 -0700 (Thu, 31 Aug 2000) $'
 * '$Revision: 423 $'
 */
|
16
|
|
17
|
package edu.ucsb.nceas.metacat;
|
18
|
|
19
|
import java.io.*;
|
20
|
import java.util.Stack;
|
21
|
import java.util.Vector;
|
22
|
import java.util.Enumeration;
|
23
|
|
24
|
import org.xml.sax.Attributes;
|
25
|
import org.xml.sax.InputSource;
|
26
|
import org.xml.sax.SAXException;
|
27
|
import org.xml.sax.SAXParseException;
|
28
|
import org.xml.sax.XMLReader;
|
29
|
import org.xml.sax.helpers.XMLReaderFactory;
|
30
|
import org.xml.sax.helpers.DefaultHandler;
|
31
|
|
32
|
/**
|
33
|
* A Class that represents a structured query, and can be
|
34
|
* constructed from an XML serialization conforming to @see pathquery.dtd.
|
35
|
* The printSQL() method can be used to print a SQL serialization of the query.
|
36
|
*/
|
37
|
public class QuerySpecification extends DefaultHandler {
|
38
|
|
39
|
private boolean containsExtendedSQL=false;
|
40
|
|
41
|
// Query data structures
|
42
|
private String meta_file_id;
|
43
|
private String querytitle;
|
44
|
private Vector doctypeList;
|
45
|
private Vector returnFieldList;
|
46
|
private QueryGroup query = null;
|
47
|
|
48
|
private Stack elementStack;
|
49
|
private Stack queryStack;
|
50
|
private String currentValue;
|
51
|
private String currentPathexpr;
|
52
|
private String parserName = null;
|
53
|
|
54
|
/**
|
55
|
* construct an instance of the QuerySpecification class
|
56
|
*
|
57
|
* @param queryspec the XML representation of the query (should conform
|
58
|
* to pathquery.dtd) as a Reader
|
59
|
* @param parserName the fully qualified name of a Java Class implementing
|
60
|
* the org.xml.sax.XMLReader interface
|
61
|
*/
|
62
|
public QuerySpecification( Reader queryspec, String parserName )
|
63
|
throws IOException {
|
64
|
super();
|
65
|
|
66
|
// Initialize the class variables
|
67
|
doctypeList = new Vector();
|
68
|
elementStack = new Stack();
|
69
|
queryStack = new Stack();
|
70
|
returnFieldList = new Vector();
|
71
|
this.parserName = parserName;
|
72
|
|
73
|
// Initialize the parser and read the queryspec
|
74
|
XMLReader parser = initializeParser();
|
75
|
if (parser == null) {
|
76
|
System.err.println("SAX parser not instantiated properly.");
|
77
|
}
|
78
|
try {
|
79
|
parser.parse(new InputSource(queryspec));
|
80
|
} catch (SAXException e) {
|
81
|
System.err.println("error parsing data");
|
82
|
System.err.println(e.getMessage());
|
83
|
}
|
84
|
}
|
85
|
|
86
|
/**
|
87
|
* construct an instance of the QuerySpecification class
|
88
|
*
|
89
|
* @param queryspec the XML representation of the query (should conform
|
90
|
* to pathquery.dtd) as a String
|
91
|
* @param parserName the fully qualified name of a Java Class implementing
|
92
|
* the org.xml.sax.Parser interface
|
93
|
*/
|
94
|
public QuerySpecification( String queryspec, String parserName )
|
95
|
throws IOException {
|
96
|
this(new StringReader(queryspec), parserName);
|
97
|
}
|
98
|
|
99
|
/** Main routine for testing */
|
100
|
static public void main(String[] args) {
|
101
|
|
102
|
if (args.length < 1) {
|
103
|
System.err.println("Wrong number of arguments!!!");
|
104
|
System.err.println("USAGE: java QuerySpecification <xmlfile>");
|
105
|
return;
|
106
|
} else {
|
107
|
String xmlfile = args[0];
|
108
|
|
109
|
try {
|
110
|
MetaCatUtil util = new MetaCatUtil();
|
111
|
FileReader xml = new FileReader(new File(xmlfile));
|
112
|
QuerySpecification qspec =
|
113
|
new QuerySpecification(xml, util.getOption("saxparser"));
|
114
|
System.out.println(qspec.printSQL());
|
115
|
|
116
|
} catch (IOException e) {
|
117
|
System.err.println(e.getMessage());
|
118
|
}
|
119
|
|
120
|
}
|
121
|
}
|
122
|
|
123
|
/**
|
124
|
* Returns true if the parsed query contains and extended xml query
|
125
|
* (i.e. there is at least one <returnfield> in the pathquery document)
|
126
|
*/
|
127
|
public boolean containsExtendedSQL()
|
128
|
{
|
129
|
if(containsExtendedSQL)
|
130
|
{
|
131
|
return true;
|
132
|
}
|
133
|
else
|
134
|
{
|
135
|
return false;
|
136
|
}
|
137
|
}
|
138
|
|
139
|
/**
|
140
|
* Accessor method to return a vector of the extended return fields as
|
141
|
* defined in the <returnfield> tag in the pathquery dtd.
|
142
|
*/
|
143
|
public Vector getReturnFieldList()
|
144
|
{
|
145
|
return this.returnFieldList;
|
146
|
}
|
147
|
|
148
|
/**
|
149
|
* Set up the SAX parser for reading the XML serialized query
|
150
|
*/
|
151
|
private XMLReader initializeParser() {
|
152
|
XMLReader parser = null;
|
153
|
|
154
|
// Set up the SAX document handlers for parsing
|
155
|
try {
|
156
|
|
157
|
// Get an instance of the parser
|
158
|
parser = XMLReaderFactory.createXMLReader(parserName);
|
159
|
|
160
|
// Set the ContentHandler to this instance
|
161
|
parser.setContentHandler(this);
|
162
|
|
163
|
// Set the error Handler to this instance
|
164
|
parser.setErrorHandler(this);
|
165
|
|
166
|
} catch (Exception e) {
|
167
|
System.err.println(e.toString());
|
168
|
}
|
169
|
|
170
|
return parser;
|
171
|
}
|
172
|
|
173
|
/**
|
174
|
* callback method used by the SAX Parser when the start tag of an
|
175
|
* element is detected. Used in this context to parse and store
|
176
|
* the query information in class variables.
|
177
|
*/
|
178
|
public void startElement (String uri, String localName,
|
179
|
String qName, Attributes atts)
|
180
|
throws SAXException {
|
181
|
BasicNode currentNode = new BasicNode(localName);
|
182
|
// add attributes to BasicNode here
|
183
|
if (atts != null) {
|
184
|
int len = atts.getLength();
|
185
|
for (int i = 0; i < len; i++) {
|
186
|
currentNode.setAttribute(atts.getLocalName(i), atts.getValue(i));
|
187
|
}
|
188
|
}
|
189
|
|
190
|
elementStack.push(currentNode);
|
191
|
if (currentNode.getTagName().equals("querygroup")) {
|
192
|
QueryGroup currentGroup = new QueryGroup(
|
193
|
currentNode.getAttribute("operator"));
|
194
|
if (query == null) {
|
195
|
query = currentGroup;
|
196
|
} else {
|
197
|
QueryGroup parentGroup = (QueryGroup)queryStack.peek();
|
198
|
parentGroup.addChild(currentGroup);
|
199
|
}
|
200
|
queryStack.push(currentGroup);
|
201
|
}
|
202
|
}
|
203
|
|
204
|
/**
|
205
|
* callback method used by the SAX Parser when the end tag of an
|
206
|
* element is detected. Used in this context to parse and store
|
207
|
* the query information in class variables.
|
208
|
*/
|
209
|
public void endElement (String uri, String localName,
|
210
|
String qName) throws SAXException {
|
211
|
BasicNode leaving = (BasicNode)elementStack.pop();
|
212
|
if (leaving.getTagName().equals("queryterm")) {
|
213
|
boolean isCaseSensitive = (new Boolean(
|
214
|
leaving.getAttribute("casesensitive"))).booleanValue();
|
215
|
QueryTerm currentTerm = null;
|
216
|
if (currentPathexpr == null) {
|
217
|
currentTerm = new QueryTerm(isCaseSensitive,
|
218
|
leaving.getAttribute("searchmode"),currentValue);
|
219
|
} else {
|
220
|
currentTerm = new QueryTerm(isCaseSensitive,
|
221
|
leaving.getAttribute("searchmode"),currentValue,
|
222
|
currentPathexpr);
|
223
|
}
|
224
|
QueryGroup currentGroup = (QueryGroup)queryStack.peek();
|
225
|
currentGroup.addChild(currentTerm);
|
226
|
currentValue = null;
|
227
|
currentPathexpr = null;
|
228
|
} else if (leaving.getTagName().equals("querygroup")) {
|
229
|
QueryGroup leavingGroup = (QueryGroup)queryStack.pop();
|
230
|
}
|
231
|
}
|
232
|
|
233
|
/**
|
234
|
* callback method used by the SAX Parser when the text sequences of an
|
235
|
* xml stream are detected. Used in this context to parse and store
|
236
|
* the query information in class variables.
|
237
|
*/
|
238
|
public void characters(char ch[], int start, int length) {
|
239
|
|
240
|
String inputString = new String(ch, start, length);
|
241
|
BasicNode currentNode = (BasicNode)elementStack.peek();
|
242
|
String currentTag = currentNode.getTagName();
|
243
|
if (currentTag.equals("meta_file_id")) {
|
244
|
meta_file_id = inputString;
|
245
|
} else if (currentTag.equals("querytitle")) {
|
246
|
querytitle = inputString;
|
247
|
} else if (currentTag.equals("value")) {
|
248
|
currentValue = inputString;
|
249
|
} else if (currentTag.equals("pathexpr")) {
|
250
|
currentPathexpr = inputString;
|
251
|
} else if (currentTag.equals("returndoctype")) {
|
252
|
doctypeList.add(inputString);
|
253
|
} else if (currentTag.equals("returnfield")) {
|
254
|
returnFieldList.add(inputString);
|
255
|
containsExtendedSQL = true;
|
256
|
}
|
257
|
}
|
258
|
|
259
|
|
260
|
/**
|
261
|
* create a SQL serialization of the query that this instance represents
|
262
|
*/
|
263
|
public String printSQL() {
|
264
|
StringBuffer self = new StringBuffer();
|
265
|
|
266
|
self.append("SELECT docid,docname,doctype,doctitle,");
|
267
|
self.append("date_created, date_updated ");
|
268
|
self.append("FROM xml_documents WHERE docid IN (");
|
269
|
|
270
|
// This determines the documents that meet the query conditions
|
271
|
self.append(query.printSQL());
|
272
|
|
273
|
self.append(") ");
|
274
|
|
275
|
// Add SQL to filter for doctypes requested in the query
|
276
|
if (!doctypeList.isEmpty()) {
|
277
|
boolean firstdoctype = true;
|
278
|
self.append(" AND (");
|
279
|
Enumeration en = doctypeList.elements();
|
280
|
while (en.hasMoreElements()) {
|
281
|
String currentDoctype = (String)en.nextElement();
|
282
|
if (firstdoctype) {
|
283
|
firstdoctype = false;
|
284
|
self.append(" doctype = '" + currentDoctype + "'");
|
285
|
} else {
|
286
|
self.append(" OR doctype = '" + currentDoctype + "'");
|
287
|
}
|
288
|
}
|
289
|
self.append(") ");
|
290
|
}
|
291
|
|
292
|
return self.toString();
|
293
|
}
|
294
|
|
295
|
/**
|
296
|
* This method prints sql based upon the <returnfield> tag in the
|
297
|
* pathquery document. This allows for customization of the
|
298
|
* returned fields
|
299
|
* The parameters of the query are changed to upper case before the query
|
300
|
* so that givenName is queryied the same as givenname.
|
301
|
*/
|
302
|
public String printExtendedSQL()
|
303
|
{
|
304
|
StringBuffer self = new StringBuffer();
|
305
|
self.append("select xml_nodes.docid, xml_index.path, xml_nodes.nodedata ");
|
306
|
self.append("from xml_index, xml_nodes where xml_index.nodeid=");
|
307
|
self.append("xml_nodes.parentnodeid and (xml_index.path like '");
|
308
|
boolean firstfield = true;
|
309
|
//put the returnfields into the query
|
310
|
//the for loop allows for multiple fields
|
311
|
for(int i=0; i<returnFieldList.size(); i++)
|
312
|
{
|
313
|
if(firstfield)
|
314
|
{
|
315
|
firstfield = false;
|
316
|
self.append((String)returnFieldList.elementAt(i));
|
317
|
self.append("' ");
|
318
|
}
|
319
|
else
|
320
|
{
|
321
|
self.append("or xml_index.path like '");
|
322
|
self.append((String)returnFieldList.elementAt(i));
|
323
|
self.append("' ");
|
324
|
}
|
325
|
}
|
326
|
self.append(") AND xml_nodes.docid in (");
|
327
|
self.append(query.printSQL());
|
328
|
self.append(")");
|
329
|
self.append(" AND xml_nodes.nodetype = 'TEXT'");
|
330
|
|
331
|
//System.out.println(self.toString());
|
332
|
return self.toString();
|
333
|
}
|
334
|
|
335
|
/**
|
336
|
* create a String description of the query that this instance represents.
|
337
|
* This should become a way to get the XML serialization of the query.
|
338
|
*/
|
339
|
public String toString() {
|
340
|
return "meta_file_id=" + meta_file_id + "\n" +
|
341
|
"querytitle=" + querytitle + "\n" + query;
|
342
|
}
|
343
|
|
344
|
/** a utility class that represents a group of terms in a query */
|
345
|
private class QueryGroup {
|
346
|
private String operator = null; // indicates how query terms are combined
|
347
|
private Vector children = null; // the list of query terms and groups
|
348
|
|
349
|
/**
|
350
|
* construct a new QueryGroup
|
351
|
*
|
352
|
* @param operator the boolean conector used to connect query terms
|
353
|
* in this query group
|
354
|
*/
|
355
|
public QueryGroup(String operator) {
|
356
|
this.operator = operator;
|
357
|
children = new Vector();
|
358
|
}
|
359
|
|
360
|
/**
|
361
|
* Add a child QueryGroup to this QueryGroup
|
362
|
*
|
363
|
* @param qgroup the query group to be added to the list of terms
|
364
|
*/
|
365
|
public void addChild(QueryGroup qgroup) {
|
366
|
children.add((Object)qgroup);
|
367
|
}
|
368
|
|
369
|
/**
|
370
|
* Add a child QueryTerm to this QueryGroup
|
371
|
*
|
372
|
* @param qterm the query term to be added to the list of terms
|
373
|
*/
|
374
|
public void addChild(QueryTerm qterm) {
|
375
|
children.add((Object)qterm);
|
376
|
}
|
377
|
|
378
|
/**
|
379
|
* Retrieve an Enumeration of query terms for this QueryGroup
|
380
|
*/
|
381
|
public Enumeration getChildren() {
|
382
|
return children.elements();
|
383
|
}
|
384
|
|
385
|
/**
|
386
|
* create a SQL serialization of the query that this instance represents
|
387
|
*/
|
388
|
public String printSQL() {
|
389
|
StringBuffer self = new StringBuffer();
|
390
|
boolean first = true;
|
391
|
|
392
|
self.append("(");
|
393
|
|
394
|
Enumeration en= getChildren();
|
395
|
while (en.hasMoreElements()) {
|
396
|
Object qobject = en.nextElement();
|
397
|
if (first) {
|
398
|
first = false;
|
399
|
} else {
|
400
|
self.append(" " + operator + " ");
|
401
|
}
|
402
|
if (qobject instanceof QueryGroup) {
|
403
|
QueryGroup qg = (QueryGroup)qobject;
|
404
|
self.append(qg.printSQL());
|
405
|
} else if (qobject instanceof QueryTerm) {
|
406
|
QueryTerm qt = (QueryTerm)qobject;
|
407
|
self.append(qt.printSQL());
|
408
|
} else {
|
409
|
System.err.println("qobject wrong type: fatal error");
|
410
|
}
|
411
|
}
|
412
|
self.append(") \n");
|
413
|
return self.toString();
|
414
|
}
|
415
|
|
416
|
/**
|
417
|
* create a String description of the query that this instance represents.
|
418
|
* This should become a way to get the XML serialization of the query.
|
419
|
*/
|
420
|
public String toString() {
|
421
|
StringBuffer self = new StringBuffer();
|
422
|
|
423
|
self.append(" (Query group operator=" + operator + "\n");
|
424
|
Enumeration en= getChildren();
|
425
|
while (en.hasMoreElements()) {
|
426
|
Object qobject = en.nextElement();
|
427
|
self.append(qobject);
|
428
|
}
|
429
|
self.append(" )\n");
|
430
|
return self.toString();
|
431
|
}
|
432
|
}
|
433
|
|
434
|
/** a utility class that represents a single term in a query */
|
435
|
private class QueryTerm {
|
436
|
private boolean casesensitive = false;
|
437
|
private String searchmode = null;
|
438
|
private String value = null;
|
439
|
private String pathexpr = null;
|
440
|
|
441
|
/**
|
442
|
* Construct a new instance of a query term for a free text search
|
443
|
* (using the value only)
|
444
|
*
|
445
|
* @param casesensitive flag indicating whether case is used to match
|
446
|
* @param searchmode determines what kind of substring match is performed
|
447
|
* (one of starts-with|ends-with|contains|matches-exactly)
|
448
|
* @param value the text value to match
|
449
|
*/
|
450
|
public QueryTerm(boolean casesensitive, String searchmode,
|
451
|
String value) {
|
452
|
this.casesensitive = casesensitive;
|
453
|
this.searchmode = searchmode;
|
454
|
this.value = value;
|
455
|
}
|
456
|
|
457
|
/**
|
458
|
* Construct a new instance of a query term for a structured search
|
459
|
* (matching the value only for those nodes in the pathexpr)
|
460
|
*
|
461
|
* @param casesensitive flag indicating whether case is used to match
|
462
|
* @param searchmode determines what kind of substring match is performed
|
463
|
* (one of starts-with|ends-with|contains|matches-exactly)
|
464
|
* @param value the text value to match
|
465
|
* @param pathexpr the hierarchical path to the nodes to be searched
|
466
|
*/
|
467
|
public QueryTerm(boolean casesensitive, String searchmode,
|
468
|
String value, String pathexpr) {
|
469
|
this(casesensitive, searchmode, value);
|
470
|
this.pathexpr = pathexpr;
|
471
|
}
|
472
|
|
473
|
/** determine if the QueryTerm is case sensitive */
|
474
|
public boolean isCaseSensitive() {
|
475
|
return casesensitive;
|
476
|
}
|
477
|
|
478
|
/** get the searchmode parameter */
|
479
|
public String getSearchMode() {
|
480
|
return searchmode;
|
481
|
}
|
482
|
|
483
|
/** get the Value parameter */
|
484
|
public String getValue() {
|
485
|
return value;
|
486
|
}
|
487
|
|
488
|
/** get the path expression parameter */
|
489
|
public String getPathExpression() {
|
490
|
return pathexpr;
|
491
|
}
|
492
|
|
493
|
/**
|
494
|
* create a SQL serialization of the query that this instance represents
|
495
|
*/
|
496
|
public String printSQL() {
|
497
|
StringBuffer self = new StringBuffer();
|
498
|
|
499
|
// Uppercase the search string if case match is not important
|
500
|
String casevalue = null;
|
501
|
String nodedataterm = null;
|
502
|
|
503
|
if (casesensitive) {
|
504
|
nodedataterm = "nodedata";
|
505
|
casevalue = value;
|
506
|
} else {
|
507
|
nodedataterm = "UPPER(nodedata)";
|
508
|
casevalue = value.toUpperCase();
|
509
|
}
|
510
|
|
511
|
// Add appropriate wildcards to search string
|
512
|
String searchvalue = null;
|
513
|
if (searchmode.equals("starts-with")) {
|
514
|
searchvalue = casevalue + "%";
|
515
|
} else if (searchmode.equals("ends-with")) {
|
516
|
searchvalue = "%" + casevalue;
|
517
|
} else if (searchmode.equals("contains")) {
|
518
|
searchvalue = "%" + casevalue + "%";
|
519
|
} else {
|
520
|
searchvalue = casevalue;
|
521
|
}
|
522
|
|
523
|
self.append("SELECT DISTINCT docid FROM xml_nodes WHERE \n");
|
524
|
|
525
|
if (pathexpr != null) {
|
526
|
self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
|
527
|
self.append("AND parentnodeid IN ");
|
528
|
self.append("(SELECT nodeid FROM xml_index WHERE path LIKE " +
|
529
|
"'" + pathexpr + "') " );
|
530
|
} else {
|
531
|
self.append(nodedataterm + " LIKE " + "'" + searchvalue + "' ");
|
532
|
}
|
533
|
|
534
|
return self.toString();
|
535
|
}
|
536
|
|
537
|
/**
|
538
|
* create a String description of the query that this instance represents.
|
539
|
* This should become a way to get the XML serialization of the query.
|
540
|
*/
|
541
|
public String toString() {
|
542
|
StringBuffer self = new StringBuffer();
|
543
|
|
544
|
self.append(" Query Term iscasesensitive=" + casesensitive + "\n");
|
545
|
self.append(" searchmode=" + searchmode + "\n");
|
546
|
self.append(" value=" + value + "\n");
|
547
|
if (pathexpr != null) {
|
548
|
self.append(" pathexpr=" + pathexpr + "\n");
|
549
|
}
|
550
|
|
551
|
return self.toString();
|
552
|
}
|
553
|
}
|
554
|
}
|
555
|
|
556
|
/**
 * '$Log$
 * 'Revision 1.13  2000/08/23 22:55:38  berkley
 * 'changed the field names to be case-sensitive in the returnfields
 * '
 * 'Revision 1.12  2000/08/23 17:29:05  berkley
 * 'added support for the returnfield parameter
 * '-QuerySpecification now sets a flag (containsExtendedSQL) when there are returnfield items in the pathquery document.
 * 'the accessor method containsExtendedSQL() can be called by other classes to check for extended return parameters
 * '-getReturnFields returns a Vector of the names of each specified return field.
 * '-printExtendedSQL returns a string of the extra SQL statements required for the query.
 * '
 * '-a calling class should first check containsExtendedSQL to make sure that there are extra fields being returned, then call printExtendedSQL to
 * 'insert the extra SQL into the query.  (Note that this is how DBQuery implements this.)
 * '
 * 'Revision 1.11  2000/08/14 20:53:34  jones
 * 'Added "release" keyword to all metacat source files so that the release
 * 'number will be evident in software distributions.
 * '
 * 'Revision 1.10  2000/06/26 10:35:05  jones
 * 'Merged in substantial changes to DBWriter and associated classes and to
 * 'the MetaCatServlet in order to accomodate the new UPDATE and DELETE
 * 'functions.  The command line tools and the parameters for the
 * 'servlet have changed substantially.
 * '
 * 'Revision 1.9.2.3  2000/06/25 23:38:17  jones
 * 'Added RCSfile keyword
 * '
 * 'Revision 1.9.2.2  2000/06/25 23:34:18  jones
 * 'Changed documentation formatting, added log entries at bottom of source files
 * ''
 */
|