/**
 *  '$RCSfile$'
 *    Purpose: A Class that represents a structured query, and can be
 *             constructed from an XML serialization conforming to
 *             pathquery.dtd. The printSQL() method can be used to print
 *             a SQL serialization of the query.
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Matt Jones
 *
 *   '$Author: tao $'
 *     '$Date: 2007-07-26 18:25:03 -0700 (Thu, 26 Jul 2007) $'
 * '$Revision: 3329 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
29
|
|
30
|
package edu.ucsb.nceas.metacat;
|
31
|
|
32
|
import java.util.Vector;
|
33
|
import org.apache.log4j.Logger;
|
34
|
|
35
|
/** a utility class that represents a single term in a query */
|
36
|
public class QueryTerm
|
37
|
{
|
38
|
private static Logger log = Logger.getLogger(QueryTerm.class);
|
39
|
|
40
|
private boolean casesensitive = false;
|
41
|
|
42
|
private String searchmode = null;
|
43
|
|
44
|
private String value = null;
|
45
|
|
46
|
private String pathexpr = null;
|
47
|
|
48
|
private boolean percentageSymbol = false;
|
49
|
|
50
|
private int countPercentageSearchItem = 0;
|
51
|
|
52
|
private boolean inUnionGroup = false;
|
53
|
|
54
|
public static final String CONTAINS = "contains";
|
55
|
|
56
|
public static final String EQUALS = "equals";
|
57
|
|
58
|
/**
|
59
|
* Construct a new instance of a query term for a free text search (using
|
60
|
* the value only)
|
61
|
*
|
62
|
* @param casesensitive
|
63
|
* flag indicating whether case is used to match
|
64
|
* @param searchmode
|
65
|
* determines what kind of substring match is performed (one of
|
66
|
* starts-with|ends-with|contains|matches-exactly)
|
67
|
* @param value
|
68
|
* the text value to match
|
69
|
*/
|
70
|
public QueryTerm(boolean casesensitive, String searchmode, String value)
|
71
|
{
|
72
|
this.casesensitive = casesensitive;
|
73
|
this.searchmode = searchmode;
|
74
|
this.value = value;
|
75
|
}
|
76
|
|
77
|
/**
|
78
|
* Construct a new instance of a query term for a structured search
|
79
|
* (matching the value only for those nodes in the pathexpr)
|
80
|
*
|
81
|
* @param casesensitive
|
82
|
* flag indicating whether case is used to match
|
83
|
* @param searchmode
|
84
|
* determines what kind of substring match is performed (one of
|
85
|
* starts-with|ends-with|contains|matches-exactly)
|
86
|
* @param value
|
87
|
* the text value to match
|
88
|
* @param pathexpr
|
89
|
* the hierarchical path to the nodes to be searched
|
90
|
*/
|
91
|
public QueryTerm(boolean casesensitive, String searchmode, String value,
|
92
|
String pathexpr)
|
93
|
{
|
94
|
this(casesensitive, searchmode, value);
|
95
|
this.pathexpr = pathexpr;
|
96
|
}
|
97
|
|
98
|
/** determine if the QueryTerm is case sensitive */
|
99
|
public boolean isCaseSensitive()
|
100
|
{
|
101
|
return casesensitive;
|
102
|
}
|
103
|
|
104
|
/** get the searchmode parameter */
|
105
|
public String getSearchMode()
|
106
|
{
|
107
|
return searchmode;
|
108
|
}
|
109
|
|
110
|
/** get the Value parameter */
|
111
|
public String getValue()
|
112
|
{
|
113
|
return value;
|
114
|
}
|
115
|
|
116
|
/** get the path expression parameter */
|
117
|
public String getPathExpression()
|
118
|
{
|
119
|
return pathexpr;
|
120
|
}
|
121
|
|
122
|
/** get the percentage count for one query term */
|
123
|
public int getPercentageSymbolCount()
|
124
|
{
|
125
|
return countPercentageSearchItem;
|
126
|
}
|
127
|
|
128
|
/**
|
129
|
* Set the query term in a union group
|
130
|
* @param inUnionGroup
|
131
|
*/
|
132
|
public void setInUnionGroup (boolean inUnionGroup)
|
133
|
{
|
134
|
this.inUnionGroup = inUnionGroup;
|
135
|
}
|
136
|
|
137
|
/**
|
138
|
* If this query group in Union group
|
139
|
* @return
|
140
|
*/
|
141
|
public boolean isInUnionGroup()
|
142
|
{
|
143
|
return this.inUnionGroup;
|
144
|
}
|
145
|
|
146
|
/**
|
147
|
* create a SQL serialization of the query that this instance represents
|
148
|
*/
|
149
|
public String printSQL(boolean useXMLIndex)
|
150
|
{
|
151
|
StringBuffer self = new StringBuffer();
|
152
|
|
153
|
// Uppercase the search string if case match is not important
|
154
|
String casevalue = null;
|
155
|
String nodedataterm = null;
|
156
|
boolean notEqual = false;
|
157
|
if (casesensitive) {
|
158
|
nodedataterm = "nodedata";
|
159
|
casevalue = value;
|
160
|
} else {
|
161
|
nodedataterm = "UPPER(nodedata)";
|
162
|
casevalue = value.toUpperCase();
|
163
|
}
|
164
|
|
165
|
// Add appropriate wildcards to search string
|
166
|
String searchexpr = null;
|
167
|
if (searchmode.equals("starts-with")) {
|
168
|
searchexpr = nodedataterm + " LIKE '" + casevalue + "%' ";
|
169
|
} else if (searchmode.equals("ends-with")) {
|
170
|
searchexpr = nodedataterm + " LIKE '%" + casevalue + "' ";
|
171
|
} else if (searchmode.equals("contains")) {
|
172
|
if (!casevalue.equals("%")) {
|
173
|
searchexpr = nodedataterm + " LIKE '%" + casevalue + "%' ";
|
174
|
} else {
|
175
|
searchexpr = nodedataterm + " LIKE '" + casevalue + "' ";
|
176
|
// find percentage symbol
|
177
|
percentageSymbol = true;
|
178
|
}
|
179
|
} else if (searchmode.equals("not-contains")) {
|
180
|
notEqual = true;
|
181
|
searchexpr = nodedataterm + " LIKE '%" + casevalue + "%' ";
|
182
|
} else if (searchmode.equals("equals")) {
|
183
|
searchexpr = nodedataterm + " = '" + casevalue + "' ";
|
184
|
} else if (searchmode.equals("isnot-equal")) {
|
185
|
notEqual = true;
|
186
|
searchexpr = nodedataterm + " = '" + casevalue + "' ";
|
187
|
} else {
|
188
|
String oper = null;
|
189
|
if (searchmode.equals("greater-than")) {
|
190
|
oper = ">";
|
191
|
nodedataterm = "nodedatanumerical";
|
192
|
} else if (searchmode.equals("greater-than-equals")) {
|
193
|
oper = ">=";
|
194
|
nodedataterm = "nodedatanumerical";
|
195
|
} else if (searchmode.equals("less-than")) {
|
196
|
oper = "<";
|
197
|
nodedataterm = "nodedatanumerical";
|
198
|
} else if (searchmode.equals("less-than-equals")) {
|
199
|
oper = "<=";
|
200
|
nodedataterm = "nodedatanumerical";
|
201
|
} else {
|
202
|
System.out
|
203
|
.println("NOT expected case. NOT recognized operator: "
|
204
|
+ searchmode);
|
205
|
return null;
|
206
|
}
|
207
|
try {
|
208
|
// it is number; numeric comparison
|
209
|
// but we need to make sure there is no string in node data
|
210
|
searchexpr = nodedataterm + " " + oper + " "
|
211
|
+ new Double(casevalue) + " ";
|
212
|
} catch (NumberFormatException nfe) {
|
213
|
// these are characters; character comparison
|
214
|
searchexpr = nodedataterm + " " + oper + " '" + casevalue
|
215
|
+ "' ";
|
216
|
}
|
217
|
}
|
218
|
|
219
|
|
220
|
// to check xml_path_index can be used
|
221
|
boolean usePathIndex = false;
|
222
|
|
223
|
// if pathexpr has been specified in metacat.properties for indexing
|
224
|
if(pathexpr != null){
|
225
|
if(MetaCatUtil.pathsForIndexing.contains(pathexpr)){
|
226
|
usePathIndex = true;
|
227
|
}
|
228
|
}
|
229
|
|
230
|
if(usePathIndex){
|
231
|
// using xml_path_index table.....
|
232
|
if(notEqual == true ){
|
233
|
if (!inUnionGroup)
|
234
|
{
|
235
|
self.append("SELECT DISTINCT docid from xml_path_index WHERE");
|
236
|
self.append(" docid NOT IN (Select docid FROM xml_path_index WHERE ");
|
237
|
self.append(searchexpr);
|
238
|
self.append("AND path LIKE '" + pathexpr + "') ");
|
239
|
}
|
240
|
else
|
241
|
{
|
242
|
//if this is in union group we need to use "OR" to modify query
|
243
|
self.append("("+searchexpr);
|
244
|
self.append("AND path LIKE '" + pathexpr + "') ");
|
245
|
}
|
246
|
} else {
|
247
|
if (!inUnionGroup)
|
248
|
{
|
249
|
self.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
|
250
|
self.append(searchexpr);
|
251
|
self.append("AND path LIKE '" + pathexpr + "' ");
|
252
|
}
|
253
|
else
|
254
|
{
|
255
|
//if this is in union group we need to use "OR" to modify query
|
256
|
self.append("("+searchexpr);
|
257
|
self.append("AND path LIKE '" + pathexpr + "') ");
|
258
|
}
|
259
|
}
|
260
|
|
261
|
} else {
|
262
|
// using xml_nodes and xml_index tables
|
263
|
|
264
|
if(notEqual == true){
|
265
|
self.append("SELECT DISTINCT docid from xml_nodes WHERE");
|
266
|
self.append(" docid NOT IN (Select docid FROM xml_nodes WHERE ");
|
267
|
} else {
|
268
|
self.append("(SELECT DISTINCT docid FROM xml_nodes WHERE ");
|
269
|
}
|
270
|
self.append(searchexpr);
|
271
|
|
272
|
if (pathexpr != null) {
|
273
|
|
274
|
// use XML Index
|
275
|
if (useXMLIndex) {
|
276
|
if (!hasAttributeInPath(pathexpr)) {
|
277
|
// without attributes in path
|
278
|
self.append("AND parentnodeid IN ");
|
279
|
} else {
|
280
|
// has a attribute in path
|
281
|
String attributeName = QuerySpecification
|
282
|
.getAttributeName(pathexpr);
|
283
|
self.append(
|
284
|
"AND nodetype LIKE 'ATTRIBUTE' AND nodename LIKE '"
|
285
|
+ attributeName + "' ");
|
286
|
// and the path expression includes element content other than
|
287
|
// just './' or '../'
|
288
|
if ( (!pathexpr.startsWith(QuerySpecification.
|
289
|
ATTRIBUTESYMBOL)) &&
|
290
|
(!pathexpr.startsWith("./" +
|
291
|
QuerySpecification.ATTRIBUTESYMBOL)) &&
|
292
|
(!pathexpr.startsWith("../" +
|
293
|
QuerySpecification.ATTRIBUTESYMBOL))) {
|
294
|
|
295
|
self.append("AND parentnodeid IN ");
|
296
|
pathexpr = QuerySpecification
|
297
|
.newPathExpressionWithOutAttribute(pathexpr);
|
298
|
}
|
299
|
}
|
300
|
self.append(
|
301
|
"(SELECT nodeid FROM xml_index WHERE path LIKE "
|
302
|
+ "'" + pathexpr + "') ");
|
303
|
}
|
304
|
else {
|
305
|
// without using XML Index; using nested statements instead
|
306
|
self.append("AND parentnodeid IN ");
|
307
|
self.append(useNestedStatements(pathexpr));
|
308
|
}
|
309
|
}
|
310
|
else if ( (value.trim()).equals("%")) {
|
311
|
//if pathexpr is null and search value is %, is a
|
312
|
// percentageSearchItem
|
313
|
// the count number will be increase one
|
314
|
countPercentageSearchItem++;
|
315
|
|
316
|
}
|
317
|
self.append(") ");
|
318
|
}
|
319
|
|
320
|
return self.toString();
|
321
|
}
|
322
|
|
323
|
/** A method to judge if a path have attribute */
|
324
|
private boolean hasAttributeInPath(String path)
|
325
|
{
|
326
|
if (path.indexOf(QuerySpecification.ATTRIBUTESYMBOL) != -1) {
|
327
|
return true;
|
328
|
} else {
|
329
|
return false;
|
330
|
}
|
331
|
}
|
332
|
|
333
|
|
334
|
public static String useNestedStatements(String pathexpr)
|
335
|
{
|
336
|
log.info("useNestedStatements()");
|
337
|
log.info("pathexpr: " + pathexpr);
|
338
|
StringBuffer nestedStmts = new StringBuffer();
|
339
|
String path = pathexpr.trim();
|
340
|
|
341
|
if (path.indexOf('/') == 0)
|
342
|
{
|
343
|
nestedStmts.append("AND parentnodeid = rootnodeid ");
|
344
|
path = path.substring(1).trim();
|
345
|
}
|
346
|
|
347
|
do
|
348
|
{
|
349
|
int inx = path.indexOf('/');
|
350
|
int predicateStart = -1;
|
351
|
int predicateEnd;
|
352
|
String node;
|
353
|
Vector predicates = new Vector();
|
354
|
|
355
|
// extract predicates
|
356
|
predicateStart = path.indexOf(QuerySpecification.PREDICATE_START, predicateStart + 1);
|
357
|
|
358
|
// any predicates in this node?
|
359
|
if (inx != -1 && (predicateStart == -1 || predicateStart > inx))
|
360
|
{
|
361
|
// no
|
362
|
node = path.substring(0, inx).trim();
|
363
|
path = path.substring(inx + 1).trim();
|
364
|
}
|
365
|
else if (predicateStart == -1)
|
366
|
{
|
367
|
// no and it's the last node
|
368
|
node = path;
|
369
|
path = "";
|
370
|
}
|
371
|
else
|
372
|
{
|
373
|
// yes
|
374
|
node = path.substring(0, predicateStart).trim();
|
375
|
path = path.substring(predicateStart);
|
376
|
predicateStart = 0;
|
377
|
|
378
|
while (predicateStart == 0)
|
379
|
{
|
380
|
predicateEnd = path.indexOf(QuerySpecification.PREDICATE_END,
|
381
|
predicateStart);
|
382
|
|
383
|
if (predicateEnd == -1)
|
384
|
{
|
385
|
log.warn("useNestedStatements(): ");
|
386
|
log.warn(" Invalid path: " + pathexpr);
|
387
|
return "";
|
388
|
}
|
389
|
|
390
|
predicates.add(path.substring(1, predicateEnd).trim());
|
391
|
path = path.substring(predicateEnd + 1).trim();
|
392
|
inx = path.indexOf('/');
|
393
|
predicateStart = path.indexOf(QuerySpecification.PREDICATE_START);
|
394
|
}
|
395
|
|
396
|
if (inx == 0)
|
397
|
path = path.substring(1).trim();
|
398
|
else if (!path.equals(""))
|
399
|
{
|
400
|
log.warn("useNestedStatements(): ");
|
401
|
log.warn(" Invalid path: " + pathexpr);
|
402
|
return "";
|
403
|
}
|
404
|
}
|
405
|
|
406
|
nestedStmts.insert(0, "' ").insert(0, node).insert(0,
|
407
|
"(SELECT nodeid FROM xml_nodes WHERE nodename LIKE '");
|
408
|
|
409
|
// for the last statement: it is without " AND parentnodeid IN "
|
410
|
if (!path.equals(""))
|
411
|
nestedStmts.insert(0, "AND parentnodeid IN ");
|
412
|
|
413
|
if (predicates.size() > 0)
|
414
|
{
|
415
|
for (int n = 0; n < predicates.size(); n++)
|
416
|
{
|
417
|
String predSQL = predicate2SQL((String) predicates.get(n));
|
418
|
|
419
|
if (predSQL.equals(""))
|
420
|
return "";
|
421
|
|
422
|
nestedStmts.append(predSQL).append(' ');
|
423
|
}
|
424
|
}
|
425
|
|
426
|
nestedStmts.append(") ");
|
427
|
}
|
428
|
while (!path.equals(""));
|
429
|
|
430
|
return nestedStmts.toString();
|
431
|
}
|
432
|
|
433
|
/**
|
434
|
*
|
435
|
*/
|
436
|
public static String predicate2SQL(String predicate)
|
437
|
{
|
438
|
String path = predicate.trim();
|
439
|
int equals = path.indexOf('=');
|
440
|
String literal = null;
|
441
|
|
442
|
if (equals != -1)
|
443
|
{
|
444
|
literal = path.substring(equals + 1).trim();
|
445
|
path = path.substring(0, equals).trim();
|
446
|
int sQuote = literal.indexOf('\'');
|
447
|
int dQuote = literal.indexOf('"');
|
448
|
|
449
|
if (sQuote == -1 && dQuote == -1)
|
450
|
{
|
451
|
log.warn("predicate2SQL(): ");
|
452
|
log.warn(" Invalid or unsupported predicate: " + predicate);
|
453
|
return "";
|
454
|
}
|
455
|
|
456
|
if (sQuote == -1 &&
|
457
|
(dQuote != 0 ||
|
458
|
literal.indexOf('"', dQuote + 1) != literal.length() - 1))
|
459
|
{
|
460
|
log.warn("predicate2SQL(): ");
|
461
|
log.warn(" Invalid or unsupported predicate: " + predicate);
|
462
|
return "";
|
463
|
}
|
464
|
|
465
|
if (sQuote != 0 ||
|
466
|
literal.indexOf('\'', sQuote + 1) != literal.length() - 1)
|
467
|
{
|
468
|
log.warn("predicate2SQL(): ");
|
469
|
log.warn(" Invalid or unsupported predicate: " + predicate);
|
470
|
return "";
|
471
|
}
|
472
|
}
|
473
|
|
474
|
StringBuffer sql = new StringBuffer();
|
475
|
int attribute = path.indexOf('@');
|
476
|
|
477
|
if (attribute == -1)
|
478
|
{
|
479
|
if (literal != null)
|
480
|
{
|
481
|
sql.append("AND nodeid IN (SELECT parentnodeid FROM xml_nodes WHERE nodetype = 'TEXT' AND nodedata LIKE ")
|
482
|
.append(literal).append(")");
|
483
|
}
|
484
|
}
|
485
|
else
|
486
|
{
|
487
|
sql.append(
|
488
|
"AND nodeid IN (SELECT parentnodeid FROM xml_nodes WHERE nodetype = 'ATTRIBUTE' AND nodename LIKE '")
|
489
|
.append(path.substring(attribute + 1).trim()).append("' ");
|
490
|
|
491
|
if (literal != null)
|
492
|
{
|
493
|
sql.append("AND nodedata LIKE ").append(literal);
|
494
|
}
|
495
|
|
496
|
sql.append(")");
|
497
|
path = path.substring(0, attribute).trim();
|
498
|
|
499
|
if (path.endsWith("/"))
|
500
|
path = path.substring(0, path.length() - 1).trim();
|
501
|
else
|
502
|
{
|
503
|
if (!path.equals(""))
|
504
|
{
|
505
|
log.warn("predicate2SQL(): ");
|
506
|
log.warn(" Invalid or unsupported predicate: " + predicate);
|
507
|
return "";
|
508
|
}
|
509
|
}
|
510
|
}
|
511
|
|
512
|
while (!path.equals(""))
|
513
|
{
|
514
|
int ndx = path.lastIndexOf('/');
|
515
|
int predicateEnd = -1;
|
516
|
int predicateStart;
|
517
|
String node;
|
518
|
|
519
|
if (ndx != -1)
|
520
|
{
|
521
|
node = path.substring(ndx + 1).trim();
|
522
|
path = path.substring(0, ndx).trim();
|
523
|
}
|
524
|
else
|
525
|
{
|
526
|
node = path;
|
527
|
path = "";
|
528
|
}
|
529
|
|
530
|
if (!node.equals(""))
|
531
|
sql.insert(0, "' ").insert(0, node)
|
532
|
.insert(0, "(SELECT parentnodeid FROM xml_nodes WHERE nodename LIKE '").append(") ");
|
533
|
else if (!path.equals(""))
|
534
|
{
|
535
|
log.warn("predicate2SQL(): ");
|
536
|
log.warn(" Invalid or unsupported predicate: " + predicate);
|
537
|
return "";
|
538
|
}
|
539
|
|
540
|
if (path.equals(""))
|
541
|
{
|
542
|
sql.insert(0,
|
543
|
node.equals("") ? "AND rootnodeid IN " : "AND nodeid IN ");
|
544
|
}
|
545
|
else
|
546
|
{
|
547
|
sql.append("AND nodeid IN ");
|
548
|
}
|
549
|
}
|
550
|
|
551
|
return sql.toString();
|
552
|
}
|
553
|
|
554
|
/**
|
555
|
* create a String description of the query that this instance represents.
|
556
|
* This should become a way to get the XML serialization of the query.
|
557
|
*/
|
558
|
public String toString()
|
559
|
{
|
560
|
|
561
|
return this.printSQL(true);
|
562
|
}
|
563
|
|
564
|
/**
|
565
|
* Compare two query terms to see if they have same search value.
|
566
|
* @param term
|
567
|
* @return
|
568
|
*/
|
569
|
public boolean hasSameSearchValue(QueryTerm term)
|
570
|
{
|
571
|
boolean same = false;
|
572
|
if (term != null)
|
573
|
{
|
574
|
String searchValue = term.getValue();
|
575
|
if (searchValue != null && this.value != null)
|
576
|
{
|
577
|
if (searchValue.equalsIgnoreCase(this.value))
|
578
|
{
|
579
|
same = true;
|
580
|
}
|
581
|
}
|
582
|
}
|
583
|
return same;
|
584
|
}
|
585
|
}
|