Project

General

Profile

1 1831 tao
/**
2
 *  '$RCSfile$'
3 2357 sgarg
 *    Purpose: A Class that represents a structured query, and can be
4
 *             constructed from an XML serialization conforming to
5
 *             pathquery.dtd. The printSQL() method can be used to print
6 1831 tao
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *
11
 *   '$Author$'
12
 *     '$Date$'
13
 * '$Revision$'
14
 *
15
 * This program is free software; you can redistribute it and/or modify
16
 * it under the terms of the GNU General Public License as published by
17
 * the Free Software Foundation; either version 2 of the License, or
18
 * (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU General Public License
26
 * along with this program; if not, write to the Free Software
27
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28
 */
29
30
package edu.ucsb.nceas.metacat;
31
32 6020 leinfelder
import java.util.Calendar;
33 1831 tao
import java.util.Vector;
34 6020 leinfelder
35
import javax.xml.bind.DatatypeConverter;
36
37 3235 sledge
import org.apache.log4j.Logger;
38 1831 tao
39 5015 daigle
import edu.ucsb.nceas.metacat.shared.MetacatUtilException;
40 4812 daigle
import edu.ucsb.nceas.metacat.util.SystemUtil;
41 4080 daigle
42 1831 tao
/** a utility class that represents a single term in a query */
43 2068 jones
public class QueryTerm
44
{
45 3235 sledge
    private static Logger log = Logger.getLogger(QueryTerm.class);
46 2068 jones
47 1831 tao
    private boolean casesensitive = false;
48 2068 jones
49 1831 tao
    private String searchmode = null;
50 2068 jones
51 1831 tao
    private String value = null;
52 2068 jones
53 1831 tao
    private String pathexpr = null;
54 2068 jones
55 1831 tao
    private boolean percentageSymbol = false;
56 2068 jones
57 1831 tao
    private int countPercentageSearchItem = 0;
58 3238 tao
59 3311 tao
    private boolean inUnionGroup = false;
60
61 3238 tao
    public static final String CONTAINS = "contains";
62
63
    public static final String EQUALS = "equals";
64 1831 tao
65
    /**
66 2068 jones
     * Construct a new instance of a query term for a free text search (using
67
     * the value only)
68 2357 sgarg
     *
69 2068 jones
     * @param casesensitive
70
     *            flag indicating whether case is used to match
71
     * @param searchmode
72
     *            determines what kind of substring match is performed (one of
73
     *            starts-with|ends-with|contains|matches-exactly)
74
     * @param value
75
     *            the text value to match
76 1831 tao
     */
77 2068 jones
    public QueryTerm(boolean casesensitive, String searchmode, String value)
78
    {
79
        this.casesensitive = casesensitive;
80
        this.searchmode = searchmode;
81
        this.value = value;
82 1831 tao
    }
83
84
    /**
85
     * Construct a new instance of a query term for a structured search
86
     * (matching the value only for those nodes in the pathexpr)
87 2357 sgarg
     *
88 2068 jones
     * @param casesensitive
89
     *            flag indicating whether case is used to match
90
     * @param searchmode
91
     *            determines what kind of substring match is performed (one of
92
     *            starts-with|ends-with|contains|matches-exactly)
93
     * @param value
94
     *            the text value to match
95
     * @param pathexpr
96
     *            the hierarchical path to the nodes to be searched
97 1831 tao
     */
98 2068 jones
    public QueryTerm(boolean casesensitive, String searchmode, String value,
99
            String pathexpr)
100
    {
101
        this(casesensitive, searchmode, value);
102
        this.pathexpr = pathexpr;
103 1831 tao
    }
104
105
    /** determine if the QueryTerm is case sensitive */
106 2068 jones
    public boolean isCaseSensitive()
107
    {
108
        return casesensitive;
109 1831 tao
    }
110
111
    /** get the searchmode parameter */
112 2068 jones
    public String getSearchMode()
113
    {
114
        return searchmode;
115 1831 tao
    }
116 2068 jones
117 1831 tao
    /** get the Value parameter */
118 2068 jones
    public String getValue()
119
    {
120
        return value;
121 1831 tao
    }
122
123
    /** get the path expression parameter */
124 2068 jones
    public String getPathExpression()
125
    {
126
        return pathexpr;
127 1831 tao
    }
128 2068 jones
129
    /** get the percentage count for one query term */
130 1831 tao
    public int getPercentageSymbolCount()
131
    {
132 2068 jones
        return countPercentageSearchItem;
133 1831 tao
    }
134 3311 tao
135
    /**
136
     * Set the query term in a union group
137
     * @param inUnionGroup
138
     */
139
    public void setInUnionGroup (boolean inUnionGroup)
140
    {
141
    	this.inUnionGroup = inUnionGroup;
142
    }
143
144
    /**
145
     * If this query group in Union group
146
     * @return
147
     */
148
    public boolean isInUnionGroup()
149
    {
150
    	return this.inUnionGroup;
151
    }
152 1831 tao
153
    /**
154
     * create a SQL serialization of the query that this instance represents
155
     */
156 2068 jones
    public String printSQL(boolean useXMLIndex)
157
    {
158
        StringBuffer self = new StringBuffer();
159 1831 tao
160 2068 jones
        // Uppercase the search string if case match is not important
161
        String casevalue = null;
162
        String nodedataterm = null;
163 2654 sgarg
        boolean notEqual = false;
164 2068 jones
        if (casesensitive) {
165
            nodedataterm = "nodedata";
166
            casevalue = value;
167
        } else {
168
            nodedataterm = "UPPER(nodedata)";
169
            casevalue = value.toUpperCase();
170
        }
171 1831 tao
172 2068 jones
        // Add appropriate wildcards to search string
173
        String searchexpr = null;
174
        if (searchmode.equals("starts-with")) {
175 3053 jones
            searchexpr = nodedataterm + " LIKE '" + casevalue + "%' ";
176 2068 jones
        } else if (searchmode.equals("ends-with")) {
177 3053 jones
            searchexpr = nodedataterm + " LIKE '%" + casevalue + "' ";
178 2068 jones
        } else if (searchmode.equals("contains")) {
179
            if (!casevalue.equals("%")) {
180 3053 jones
                searchexpr = nodedataterm + " LIKE '%" + casevalue + "%' ";
181 2068 jones
            } else {
182 3053 jones
                searchexpr = nodedataterm + " LIKE '" + casevalue + "' ";
183 2068 jones
                // find percentage symbol
184
                percentageSymbol = true;
185
            }
186
        } else if (searchmode.equals("not-contains")) {
187 2654 sgarg
        	notEqual = true;
188 3053 jones
            searchexpr = nodedataterm + " LIKE '%" + casevalue + "%' ";
189 2068 jones
        } else if (searchmode.equals("equals")) {
190
            searchexpr = nodedataterm + " = '" + casevalue + "' ";
191
        } else if (searchmode.equals("isnot-equal")) {
192 2654 sgarg
        	notEqual = true;
193
            searchexpr = nodedataterm + " = '" + casevalue + "' ";
194 1831 tao
        } else {
195 2068 jones
            String oper = null;
196
            if (searchmode.equals("greater-than")) {
197
                oper = ">";
198 2357 sgarg
                nodedataterm = "nodedatanumerical";
199 2068 jones
            } else if (searchmode.equals("greater-than-equals")) {
200
                oper = ">=";
201 2357 sgarg
                nodedataterm = "nodedatanumerical";
202 2068 jones
            } else if (searchmode.equals("less-than")) {
203
                oper = "<";
204 2357 sgarg
                nodedataterm = "nodedatanumerical";
205 2068 jones
            } else if (searchmode.equals("less-than-equals")) {
206
                oper = "<=";
207 2357 sgarg
                nodedataterm = "nodedatanumerical";
208 2068 jones
            } else {
209
                System.out
210
                        .println("NOT expected case. NOT recognized operator: "
211
                                + searchmode);
212
                return null;
213
            }
214 6012 leinfelder
215 6020 leinfelder
216 2068 jones
            try {
217 6020 leinfelder
                // it is number; numeric comparison
218
                searchexpr = nodedataterm + " " + oper + " " + new Double(casevalue) + " ";
219
            } catch (NumberFormatException nfe) {
220
            	// is it a date?
221
            	try {
222
                	// try ISO 8601 formats
223
                	Calendar dataDateValue = DatatypeConverter.parseDateTime(casevalue);
224
                	String lexicalString = DatatypeConverter.printDateTime(dataDateValue);
225
                    nodedataterm = "nodedatadate";
226
            		searchexpr =
227
            			nodedataterm + " " + oper + " '" + lexicalString + "' ";
228
            	} catch (Exception pe) {
229
            		// these are characters; character comparison
230
                    searchexpr = nodedataterm + " " + oper + " '" + casevalue + "' ";
231
            	}
232
            }
233 1831 tao
        }
234
235 2068 jones
236 2522 sgarg
        // to check xml_path_index can be used
237
        boolean usePathIndex = false;
238
239
        // if pathexpr has been specified in metacat.properties for indexing
240
        if(pathexpr != null){
241 4812 daigle
        	try {
242
				if (SystemUtil.getPathsForIndexing().contains(pathexpr)) {
243
					usePathIndex = true;
244
				}
245 4854 daigle
			} catch (MetacatUtilException ue) {
246 4812 daigle
				log.warn("Could not get index paths: " + ue.getMessage());
247
			}
248 2522 sgarg
        }
249 3786 tao
250 2522 sgarg
        if(usePathIndex){
251
            // using xml_path_index table.....
252 3329 tao
        	if(notEqual == true ){
253
        		if (!inUnionGroup)
254
        		{
255
        			self.append("SELECT DISTINCT docid from xml_path_index WHERE");
256
        			self.append(" docid NOT IN (Select docid FROM xml_path_index WHERE ");
257
        			self.append(searchexpr);
258
        			self.append("AND path LIKE '" + pathexpr + "') ");
259
        		}
260
        		else
261
        		{
262
        			//if this is in union group we need to use "OR" to modify query
263
        			self.append("("+searchexpr);
264
        			self.append("AND path LIKE '" + pathexpr + "') ");
265
        		}
266 2654 sgarg
        	} else {
267 3329 tao
        		if (!inUnionGroup)
268
        		{
269
        			self.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
270
        			self.append(searchexpr);
271
        			self.append("AND path LIKE '" + pathexpr + "' ");
272
        		}
273
        		else
274
        		{
275
        			//if this is in union group we need to use "OR" to modify query
276
        			self.append("("+searchexpr);
277
        			self.append("AND path LIKE '" + pathexpr + "') ");
278
        		}
279 2654 sgarg
        	}
280 2522 sgarg
281
        } else {
282
            // using xml_nodes and xml_index tables
283
284 2654 sgarg
        	if(notEqual == true){
285
        		self.append("SELECT DISTINCT docid from xml_nodes WHERE");
286
        		self.append(" docid NOT IN (Select docid FROM xml_nodes WHERE ");
287
        	} else {
288 2693 sgarg
        		self.append("(SELECT DISTINCT docid FROM xml_nodes WHERE ");
289 2654 sgarg
        	}
290
        	self.append(searchexpr);
291
292 2522 sgarg
            if (pathexpr != null) {
293 3786 tao
             String path = pathexpr;
294 2522 sgarg
                // use XML Index
295
                if (useXMLIndex) {
296
                    if (!hasAttributeInPath(pathexpr)) {
297
                        // without attributes in path
298
                        self.append("AND parentnodeid IN ");
299 5093 cjones
                        self.append(
300
                            "(SELECT nodeid FROM xml_index WHERE path LIKE "
301
                            + "'" + path + "') ");
302 2654 sgarg
                    } else {
303 2522 sgarg
                        // has a attribute in path
304
                        String attributeName = QuerySpecification
305 2068 jones
                            .getAttributeName(pathexpr);
306 2522 sgarg
                        self.append(
307
                            "AND nodetype LIKE 'ATTRIBUTE' AND nodename LIKE '"
308
                            + attributeName + "' ");
309
                        // and the path expression includes element content other than
310
                        // just './' or '../'
311
                        if ( (!pathexpr.startsWith(QuerySpecification.
312
                            ATTRIBUTESYMBOL)) &&
313
                            (!pathexpr.startsWith("./" +
314
                                                  QuerySpecification.ATTRIBUTESYMBOL)) &&
315
                            (!pathexpr.startsWith("../" +
316
                                                  QuerySpecification.ATTRIBUTESYMBOL))) {
317 2068 jones
318 2522 sgarg
                            self.append("AND parentnodeid IN ");
319 3786 tao
                            path = QuerySpecification
320 2522 sgarg
                                .newPathExpressionWithOutAttribute(pathexpr);
321 5093 cjones
                            self.append(
322
                                "(SELECT nodeid FROM xml_index WHERE path LIKE "
323
                                + "'" + path + "') ");
324 2522 sgarg
                        }
325 2459 cjones
                    }
326 2068 jones
                }
327 2522 sgarg
                else {
328
                    // without using XML Index; using nested statements instead
329 3771 tao
                    //self.append("AND parentnodeid IN ");
330
                	self.append("AND ");
331 2522 sgarg
                    self.append(useNestedStatements(pathexpr));
332
                }
333 2068 jones
            }
334 2522 sgarg
            else if ( (value.trim()).equals("%")) {
335
                //if pathexpr is null and search value is %, is a
336
                // percentageSearchItem
337
                // the count number will be increase one
338
                countPercentageSearchItem++;
339 2068 jones
340 2522 sgarg
            }
341 2654 sgarg
            self.append(") ");
342 1831 tao
        }
343
344 2068 jones
        return self.toString();
345 1831 tao
    }
346 2068 jones
347
    /** A method to judge if a path have attribute */
348 1831 tao
    private boolean hasAttributeInPath(String path)
349
    {
350 2068 jones
        if (path.indexOf(QuerySpecification.ATTRIBUTESYMBOL) != -1) {
351
            return true;
352
        } else {
353
            return false;
354
        }
355 1831 tao
    }
356 2068 jones
357 3311 tao
358 2069 jones
    public static String useNestedStatements(String pathexpr)
359 1831 tao
    {
360 3235 sledge
        log.info("useNestedStatements()");
361
        log.info("pathexpr: " + pathexpr);
362 3771 tao
        String elementPrefix = " parentnodeid IN ";
363
        String attributePrefix  =  " nodeid IN ";
364
        boolean lastOneIsAttribute = false;
365 3235 sledge
        StringBuffer nestedStmts = new StringBuffer();
366
        String path = pathexpr.trim();
367 3771 tao
        String sql = "";
368 1831 tao
369 3235 sledge
        if (path.indexOf('/') == 0)
370
        {
371
            nestedStmts.append("AND parentnodeid = rootnodeid ");
372
            path = path.substring(1).trim();
373
        }
374
375
        do
376
        {
377
            int inx = path.indexOf('/');
378
            int predicateStart = -1;
379
            int predicateEnd;
380
            String node;
381
            Vector predicates = new Vector();
382
383
            // extract predicates
384
            predicateStart = path.indexOf(QuerySpecification.PREDICATE_START, predicateStart + 1);
385
386
            // any predicates in this node?
387
            if (inx != -1 && (predicateStart == -1 || predicateStart > inx))
388
            {
389
                // no
390
                node = path.substring(0, inx).trim();
391
                path = path.substring(inx + 1).trim();
392
            }
393
            else if (predicateStart == -1)
394
            {
395
                // no and it's the last node
396
                node = path;
397 3771 tao
                if (node != null && node.indexOf(QuerySpecification.ATTRIBUTESYMBOL) != -1)
398
                {
399
                	lastOneIsAttribute = true;
400
                	node = removeAttributeSymbol(node);
401
                }
402 3235 sledge
                path = "";
403
            }
404
            else
405
            {
406
                // yes
407
                node = path.substring(0, predicateStart).trim();
408
                path = path.substring(predicateStart);
409
                predicateStart = 0;
410
411
                while (predicateStart == 0)
412
                {
413
                    predicateEnd = path.indexOf(QuerySpecification.PREDICATE_END,
414
                            predicateStart);
415
416
                    if (predicateEnd == -1)
417
                    {
418
                        log.warn("useNestedStatements(): ");
419
                        log.warn("    Invalid path: " + pathexpr);
420
                        return "";
421
                    }
422
423
                    predicates.add(path.substring(1, predicateEnd).trim());
424
                    path = path.substring(predicateEnd + 1).trim();
425
                    inx = path.indexOf('/');
426
                    predicateStart = path.indexOf(QuerySpecification.PREDICATE_START);
427
                }
428
429
                if (inx == 0)
430
                    path = path.substring(1).trim();
431
                else if (!path.equals(""))
432
                {
433
                    log.warn("useNestedStatements(): ");
434
                    log.warn("    Invalid path: " + pathexpr);
435
                    return "";
436
                }
437
            }
438
439
            nestedStmts.insert(0, "' ").insert(0, node).insert(0,
440
                    "(SELECT nodeid FROM xml_nodes WHERE nodename LIKE '");
441
442
            // for the last statement: it is without " AND parentnodeid IN "
443
            if (!path.equals(""))
444
                nestedStmts.insert(0, "AND parentnodeid IN ");
445
446
            if (predicates.size() > 0)
447
            {
448
                for (int n = 0; n < predicates.size(); n++)
449
                {
450
                    String predSQL = predicate2SQL((String) predicates.get(n));
451
452
                    if (predSQL.equals(""))
453
                        return "";
454
455
                    nestedStmts.append(predSQL).append(' ');
456
                }
457
            }
458
459
            nestedStmts.append(") ");
460
        }
461
        while (!path.equals(""));
462 3771 tao
        if (lastOneIsAttribute)
463
        {
464
        	sql = attributePrefix+nestedStmts.toString();
465
        }
466
        else
467
        {
468
        	sql = elementPrefix+nestedStmts.toString();
469
        }
470
        return sql;
471 3235 sledge
    }
472 3771 tao
473
474
    /*
475
     * Removes @ symbol from path. For example, if path is @entity, entity will be returned.
476
     * If path is entity, entity will be returned.
477
     */
478
    private static String removeAttributeSymbol(String path)
479
    {
480
    	String newPath  ="";
481
    	log.debug("Original string before removing @ is " + path);
482
    	if (path != null)
483
    	{
484
485
    		int attribute = path.indexOf(QuerySpecification.ATTRIBUTESYMBOL);
486
    		if (attribute != -1)
487
    		{
488
    			// has attribute symbol. Reomve it and return the remained part.
489
    			try
490
    			{
491
    		         newPath = path.substring(attribute + 1).trim();
492
    			}
493
    			catch (Exception e)
494
    			{
495
    				newPath = path;
496
    			}
497
    		}
498
    		else
499
    		{
500
    			// doesn't have attribute symbol. Return original string
501
    			newPath = path;
502
    		}
503
    	}
504
    	else
505
    	{
506
    		// if is null, return null;
507
    		newPath = path;
508
    	}
509
    	log.debug("String after removing @ is " + newPath);
510
    	return newPath;
511
512
    }
513 3235 sledge
514 1831 tao
    /**
515 3235 sledge
     *
516
     */
517
    public static String predicate2SQL(String predicate)
518
    {
519
        String path = predicate.trim();
520
        int equals = path.indexOf('=');
521
        String literal = null;
522
523
        if (equals != -1)
524
        {
525
            literal = path.substring(equals + 1).trim();
526
            path = path.substring(0, equals).trim();
527
            int sQuote = literal.indexOf('\'');
528
            int dQuote = literal.indexOf('"');
529
530
            if (sQuote == -1 && dQuote == -1)
531
            {
532
                log.warn("predicate2SQL(): ");
533
                log.warn("    Invalid or unsupported predicate: " + predicate);
534
                return "";
535
            }
536
537
            if (sQuote == -1 &&
538
                (dQuote != 0 ||
539
                 literal.indexOf('"', dQuote + 1) != literal.length() - 1))
540
            {
541
                log.warn("predicate2SQL(): ");
542
                log.warn("    Invalid or unsupported predicate: " + predicate);
543
                return "";
544
            }
545
546
            if (sQuote != 0 ||
547
                literal.indexOf('\'', sQuote + 1) != literal.length() - 1)
548
            {
549
                log.warn("predicate2SQL(): ");
550
                log.warn("    Invalid or unsupported predicate: " + predicate);
551
                return "";
552
            }
553
        }
554
555
        StringBuffer sql = new StringBuffer();
556
        int attribute = path.indexOf('@');
557
558
        if (attribute == -1)
559
        {
560
            if (literal != null)
561
            {
562
                sql.append("AND nodeid IN (SELECT parentnodeid FROM xml_nodes WHERE nodetype = 'TEXT' AND nodedata LIKE ")
563
                    .append(literal).append(")");
564
            }
565
        }
566
        else
567
        {
568
            sql.append(
569
                    "AND nodeid IN (SELECT parentnodeid FROM xml_nodes WHERE nodetype = 'ATTRIBUTE' AND nodename LIKE '")
570
                    .append(path.substring(attribute + 1).trim()).append("' ");
571
572
            if (literal != null)
573
            {
574
                sql.append("AND nodedata LIKE ").append(literal);
575
            }
576
577
            sql.append(")");
578
            path = path.substring(0, attribute).trim();
579
580
            if (path.endsWith("/"))
581
                path = path.substring(0, path.length() - 1).trim();
582
            else
583
            {
584
                if (!path.equals(""))
585
                {
586
                    log.warn("predicate2SQL(): ");
587
                    log.warn("    Invalid or unsupported predicate: " + predicate);
588
                    return "";
589
                }
590
            }
591
        }
592
593
        while (!path.equals(""))
594
        {
595
            int ndx = path.lastIndexOf('/');
596
            int predicateEnd = -1;
597
            int predicateStart;
598
            String node;
599
600
            if (ndx != -1)
601
            {
602
                node = path.substring(ndx + 1).trim();
603
                path = path.substring(0, ndx).trim();
604
            }
605
            else
606
            {
607
                node = path;
608
                path = "";
609
            }
610
611
            if (!node.equals(""))
612
                sql.insert(0, "' ").insert(0, node)
613
                    .insert(0, "(SELECT parentnodeid FROM xml_nodes WHERE nodename LIKE '").append(") ");
614
            else if (!path.equals(""))
615
            {
616
                log.warn("predicate2SQL(): ");
617
                log.warn("    Invalid or unsupported predicate: " + predicate);
618
                return "";
619
            }
620
621
            if (path.equals(""))
622
            {
623
                sql.insert(0,
624
                        node.equals("") ? "AND rootnodeid IN " : "AND nodeid IN ");
625
            }
626
            else
627
            {
628
                sql.append("AND nodeid IN ");
629
            }
630
        }
631
632
        return sql.toString();
633
    }
634
635
    /**
636 1831 tao
     * create a String description of the query that this instance represents.
637
     * This should become a way to get the XML serialization of the query.
638
     */
639 2068 jones
    public String toString()
640
    {
641 1831 tao
642 2068 jones
        return this.printSQL(true);
643 1831 tao
    }
644 3224 tao
645
    /**
646
     * Compare two query terms to see if they have same search value.
647
     * @param term
648
     * @return
649
     */
650
    public boolean hasSameSearchValue(QueryTerm term)
651
    {
652
    	boolean same = false;
653
    	if (term != null)
654
    	{
655
    		String searchValue = term.getValue();
656
    		if (searchValue != null && this.value != null)
657
    		{
658
    			if (searchValue.equalsIgnoreCase(this.value))
659
    			{
660
    				same = true;
661
    			}
662
    		}
663
    	}
664
    	return same;
665
    }
666 2068 jones
}