Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be
4
 *             constructed from an XML serialization conforming to
5
 *             pathquery.dtd. The printSQL() method can be used to print
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *
11
 *   '$Author: leinfelder $'
12
 *     '$Date: 2011-07-20 14:51:04 -0700 (Wed, 20 Jul 2011) $'
13
 * '$Revision: 6361 $'
14
 *
15
 * This program is free software; you can redistribute it and/or modify
16
 * it under the terms of the GNU General Public License as published by
17
 * the Free Software Foundation; either version 2 of the License, or
18
 * (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU General Public License
26
 * along with this program; if not, write to the Free Software
27
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28
 */
29

    
30
package edu.ucsb.nceas.metacat;
31

    
32
import java.util.Calendar;
33
import java.util.Vector;
34

    
35
import javax.xml.bind.DatatypeConverter;
36

    
37
import org.apache.log4j.Logger;
38

    
39
import edu.ucsb.nceas.metacat.shared.MetacatUtilException;
40
import edu.ucsb.nceas.metacat.util.SystemUtil;
41

    
42
/** a utility class that represents a single term in a query */
43
public class QueryTerm
44
{
45
    private static Logger log = Logger.getLogger(QueryTerm.class);
46

    
47
    private boolean casesensitive = false;
48

    
49
    private String searchmode = null;
50

    
51
    private String value = null;
52

    
53
    private String pathexpr = null;
54

    
55
    private boolean percentageSymbol = false;
56

    
57
    private int countPercentageSearchItem = 0;
58
    
59
    private boolean inUnionGroup = false;
60
    
61
    public static final String CONTAINS = "contains";
62
    
63
    public static final String EQUALS = "equals";
64

    
65
    /**
66
     * Construct a new instance of a query term for a free text search (using
67
     * the value only)
68
     *
69
     * @param casesensitive
70
     *            flag indicating whether case is used to match
71
     * @param searchmode
72
     *            determines what kind of substring match is performed (one of
73
     *            starts-with|ends-with|contains|matches-exactly)
74
     * @param value
75
     *            the text value to match
76
     */
77
    public QueryTerm(boolean casesensitive, String searchmode, String value)
78
    {
79
        this.casesensitive = casesensitive;
80
        this.searchmode = searchmode;
81
        this.value = value;
82
    }
83

    
84
    /**
85
     * Construct a new instance of a query term for a structured search
86
     * (matching the value only for those nodes in the pathexpr)
87
     *
88
     * @param casesensitive
89
     *            flag indicating whether case is used to match
90
     * @param searchmode
91
     *            determines what kind of substring match is performed (one of
92
     *            starts-with|ends-with|contains|matches-exactly)
93
     * @param value
94
     *            the text value to match
95
     * @param pathexpr
96
     *            the hierarchical path to the nodes to be searched
97
     */
98
    public QueryTerm(boolean casesensitive, String searchmode, String value,
99
            String pathexpr)
100
    {
101
        this(casesensitive, searchmode, value);
102
        this.pathexpr = pathexpr;
103
    }
104

    
105
    /** determine if the QueryTerm is case sensitive */
106
    public boolean isCaseSensitive()
107
    {
108
        return casesensitive;
109
    }
110

    
111
    /** get the searchmode parameter */
112
    public String getSearchMode()
113
    {
114
        return searchmode;
115
    }
116

    
117
    /** get the Value parameter */
118
    public String getValue()
119
    {
120
        return value;
121
    }
122

    
123
    /** get the path expression parameter */
124
    public String getPathExpression()
125
    {
126
        return pathexpr;
127
    }
128

    
129
    /** get the percentage count for one query term */
130
    public int getPercentageSymbolCount()
131
    {
132
        return countPercentageSearchItem;
133
    }
134
    
135
    /**
136
     * Set the query term in a union group
137
     * @param inUnionGroup
138
     */
139
    public void setInUnionGroup (boolean inUnionGroup)
140
    {
141
    	this.inUnionGroup = inUnionGroup;
142
    }
143
    
144
    /**
145
     * If this query group in Union group
146
     * @return
147
     */
148
    public boolean isInUnionGroup()
149
    {
150
    	return this.inUnionGroup;
151
    }
152

    
153
    
154
    public String printSearchExprSQL() {
155

    
156
        // Uppercase the search string if case match is not important
157
        String casevalue = null;
158
        String nodedataterm = null;
159
        boolean notEqual = false;
160
        if (casesensitive) {
161
            nodedataterm = "nodedata";
162
            casevalue = value;
163
        } else {
164
            nodedataterm = "UPPER(nodedata)";
165
            casevalue = value.toUpperCase();
166
        }
167

    
168
        // Add appropriate wildcards to search string
169
        String searchexpr = null;
170
        if (searchmode.equals("starts-with")) {
171
            searchexpr = nodedataterm + " LIKE '" + casevalue + "%' ";
172
        } else if (searchmode.equals("ends-with")) {
173
            searchexpr = nodedataterm + " LIKE '%" + casevalue + "' ";
174
        } else if (searchmode.equals("contains")) {
175
            if (!casevalue.equals("%")) {
176
                searchexpr = nodedataterm + " LIKE '%" + casevalue + "%' ";
177
            } else {
178
                searchexpr = nodedataterm + " LIKE '" + casevalue + "' ";
179
                // find percentage symbol
180
                percentageSymbol = true;
181
            }
182
        } else if (searchmode.equals("not-contains")) {
183
        	notEqual = true;
184
            searchexpr = nodedataterm + " LIKE '%" + casevalue + "%' ";
185
        } else if (searchmode.equals("equals")) {
186
            searchexpr = nodedataterm + " = '" + casevalue + "' ";
187
        } else if (searchmode.equals("isnot-equal")) {
188
        	notEqual = true;
189
            searchexpr = nodedataterm + " = '" + casevalue + "' ";
190
        } else {
191
            String oper = null;
192
            if (searchmode.equals("greater-than")) {
193
                oper = ">";
194
                nodedataterm = "nodedatanumerical";
195
            } else if (searchmode.equals("greater-than-equals")) {
196
                oper = ">=";
197
                nodedataterm = "nodedatanumerical";
198
            } else if (searchmode.equals("less-than")) {
199
                oper = "<";
200
                nodedataterm = "nodedatanumerical";
201
            } else if (searchmode.equals("less-than-equals")) {
202
                oper = "<=";
203
                nodedataterm = "nodedatanumerical";
204
            } else {
205
                System.out
206
                        .println("NOT expected case. NOT recognized operator: "
207
                                + searchmode);
208
                return null;
209
            }
210
            
211
            
212
            try {
213
                // it is number; numeric comparison
214
                searchexpr = nodedataterm + " " + oper + " " + new Double(casevalue) + " ";
215
            } catch (NumberFormatException nfe) {
216
            	// is it a date?
217
            	try {
218
                	// try ISO 8601 formats
219
                	Calendar dataDateValue = DatatypeConverter.parseDateTime(casevalue);
220
                	String lexicalString = DatatypeConverter.printDateTime(dataDateValue);
221
                    nodedataterm = "nodedatadate";
222
            		searchexpr = 
223
            			nodedataterm + " " + oper + " '" + lexicalString + "' ";
224
            	} catch (Exception pe) {
225
            		// these are characters; character comparison
226
                    searchexpr = nodedataterm + " " + oper + " '" + casevalue + "' ";
227
            	}
228
            }
229
        }
230
        return searchexpr;
231
    }
232
    
233
    public boolean isNotEqualTerm() {
234

    
235
        boolean notEqual = false;
236

    
237
        if (searchmode.equals("not-contains")) {
238
        	notEqual = true;
239
        } else if (searchmode.equals("isnot-equal")) {
240
        	notEqual = true;
241
        }
242
        return notEqual;
243
    }
244
    
245
    /**
246
     * create a SQL serialization of the query that this instance represents
247
     */
248
    public String printSQL(boolean useXMLIndex)
249
    {
250
        StringBuffer self = new StringBuffer();
251
        
252
        // does it contain a not equals?
253
        boolean notEqual = isNotEqualTerm();
254

    
255
        // get the general search expression
256
        String searchexpr = printSearchExprSQL();
257
       
258
        // to check xml_path_index can be used
259
        boolean usePathIndex = false;
260

    
261
        // if pathexpr has been specified in metacat.properties for indexing
262
        if(pathexpr != null){
263
        	try {
264
				if (SystemUtil.getPathsForIndexing().contains(pathexpr)) {
265
					usePathIndex = true;
266
				}
267
			} catch (MetacatUtilException ue) {
268
				log.warn("Could not get index paths: " + ue.getMessage());
269
			}
270
        }
271
        
272
        if(usePathIndex){
273
            // using xml_path_index table.....
274
        	if(notEqual == true ){
275
        		if (!inUnionGroup)
276
        		{
277
        			self.append("SELECT DISTINCT docid from xml_path_index WHERE");
278
        			self.append(" docid NOT IN (Select docid FROM xml_path_index WHERE ");
279
        			self.append(searchexpr);
280
        			self.append("AND path LIKE '" + pathexpr + "') ");
281
        		}
282
        		else
283
        		{
284
        			//if this is in union group we need to use "OR" to modify query
285
        			self.append("("+searchexpr);
286
        			self.append("AND path LIKE '" + pathexpr + "') ");
287
        		}
288
        	} else {
289
        		if (!inUnionGroup)
290
        		{
291
        			self.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
292
        			self.append(searchexpr);
293
        			self.append("AND path LIKE '" + pathexpr + "' ");
294
        		}
295
        		else
296
        		{
297
        			//if this is in union group we need to use "OR" to modify query
298
        			self.append("("+searchexpr);
299
        			self.append("AND path LIKE '" + pathexpr + "') ");
300
        		}
301
        	}
302

    
303
        } else {
304
            // using xml_nodes and xml_index tables
305

    
306
        	if(notEqual == true){
307
        		self.append("SELECT DISTINCT docid from xml_nodes WHERE");
308
        		self.append(" docid NOT IN (Select docid FROM xml_nodes WHERE ");
309
        	} else {
310
        		self.append("(SELECT DISTINCT docid FROM xml_nodes WHERE ");
311
        	}
312
        	self.append(searchexpr);
313
        	
314
            if (pathexpr != null) {
315
             String path = pathexpr;
316
                // use XML Index
317
                if (useXMLIndex) {
318
                    if (!hasAttributeInPath(pathexpr)) {
319
                        // without attributes in path
320
                        self.append("AND parentnodeid IN ");
321
                        self.append(
322
                            "(SELECT nodeid FROM xml_index WHERE path LIKE "
323
                            + "'" + path + "') ");
324
                    } else {
325
                        // has a attribute in path
326
                        String attributeName = QuerySpecification
327
                            .getAttributeName(pathexpr);
328
                        self.append(
329
                            "AND nodetype LIKE 'ATTRIBUTE' AND nodename LIKE '"
330
                            + attributeName + "' ");
331
                        // and the path expression includes element content other than
332
                        // just './' or '../'
333
                        if ( (!pathexpr.startsWith(QuerySpecification.
334
                            ATTRIBUTESYMBOL)) &&
335
                            (!pathexpr.startsWith("./" +
336
                                                  QuerySpecification.ATTRIBUTESYMBOL)) &&
337
                            (!pathexpr.startsWith("../" +
338
                                                  QuerySpecification.ATTRIBUTESYMBOL))) {
339

    
340
                            self.append("AND parentnodeid IN ");
341
                            path = QuerySpecification
342
                                .newPathExpressionWithOutAttribute(pathexpr);
343
                            self.append(
344
                                "(SELECT nodeid FROM xml_index WHERE path LIKE "
345
                                + "'" + path + "') ");
346
                        }
347
                    }
348
                }
349
                else {
350
                    // without using XML Index; using nested statements instead
351
                    //self.append("AND parentnodeid IN ");
352
                	self.append("AND ");
353
                    self.append(useNestedStatements(pathexpr));
354
                }
355
            }
356
            else if ( (value.trim()).equals("%")) {
357
                //if pathexpr is null and search value is %, is a
358
                // percentageSearchItem
359
                // the count number will be increase one
360
                countPercentageSearchItem++;
361

    
362
            }
363
            self.append(") ");
364
        }
365

    
366
        return self.toString();
367
    }
368

    
369
    /** A method to judge if a path have attribute */
370
    private boolean hasAttributeInPath(String path)
371
    {
372
        if (path.indexOf(QuerySpecification.ATTRIBUTESYMBOL) != -1) {
373
            return true;
374
        } else {
375
            return false;
376
        }
377
    }
378

    
379
   
380
    public static String useNestedStatements(String pathexpr)
381
    {
382
        log.info("useNestedStatements()");
383
        log.info("pathexpr: " + pathexpr);
384
        String elementPrefix = " parentnodeid IN ";
385
        String attributePrefix  =  " nodeid IN ";
386
        boolean lastOneIsAttribute = false;
387
        StringBuffer nestedStmts = new StringBuffer();
388
        String path = pathexpr.trim();
389
        String sql = "";
390

    
391
        if (path.indexOf('/') == 0)
392
        {
393
            nestedStmts.append("AND parentnodeid = rootnodeid ");
394
            path = path.substring(1).trim();
395
        }
396

    
397
        do
398
        {
399
            int inx = path.indexOf('/');
400
            int predicateStart = -1;
401
            int predicateEnd;
402
            String node;
403
            Vector predicates = new Vector();
404

    
405
            // extract predicates
406
            predicateStart = path.indexOf(QuerySpecification.PREDICATE_START, predicateStart + 1);
407

    
408
            // any predicates in this node?
409
            if (inx != -1 && (predicateStart == -1 || predicateStart > inx))
410
            {
411
                // no
412
                node = path.substring(0, inx).trim();
413
                path = path.substring(inx + 1).trim();
414
            }
415
            else if (predicateStart == -1)
416
            {
417
                // no and it's the last node
418
                node = path;
419
                if (node != null && node.indexOf(QuerySpecification.ATTRIBUTESYMBOL) != -1)
420
                {
421
                	lastOneIsAttribute = true;
422
                	node = removeAttributeSymbol(node);
423
                }
424
                path = "";
425
            }
426
            else
427
            {
428
                // yes
429
                node = path.substring(0, predicateStart).trim();
430
                path = path.substring(predicateStart);
431
                predicateStart = 0;
432

    
433
                while (predicateStart == 0)
434
                {
435
                    predicateEnd = path.indexOf(QuerySpecification.PREDICATE_END,
436
                            predicateStart);
437

    
438
                    if (predicateEnd == -1)
439
                    {
440
                        log.warn("useNestedStatements(): ");
441
                        log.warn("    Invalid path: " + pathexpr);
442
                        return "";
443
                    }
444

    
445
                    predicates.add(path.substring(1, predicateEnd).trim());
446
                    path = path.substring(predicateEnd + 1).trim();
447
                    inx = path.indexOf('/');
448
                    predicateStart = path.indexOf(QuerySpecification.PREDICATE_START);
449
                }
450

    
451
                if (inx == 0)
452
                    path = path.substring(1).trim();
453
                else if (!path.equals(""))
454
                {
455
                    log.warn("useNestedStatements(): ");
456
                    log.warn("    Invalid path: " + pathexpr);
457
                    return "";
458
                }
459
            }
460

    
461
            nestedStmts.insert(0, "' ").insert(0, node).insert(0,
462
                    "(SELECT nodeid FROM xml_nodes WHERE nodename LIKE '");
463

    
464
            // for the last statement: it is without " AND parentnodeid IN "
465
            if (!path.equals(""))
466
                nestedStmts.insert(0, "AND parentnodeid IN ");
467

    
468
            if (predicates.size() > 0)
469
            {
470
                for (int n = 0; n < predicates.size(); n++)
471
                {
472
                    String predSQL = predicate2SQL((String) predicates.get(n));
473

    
474
                    if (predSQL.equals(""))
475
                        return "";
476

    
477
                    nestedStmts.append(predSQL).append(' ');
478
                }
479
            }
480

    
481
            nestedStmts.append(") ");
482
        }
483
        while (!path.equals(""));
484
        if (lastOneIsAttribute)
485
        {
486
        	sql = attributePrefix+nestedStmts.toString();
487
        }
488
        else
489
        {
490
        	sql = elementPrefix+nestedStmts.toString();
491
        }
492
        return sql;
493
    }
494
    
495
    
496
    /*
497
     * Removes @ symbol from path. For example, if path is @entity, entity will be returned.
498
     * If path is entity, entity will be returned. 
499
     */
500
    private static String removeAttributeSymbol(String path)
501
    {
502
    	String newPath  ="";
503
    	log.debug("Original string before removing @ is " + path);
504
    	if (path != null)
505
    	{
506
    		
507
    		int attribute = path.indexOf(QuerySpecification.ATTRIBUTESYMBOL);
508
    		if (attribute != -1)
509
    		{
510
    			// has attribute symbol. Reomve it and return the remained part. 
511
    			try
512
    			{
513
    		         newPath = path.substring(attribute + 1).trim();
514
    			}
515
    			catch (Exception e)
516
    			{
517
    				newPath = path;
518
    			}
519
    		}
520
    		else
521
    		{
522
    			// doesn't have attribute symbol. Return original string
523
    			newPath = path;
524
    		}
525
    	}
526
    	else
527
    	{
528
    		// if is null, return null;
529
    		newPath = path;
530
    	}
531
    	log.debug("String after removing @ is " + newPath);
532
    	return newPath;
533
    	
534
    }
535

    
536
    /**
537
     * 
538
     */
539
    public static String predicate2SQL(String predicate)
540
    {
541
        String path = predicate.trim();
542
        int equals = path.indexOf('=');
543
        String literal = null;
544

    
545
        if (equals != -1)
546
        {
547
            literal = path.substring(equals + 1).trim();
548
            path = path.substring(0, equals).trim();
549
            int sQuote = literal.indexOf('\'');
550
            int dQuote = literal.indexOf('"');
551

    
552
            if (sQuote == -1 && dQuote == -1)
553
            {
554
                log.warn("predicate2SQL(): ");
555
                log.warn("    Invalid or unsupported predicate: " + predicate);
556
                return "";
557
            }
558

    
559
            if (sQuote == -1 &&
560
                (dQuote != 0 ||
561
                 literal.indexOf('"', dQuote + 1) != literal.length() - 1))
562
            {
563
                log.warn("predicate2SQL(): ");
564
                log.warn("    Invalid or unsupported predicate: " + predicate);
565
                return "";
566
            }
567

    
568
            if (sQuote != 0 ||
569
                literal.indexOf('\'', sQuote + 1) != literal.length() - 1)
570
            {
571
                log.warn("predicate2SQL(): ");
572
                log.warn("    Invalid or unsupported predicate: " + predicate);
573
                return "";
574
            }
575
        }
576

    
577
        StringBuffer sql = new StringBuffer();
578
        int attribute = path.indexOf('@');
579

    
580
        if (attribute == -1)
581
        {
582
            if (literal != null)
583
            {
584
                sql.append("AND nodeid IN (SELECT parentnodeid FROM xml_nodes WHERE nodetype = 'TEXT' AND nodedata LIKE ")
585
                    .append(literal).append(")");
586
            }
587
        }
588
        else
589
        {
590
            sql.append(
591
                    "AND nodeid IN (SELECT parentnodeid FROM xml_nodes WHERE nodetype = 'ATTRIBUTE' AND nodename LIKE '")
592
                    .append(path.substring(attribute + 1).trim()).append("' ");
593

    
594
            if (literal != null)
595
            {
596
                sql.append("AND nodedata LIKE ").append(literal);
597
            }
598

    
599
            sql.append(")");
600
            path = path.substring(0, attribute).trim();
601

    
602
            if (path.endsWith("/"))
603
                path = path.substring(0, path.length() - 1).trim();
604
            else
605
            {
606
                if (!path.equals(""))
607
                {
608
                    log.warn("predicate2SQL(): ");
609
                    log.warn("    Invalid or unsupported predicate: " + predicate);
610
                    return "";
611
                }
612
            }
613
        }
614

    
615
        while (!path.equals(""))
616
        {
617
            int ndx = path.lastIndexOf('/');
618
            int predicateEnd = -1;
619
            int predicateStart;
620
            String node;
621

    
622
            if (ndx != -1)
623
            {
624
                node = path.substring(ndx + 1).trim();
625
                path = path.substring(0, ndx).trim();
626
            }
627
            else
628
            {
629
                node = path;
630
                path = "";
631
            }
632

    
633
            if (!node.equals(""))
634
                sql.insert(0, "' ").insert(0, node)
635
                    .insert(0, "(SELECT parentnodeid FROM xml_nodes WHERE nodename LIKE '").append(") ");
636
            else if (!path.equals(""))
637
            {
638
                log.warn("predicate2SQL(): ");
639
                log.warn("    Invalid or unsupported predicate: " + predicate);
640
                return "";
641
            }
642

    
643
            if (path.equals(""))
644
            {
645
                sql.insert(0,
646
                        node.equals("") ? "AND rootnodeid IN " : "AND nodeid IN ");
647
            }
648
            else
649
            {
650
                sql.append("AND nodeid IN ");
651
            }
652
        }
653

    
654
        return sql.toString();
655
    }
656

    
657
    /**
658
     * create a String description of the query that this instance represents.
659
     * This should become a way to get the XML serialization of the query.
660
     */
661
    public String toString()
662
    {
663

    
664
        return this.printSQL(true);
665
    }
666
    
667
    /**
668
     * Compare two query terms to see if they have same search value.
669
     * @param term
670
     * @return
671
     */
672
    public boolean hasSameSearchValue(QueryTerm term)
673
    {
674
    	boolean same = false;
675
    	if (term != null)
676
    	{
677
    		String searchValue = term.getValue();
678
    		if (searchValue != null && this.value != null)
679
    		{
680
    			if (searchValue.equalsIgnoreCase(this.value))
681
    			{
682
    				if (this.getSearchMode().equals(term.getSearchMode())) {
683
        				same = true;
684
    				}
685
    			}
686
    		}
687
    	}
688
    	return same;
689
    }
690
}
(56-56/65)