Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be
4
 *             constructed from an XML serialization conforming to
5
 *             pathquery.dtd. The printSQL() method can be used to print
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *
11
 *   '$Author: leinfelder $'
12
 *     '$Date: 2011-03-16 22:56:31 -0700 (Wed, 16 Mar 2011) $'
13
 * '$Revision: 6012 $'
14
 *
15
 * This program is free software; you can redistribute it and/or modify
16
 * it under the terms of the GNU General Public License as published by
17
 * the Free Software Foundation; either version 2 of the License, or
18
 * (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU General Public License
26
 * along with this program; if not, write to the Free Software
27
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28
 */
29

    
30
package edu.ucsb.nceas.metacat;
31

    
32
import java.text.ParseException;
33
import java.text.SimpleDateFormat;
34
import java.util.Date;
35
import java.util.Vector;
36
import org.apache.log4j.Logger;
37

    
38
import edu.ucsb.nceas.metacat.shared.MetacatUtilException;
39
import edu.ucsb.nceas.metacat.util.MetacatUtil;
40
import edu.ucsb.nceas.metacat.util.SystemUtil;
41

    
42
/** a utility class that represents a single term in a query */
43
public class QueryTerm
44
{
45
    private static Logger log = Logger.getLogger(QueryTerm.class);
46

    
47
    private boolean casesensitive = false;
48

    
49
    private String searchmode = null;
50

    
51
    private String value = null;
52

    
53
    private String pathexpr = null;
54

    
55
    private boolean percentageSymbol = false;
56

    
57
    private int countPercentageSearchItem = 0;
58
    
59
    private boolean inUnionGroup = false;
60
    
61
    public static final String CONTAINS = "contains";
62
    
63
    public static final String EQUALS = "equals";
64

    
65
    /**
66
     * Construct a new instance of a query term for a free text search (using
67
     * the value only)
68
     *
69
     * @param casesensitive
70
     *            flag indicating whether case is used to match
71
     * @param searchmode
72
     *            determines what kind of substring match is performed (one of
73
     *            starts-with|ends-with|contains|matches-exactly)
74
     * @param value
75
     *            the text value to match
76
     */
77
    public QueryTerm(boolean casesensitive, String searchmode, String value)
78
    {
79
        this.casesensitive = casesensitive;
80
        this.searchmode = searchmode;
81
        this.value = value;
82
    }
83

    
84
    /**
85
     * Construct a new instance of a query term for a structured search
86
     * (matching the value only for those nodes in the pathexpr)
87
     *
88
     * @param casesensitive
89
     *            flag indicating whether case is used to match
90
     * @param searchmode
91
     *            determines what kind of substring match is performed (one of
92
     *            starts-with|ends-with|contains|matches-exactly)
93
     * @param value
94
     *            the text value to match
95
     * @param pathexpr
96
     *            the hierarchical path to the nodes to be searched
97
     */
98
    public QueryTerm(boolean casesensitive, String searchmode, String value,
99
            String pathexpr)
100
    {
101
        this(casesensitive, searchmode, value);
102
        this.pathexpr = pathexpr;
103
    }
104

    
105
    /** determine if the QueryTerm is case sensitive */
106
    public boolean isCaseSensitive()
107
    {
108
        return casesensitive;
109
    }
110

    
111
    /** get the searchmode parameter */
112
    public String getSearchMode()
113
    {
114
        return searchmode;
115
    }
116

    
117
    /** get the Value parameter */
118
    public String getValue()
119
    {
120
        return value;
121
    }
122

    
123
    /** get the path expression parameter */
124
    public String getPathExpression()
125
    {
126
        return pathexpr;
127
    }
128

    
129
    /** get the percentage count for one query term */
130
    public int getPercentageSymbolCount()
131
    {
132
        return countPercentageSearchItem;
133
    }
134
    
135
    /**
136
     * Set the query term in a union group
137
     * @param inUnionGroup
138
     */
139
    public void setInUnionGroup (boolean inUnionGroup)
140
    {
141
    	this.inUnionGroup = inUnionGroup;
142
    }
143
    
144
    /**
145
     * If this query group in Union group
146
     * @return
147
     */
148
    public boolean isInUnionGroup()
149
    {
150
    	return this.inUnionGroup;
151
    }
152

    
153
    /**
154
     * create a SQL serialization of the query that this instance represents
155
     */
156
    public String printSQL(boolean useXMLIndex)
157
    {
158
        StringBuffer self = new StringBuffer();
159

    
160
        // Uppercase the search string if case match is not important
161
        String casevalue = null;
162
        String nodedataterm = null;
163
        boolean notEqual = false;
164
        if (casesensitive) {
165
            nodedataterm = "nodedata";
166
            casevalue = value;
167
        } else {
168
            nodedataterm = "UPPER(nodedata)";
169
            casevalue = value.toUpperCase();
170
        }
171

    
172
        // Add appropriate wildcards to search string
173
        String searchexpr = null;
174
        if (searchmode.equals("starts-with")) {
175
            searchexpr = nodedataterm + " LIKE '" + casevalue + "%' ";
176
        } else if (searchmode.equals("ends-with")) {
177
            searchexpr = nodedataterm + " LIKE '%" + casevalue + "' ";
178
        } else if (searchmode.equals("contains")) {
179
            if (!casevalue.equals("%")) {
180
                searchexpr = nodedataterm + " LIKE '%" + casevalue + "%' ";
181
            } else {
182
                searchexpr = nodedataterm + " LIKE '" + casevalue + "' ";
183
                // find percentage symbol
184
                percentageSymbol = true;
185
            }
186
        } else if (searchmode.equals("not-contains")) {
187
        	notEqual = true;
188
            searchexpr = nodedataterm + " LIKE '%" + casevalue + "%' ";
189
        } else if (searchmode.equals("equals")) {
190
            searchexpr = nodedataterm + " = '" + casevalue + "' ";
191
        } else if (searchmode.equals("isnot-equal")) {
192
        	notEqual = true;
193
            searchexpr = nodedataterm + " = '" + casevalue + "' ";
194
        } else {
195
            String oper = null;
196
            if (searchmode.equals("greater-than")) {
197
                oper = ">";
198
                nodedataterm = "nodedatanumerical";
199
            } else if (searchmode.equals("greater-than-equals")) {
200
                oper = ">=";
201
                nodedataterm = "nodedatanumerical";
202
            } else if (searchmode.equals("less-than")) {
203
                oper = "<";
204
                nodedataterm = "nodedatanumerical";
205
            } else if (searchmode.equals("less-than-equals")) {
206
                oper = "<=";
207
                nodedataterm = "nodedatanumerical";
208
            } else {
209
                System.out
210
                        .println("NOT expected case. NOT recognized operator: "
211
                                + searchmode);
212
                return null;
213
            }
214
            
215
            try {
216
            	// try some common ISO 8601 formats
217
        		SimpleDateFormat sdf = null;
218
        		if (casevalue.length() == 10) {
219
        			sdf = new SimpleDateFormat("yyyy-MM-dd");
220
        		} else if (casevalue.length() == 19) {
221
        			sdf = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss");
222
        		}
223
        		Date dataDateValue = sdf.parse(casevalue);
224
                nodedataterm = "nodedatadate";
225
        		searchexpr = 
226
        			nodedataterm + " " + oper + " '"
227
        			+ casevalue + "' ";
228
        	} catch (ParseException pe) {
229
	            try {
230
	                // it is number; numeric comparison
231
	                // but we need to make sure there is no string in node data
232
	                searchexpr = nodedataterm + " " + oper + " "
233
	                        + new Double(casevalue) + " ";
234
	            } catch (NumberFormatException nfe) {
235
	                // these are characters; character comparison
236
	                searchexpr = nodedataterm + " " + oper + " '" + casevalue
237
	                        + "' ";
238
	            }
239
        	}
240
            
241
        }
242

    
243

    
244
        // to check xml_path_index can be used
245
        boolean usePathIndex = false;
246

    
247
        // if pathexpr has been specified in metacat.properties for indexing
248
        if(pathexpr != null){
249
        	try {
250
				if (SystemUtil.getPathsForIndexing().contains(pathexpr)) {
251
					usePathIndex = true;
252
				}
253
			} catch (MetacatUtilException ue) {
254
				log.warn("Could not get index paths: " + ue.getMessage());
255
			}
256
        }
257
        
258
        if(usePathIndex){
259
            // using xml_path_index table.....
260
        	if(notEqual == true ){
261
        		if (!inUnionGroup)
262
        		{
263
        			self.append("SELECT DISTINCT docid from xml_path_index WHERE");
264
        			self.append(" docid NOT IN (Select docid FROM xml_path_index WHERE ");
265
        			self.append(searchexpr);
266
        			self.append("AND path LIKE '" + pathexpr + "') ");
267
        		}
268
        		else
269
        		{
270
        			//if this is in union group we need to use "OR" to modify query
271
        			self.append("("+searchexpr);
272
        			self.append("AND path LIKE '" + pathexpr + "') ");
273
        		}
274
        	} else {
275
        		if (!inUnionGroup)
276
        		{
277
        			self.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
278
        			self.append(searchexpr);
279
        			self.append("AND path LIKE '" + pathexpr + "' ");
280
        		}
281
        		else
282
        		{
283
        			//if this is in union group we need to use "OR" to modify query
284
        			self.append("("+searchexpr);
285
        			self.append("AND path LIKE '" + pathexpr + "') ");
286
        		}
287
        	}
288

    
289
        } else {
290
            // using xml_nodes and xml_index tables
291

    
292
        	if(notEqual == true){
293
        		self.append("SELECT DISTINCT docid from xml_nodes WHERE");
294
        		self.append(" docid NOT IN (Select docid FROM xml_nodes WHERE ");
295
        	} else {
296
        		self.append("(SELECT DISTINCT docid FROM xml_nodes WHERE ");
297
        	}
298
        	self.append(searchexpr);
299
        	
300
            if (pathexpr != null) {
301
             String path = pathexpr;
302
                // use XML Index
303
                if (useXMLIndex) {
304
                    if (!hasAttributeInPath(pathexpr)) {
305
                        // without attributes in path
306
                        self.append("AND parentnodeid IN ");
307
                        self.append(
308
                            "(SELECT nodeid FROM xml_index WHERE path LIKE "
309
                            + "'" + path + "') ");
310
                    } else {
311
                        // has a attribute in path
312
                        String attributeName = QuerySpecification
313
                            .getAttributeName(pathexpr);
314
                        self.append(
315
                            "AND nodetype LIKE 'ATTRIBUTE' AND nodename LIKE '"
316
                            + attributeName + "' ");
317
                        // and the path expression includes element content other than
318
                        // just './' or '../'
319
                        if ( (!pathexpr.startsWith(QuerySpecification.
320
                            ATTRIBUTESYMBOL)) &&
321
                            (!pathexpr.startsWith("./" +
322
                                                  QuerySpecification.ATTRIBUTESYMBOL)) &&
323
                            (!pathexpr.startsWith("../" +
324
                                                  QuerySpecification.ATTRIBUTESYMBOL))) {
325

    
326
                            self.append("AND parentnodeid IN ");
327
                            path = QuerySpecification
328
                                .newPathExpressionWithOutAttribute(pathexpr);
329
                            self.append(
330
                                "(SELECT nodeid FROM xml_index WHERE path LIKE "
331
                                + "'" + path + "') ");
332
                        }
333
                    }
334
                }
335
                else {
336
                    // without using XML Index; using nested statements instead
337
                    //self.append("AND parentnodeid IN ");
338
                	self.append("AND ");
339
                    self.append(useNestedStatements(pathexpr));
340
                }
341
            }
342
            else if ( (value.trim()).equals("%")) {
343
                //if pathexpr is null and search value is %, is a
344
                // percentageSearchItem
345
                // the count number will be increase one
346
                countPercentageSearchItem++;
347

    
348
            }
349
            self.append(") ");
350
        }
351

    
352
        return self.toString();
353
    }
354

    
355
    /** A method to judge if a path have attribute */
356
    private boolean hasAttributeInPath(String path)
357
    {
358
        if (path.indexOf(QuerySpecification.ATTRIBUTESYMBOL) != -1) {
359
            return true;
360
        } else {
361
            return false;
362
        }
363
    }
364

    
365
   
366
    public static String useNestedStatements(String pathexpr)
367
    {
368
        log.info("useNestedStatements()");
369
        log.info("pathexpr: " + pathexpr);
370
        String elementPrefix = " parentnodeid IN ";
371
        String attributePrefix  =  " nodeid IN ";
372
        boolean lastOneIsAttribute = false;
373
        StringBuffer nestedStmts = new StringBuffer();
374
        String path = pathexpr.trim();
375
        String sql = "";
376

    
377
        if (path.indexOf('/') == 0)
378
        {
379
            nestedStmts.append("AND parentnodeid = rootnodeid ");
380
            path = path.substring(1).trim();
381
        }
382

    
383
        do
384
        {
385
            int inx = path.indexOf('/');
386
            int predicateStart = -1;
387
            int predicateEnd;
388
            String node;
389
            Vector predicates = new Vector();
390

    
391
            // extract predicates
392
            predicateStart = path.indexOf(QuerySpecification.PREDICATE_START, predicateStart + 1);
393

    
394
            // any predicates in this node?
395
            if (inx != -1 && (predicateStart == -1 || predicateStart > inx))
396
            {
397
                // no
398
                node = path.substring(0, inx).trim();
399
                path = path.substring(inx + 1).trim();
400
            }
401
            else if (predicateStart == -1)
402
            {
403
                // no and it's the last node
404
                node = path;
405
                if (node != null && node.indexOf(QuerySpecification.ATTRIBUTESYMBOL) != -1)
406
                {
407
                	lastOneIsAttribute = true;
408
                	node = removeAttributeSymbol(node);
409
                }
410
                path = "";
411
            }
412
            else
413
            {
414
                // yes
415
                node = path.substring(0, predicateStart).trim();
416
                path = path.substring(predicateStart);
417
                predicateStart = 0;
418

    
419
                while (predicateStart == 0)
420
                {
421
                    predicateEnd = path.indexOf(QuerySpecification.PREDICATE_END,
422
                            predicateStart);
423

    
424
                    if (predicateEnd == -1)
425
                    {
426
                        log.warn("useNestedStatements(): ");
427
                        log.warn("    Invalid path: " + pathexpr);
428
                        return "";
429
                    }
430

    
431
                    predicates.add(path.substring(1, predicateEnd).trim());
432
                    path = path.substring(predicateEnd + 1).trim();
433
                    inx = path.indexOf('/');
434
                    predicateStart = path.indexOf(QuerySpecification.PREDICATE_START);
435
                }
436

    
437
                if (inx == 0)
438
                    path = path.substring(1).trim();
439
                else if (!path.equals(""))
440
                {
441
                    log.warn("useNestedStatements(): ");
442
                    log.warn("    Invalid path: " + pathexpr);
443
                    return "";
444
                }
445
            }
446

    
447
            nestedStmts.insert(0, "' ").insert(0, node).insert(0,
448
                    "(SELECT nodeid FROM xml_nodes WHERE nodename LIKE '");
449

    
450
            // for the last statement: it is without " AND parentnodeid IN "
451
            if (!path.equals(""))
452
                nestedStmts.insert(0, "AND parentnodeid IN ");
453

    
454
            if (predicates.size() > 0)
455
            {
456
                for (int n = 0; n < predicates.size(); n++)
457
                {
458
                    String predSQL = predicate2SQL((String) predicates.get(n));
459

    
460
                    if (predSQL.equals(""))
461
                        return "";
462

    
463
                    nestedStmts.append(predSQL).append(' ');
464
                }
465
            }
466

    
467
            nestedStmts.append(") ");
468
        }
469
        while (!path.equals(""));
470
        if (lastOneIsAttribute)
471
        {
472
        	sql = attributePrefix+nestedStmts.toString();
473
        }
474
        else
475
        {
476
        	sql = elementPrefix+nestedStmts.toString();
477
        }
478
        return sql;
479
    }
480
    
481
    
482
    /*
483
     * Removes @ symbol from path. For example, if path is @entity, entity will be returned.
484
     * If path is entity, entity will be returned. 
485
     */
486
    private static String removeAttributeSymbol(String path)
487
    {
488
    	String newPath  ="";
489
    	log.debug("Original string before removing @ is " + path);
490
    	if (path != null)
491
    	{
492
    		
493
    		int attribute = path.indexOf(QuerySpecification.ATTRIBUTESYMBOL);
494
    		if (attribute != -1)
495
    		{
496
    			// has attribute symbol. Reomve it and return the remained part. 
497
    			try
498
    			{
499
    		         newPath = path.substring(attribute + 1).trim();
500
    			}
501
    			catch (Exception e)
502
    			{
503
    				newPath = path;
504
    			}
505
    		}
506
    		else
507
    		{
508
    			// doesn't have attribute symbol. Return original string
509
    			newPath = path;
510
    		}
511
    	}
512
    	else
513
    	{
514
    		// if is null, return null;
515
    		newPath = path;
516
    	}
517
    	log.debug("String after removing @ is " + newPath);
518
    	return newPath;
519
    	
520
    }
521

    
522
    /**
523
     * 
524
     */
525
    public static String predicate2SQL(String predicate)
526
    {
527
        String path = predicate.trim();
528
        int equals = path.indexOf('=');
529
        String literal = null;
530

    
531
        if (equals != -1)
532
        {
533
            literal = path.substring(equals + 1).trim();
534
            path = path.substring(0, equals).trim();
535
            int sQuote = literal.indexOf('\'');
536
            int dQuote = literal.indexOf('"');
537

    
538
            if (sQuote == -1 && dQuote == -1)
539
            {
540
                log.warn("predicate2SQL(): ");
541
                log.warn("    Invalid or unsupported predicate: " + predicate);
542
                return "";
543
            }
544

    
545
            if (sQuote == -1 &&
546
                (dQuote != 0 ||
547
                 literal.indexOf('"', dQuote + 1) != literal.length() - 1))
548
            {
549
                log.warn("predicate2SQL(): ");
550
                log.warn("    Invalid or unsupported predicate: " + predicate);
551
                return "";
552
            }
553

    
554
            if (sQuote != 0 ||
555
                literal.indexOf('\'', sQuote + 1) != literal.length() - 1)
556
            {
557
                log.warn("predicate2SQL(): ");
558
                log.warn("    Invalid or unsupported predicate: " + predicate);
559
                return "";
560
            }
561
        }
562

    
563
        StringBuffer sql = new StringBuffer();
564
        int attribute = path.indexOf('@');
565

    
566
        if (attribute == -1)
567
        {
568
            if (literal != null)
569
            {
570
                sql.append("AND nodeid IN (SELECT parentnodeid FROM xml_nodes WHERE nodetype = 'TEXT' AND nodedata LIKE ")
571
                    .append(literal).append(")");
572
            }
573
        }
574
        else
575
        {
576
            sql.append(
577
                    "AND nodeid IN (SELECT parentnodeid FROM xml_nodes WHERE nodetype = 'ATTRIBUTE' AND nodename LIKE '")
578
                    .append(path.substring(attribute + 1).trim()).append("' ");
579

    
580
            if (literal != null)
581
            {
582
                sql.append("AND nodedata LIKE ").append(literal);
583
            }
584

    
585
            sql.append(")");
586
            path = path.substring(0, attribute).trim();
587

    
588
            if (path.endsWith("/"))
589
                path = path.substring(0, path.length() - 1).trim();
590
            else
591
            {
592
                if (!path.equals(""))
593
                {
594
                    log.warn("predicate2SQL(): ");
595
                    log.warn("    Invalid or unsupported predicate: " + predicate);
596
                    return "";
597
                }
598
            }
599
        }
600

    
601
        while (!path.equals(""))
602
        {
603
            int ndx = path.lastIndexOf('/');
604
            int predicateEnd = -1;
605
            int predicateStart;
606
            String node;
607

    
608
            if (ndx != -1)
609
            {
610
                node = path.substring(ndx + 1).trim();
611
                path = path.substring(0, ndx).trim();
612
            }
613
            else
614
            {
615
                node = path;
616
                path = "";
617
            }
618

    
619
            if (!node.equals(""))
620
                sql.insert(0, "' ").insert(0, node)
621
                    .insert(0, "(SELECT parentnodeid FROM xml_nodes WHERE nodename LIKE '").append(") ");
622
            else if (!path.equals(""))
623
            {
624
                log.warn("predicate2SQL(): ");
625
                log.warn("    Invalid or unsupported predicate: " + predicate);
626
                return "";
627
            }
628

    
629
            if (path.equals(""))
630
            {
631
                sql.insert(0,
632
                        node.equals("") ? "AND rootnodeid IN " : "AND nodeid IN ");
633
            }
634
            else
635
            {
636
                sql.append("AND nodeid IN ");
637
            }
638
        }
639

    
640
        return sql.toString();
641
    }
642

    
643
    /**
644
     * create a String description of the query that this instance represents.
645
     * This should become a way to get the XML serialization of the query.
646
     */
647
    public String toString()
648
    {
649

    
650
        return this.printSQL(true);
651
    }
652
    
653
    /**
654
     * Compare two query terms to see if they have same search value.
655
     * @param term
656
     * @return
657
     */
658
    public boolean hasSameSearchValue(QueryTerm term)
659
    {
660
    	boolean same = false;
661
    	if (term != null)
662
    	{
663
    		String searchValue = term.getValue();
664
    		if (searchValue != null && this.value != null)
665
    		{
666
    			if (searchValue.equalsIgnoreCase(this.value))
667
    			{
668
    				same = true;
669
    			}
670
    		}
671
    	}
672
    	return same;
673
    }
674
}
(56-56/65)