Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be
4
 *             constructed from an XML serialization conforming to
5
 *             pathquery.dtd. The printSQL() method can be used to print
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *
11
 *   '$Author: leinfelder $'
12
 *     '$Date: 2011-11-04 12:32:37 -0700 (Fri, 04 Nov 2011) $'
13
 * '$Revision: 6602 $'
14
 *
15
 * This program is free software; you can redistribute it and/or modify
16
 * it under the terms of the GNU General Public License as published by
17
 * the Free Software Foundation; either version 2 of the License, or
18
 * (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU General Public License
26
 * along with this program; if not, write to the Free Software
27
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28
 */
29

    
30
package edu.ucsb.nceas.metacat;
31

    
32
import edu.ucsb.nceas.dbadapter.*;
33
import edu.ucsb.nceas.metacat.shared.MetacatUtilException;
34
import edu.ucsb.nceas.metacat.util.MetacatUtil;
35
import edu.ucsb.nceas.metacat.util.SystemUtil;
36

    
37
import java.io.*;
38
import java.util.ArrayList;
39
import java.util.Hashtable;
40
import java.util.List;
41
import java.util.Stack;
42
import java.util.Vector;
43
import java.util.Enumeration;
44

    
45
import org.apache.log4j.Logger;
46

    
47
 /** a utility class that represents a group of terms in a query */
48
public  class QueryGroup {
49
    private String operator = null;  // indicates how query terms are combined
50
    private Vector children = null;  // the list of query terms and groups
51
    private int countPercentageSearchItem = 0;
52
    private Vector queryTermsWithSameValue = null;//this two dimension vectors.
53
                                                  //will hold query terms which has same search value.
54
    private Vector queryTermsInPathIndex = null; //this vector holds query terms without same value
55
                                                                                 // and search path is in path index.
56
    private Vector<QueryTerm> queryTerms = null;//this vector only holds query terms without same search value.
57
                                                             // and search path is NOT in path index.
58
    private Vector queryGroupsChildren = null;
59
    private static Logger logMetacat = Logger.getLogger(QueryGroup.class);
60
    public static String UNION = "UNION";
61
    public static String INTERSECT = "INTERSECT";
62

    
63

    
64
    /**
65
     * construct a new QueryGroup
66
     *
67
     * @param operator the boolean conector used to connect query terms
68
     *                    in this query group
69
     */
70
    public QueryGroup(String operator) {
71
      this.operator = operator;
72
      children = new Vector();
73
      queryTermsWithSameValue = new Vector();
74
      queryTermsInPathIndex = new Vector(); 
75
      queryTerms = new Vector<QueryTerm>();
76
      queryGroupsChildren = new Vector();
77
    }
78

    
79
    /**
80
     * Add a child QueryGroup to this QueryGroup
81
     *
82
     * @param qgroup the query group to be added to the list of terms
83
     */
84
    public void addChild(QueryGroup qgroup) {
85
      children.add((Object)qgroup);
86
      queryGroupsChildren.add(qgroup);
87
    }
88

    
89
    /**
90
     * Add a child QueryTerm to this QueryGroup
91
     *
92
     * @param qterm the query term to be added to the list of terms
93
     */
94
    public void addChild(QueryTerm qterm) {
95
      children.add((Object)qterm);
96
      handleNewQueryTerms(qterm);
97
      
98
    }
99

    
100
    /*
101
     * Retrieve an Enumeration of query terms for this QueryGroup
102
     */
103
    private Enumeration getChildren() {
104
      return children.elements();
105
    }
106

    
107
    public int getPercentageSymbolCount()
108
    {
109
      return countPercentageSearchItem;
110
    }
111

    
112
    /**
113
     * create a SQL serialization of the query that this instance represents
114
     */
115
    public String printSQL(boolean useXMLIndex, List<Object> parameterValues) {
116
    	
117
      StringBuffer self = new StringBuffer();
118
      StringBuffer queryString = new StringBuffer();
119

    
120
      boolean first = true;
121
      
122
      if (!queryTermsWithSameValue.isEmpty() || !queryTermsInPathIndex.isEmpty())
123
      {
124
    	  // keep track of the values we add as prepared statement question marks (?)
125
    	  List<Object> groupValues = new ArrayList<Object>();
126
    	  String pathIndexQueryString = printSQLStringInPathIndex(groupValues);
127
    	  parameterValues.addAll(groupValues);
128
    	  queryString.append(pathIndexQueryString);
129
    	  if (queryString != null)
130
    	  {
131
    		  first = false;
132
    	  }
133
      }
134
      
135
      for (int i=0; i<queryGroupsChildren.size(); i++)
136
      {
137
      
138
    	  // keep track of the values we add as prepared statement question marks (?)
139
    	  List<Object> childrenValues = new ArrayList<Object>();
140
    	  // get the group
141
    	  QueryGroup qg = (QueryGroup) queryGroupsChildren.elementAt(i);
142
    	  String queryGroupSQL = qg.printSQL(useXMLIndex, childrenValues);
143
    	  logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: " + queryGroupSQL);       	
144
        	if (first) {
145
        		first = false;
146
        	} else {
147
        		if(!queryString.toString().equals("") && queryGroupSQL != null &&!queryGroupSQL.equals("")){
148
                    queryString.append(" " + operator + " ");
149
        		}
150
        	}
151
        	// add the sql
152
   		  	queryString.append(queryGroupSQL);
153
   		  	// add the parameter values
154
   		  	parameterValues.addAll(childrenValues);
155
   		  	
156
   		  	// count percentage number
157
   		  	int count = qg.getPercentageSymbolCount();
158
   		  	countPercentageSearchItem = countPercentageSearchItem + count;
159
      }
160
      
161
      for (int i=0; i<queryTerms.size(); i++)
162
      {
163
    	  // keep track of the values we add as prepared statement question marks (?)
164
    	  List<Object> termValues = new ArrayList<Object>();
165
    	  // get the term
166
    	  QueryTerm qt = (QueryTerm)queryTerms.elementAt(i);
167
    	  String termQueryString = qt.printSQL(useXMLIndex, termValues);
168
    	  logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: " + termQueryString);
169
           if (!(qt.getSearchMode().equals("contains") && qt.getValue().equals("%"))){
170
        	   if (first) {
171
                   first = false;
172
               } else {
173
                   if(!queryString.toString().equals("")){
174
                       queryString.append(" " + operator + " ");
175
                   }
176
               }
177
        	   // include the sql
178
               queryString.append(termQueryString);
179
               // include the parameter values
180
               parameterValues.addAll(termValues);
181
               
182
           // count percerntage number
183
           int count = qt.getPercentageSymbolCount();
184
           countPercentageSearchItem = countPercentageSearchItem + count;
185
        } 
186
      }
187

    
188
      if(!queryString.toString().equals("")){
189
          self.append("(");
190
          self.append(queryString.toString());
191
          self.append(")");
192
      }
193
      
194
      logMetacat.info("In QueryGroup.printSQL.. final query returned is: " 
195
			+ self.toString());
196
      return self.toString();
197
    }
198
    
199
    
200
    
201
    
202
    /*
203
     * If every query term in a queryGroup share a search value and search path
204
     * is in xml_path_index, we should use a new query to replace the original query term query in order to
205
     * improve performance. Also if even the term doesn't share any value with other term
206
     * we still use "OR" to replace UNION action (we only handle union operator in the query group).
207
     * 
208
     */
209
    private String printSQLStringInPathIndex(List<Object> parameterValues)
210
    {
211
    	String sql ="";
212
    	String value ="";
213
    	StringBuffer sqlBuff = new StringBuffer();
214
    	int index =0;
215
    	if (queryTermsWithSameValue != null && queryTermsInPathIndex != null)
216
    	{
217
    		
218
    		sqlBuff.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
219
    		if (!queryTermsWithSameValue.isEmpty())
220
    		{
221
    			boolean firstVector = true;
222
	    		for (int j=0; j<queryTermsWithSameValue.size(); j++)
223
	    		{
224
	    	   		Vector queryTermVector = (Vector)queryTermsWithSameValue.elementAt(j);
225
		    		QueryTerm term1 = (QueryTerm)queryTermVector.elementAt(0);
226
		        	value = term1.getValue();
227
		        	boolean first = true;
228
		        	if (firstVector)
229
		        	{
230
					  firstVector = false;
231
		        	}
232
		        	else
233
		        	{
234
		        		sqlBuff.append(" "+"OR"+" ");
235
		        	}
236
		        	
237
					sqlBuff.append(" (");
238
		        	
239
					// keep track of parameter values
240
			        List<Object> searchValues = new ArrayList<Object>();
241
			        
242
		        	// get the general search criteria (no path info)
243
		        	String searchTermSQL = term1.printSearchExprSQL(searchValues);
244
		        	
245
		        	// add the SQL
246
					sqlBuff.append(searchTermSQL);
247
					
248
					// add parameter values
249
					parameterValues.addAll(searchValues);
250
					
251
					sqlBuff.append("AND path IN ( ");
252

    
253
		    		//gets every path in query term object
254
		    		for (int i=0; i<queryTermVector.size(); i++)
255
		    		{
256
		    			QueryTerm term = (QueryTerm)queryTermVector.elementAt(i);
257
		    			value = term.getValue();
258
		    			String path = term.getPathExpression();
259
		    			if (path != null && !path.equals(""))
260
		    			{
261
		    				if (first)
262
		    				{
263
		    					first = false;
264
		    					sqlBuff.append("?");
265
		    					parameterValues.add(path);
266
		    				}
267
		    				else
268
		    				{
269
		    					sqlBuff.append(", ?");
270
		    					parameterValues.add(path);
271
		    				}
272
		    				index++;
273
		     				if (value != null && (value.equals("%") || value.equals("%%%")))
274
		                    {
275
		    				  countPercentageSearchItem++;
276
		                    }
277
	    			     }
278
	    		    }
279
	    		    sqlBuff.append(" ))");
280
	    	
281
	    	    }
282
	    	}
283
    		if (!queryTermsInPathIndex.isEmpty())
284
    		{
285
    			for (int j=0; j<queryTermsInPathIndex.size(); j++)
286
    			{
287
    				QueryTerm term = (QueryTerm)queryTermsInPathIndex.elementAt(j);
288
    				if (term != null)
289
    				{
290
	    				term.setInUnionGroup(true);
291
		    			 if (index > 0)
292
		    			 {
293
		    				 sqlBuff.append(" "+"OR"+" ");
294
		    			 }
295
		    			 sqlBuff.append("(");
296
		    			 // keep track of the parameter values for this sql
297
		    			 List<Object> termParameterValues = new ArrayList<Object>();
298
		    			 String termSQL = term.printSQL(true, termParameterValues);
299
	    				 sqlBuff.append(termSQL);
300
	    				 sqlBuff.append(")");
301
	    				 // add the param values
302
	    				 parameterValues.addAll(termParameterValues);
303
	    				 index++;
304
	    			}
305
    			}
306
    		}
307
    	}
308
    	if (index >0)
309
    	{
310
    		sql = sqlBuff.toString();
311
    	}
312
    	return sql;
313
    }
314

    
315
    /**
316
     * create a String description of the query that this instance represents.
317
     * This should become a way to get the XML serialization of the query.
318
     */
319
    public String toString() {
320
      StringBuffer self = new StringBuffer();
321

    
322
      self.append("  (Query group operator=" + operator + "\n");
323
      Enumeration en= getChildren();
324
      while (en.hasMoreElements()) {
325
        Object qobject = en.nextElement();
326
        self.append(qobject);
327
      }
328
      self.append("  )\n");
329
      return self.toString();
330
    }
331
    
332
    /*
333
     * When a new QueryTerm come, first we need to compare it to
334
     * the queryTerm vector, which contains queryTerm that doesn't
335
     * have same search value to any other queryTerm. Here is algorithm.
336
     * 1) If new QueryTerm find a QueryTerm in queryTerms which has same search value,
337
     *    them create a new vector which contain both QueryTerms and add the new vector
338
     *    to two-dimention vector queryTermsWithSameValue, and remove the QueryTerm which
339
     *    was in queryTerm.
340
     * 2) If new QueryTerm couldn't find a QueryTerm in queryTerms which has same search value,
341
     *    then search queryTermsWithSameValue, to see if this vector already has the search value.
342
     *    2.1) if has the search value, add the new QueryTerm to the queryTermsWithSameValue.
343
     *    2.2) if hasn't, add the new QueryTerm to queryTerms vector.
344
     */
345
    private void handleNewQueryTerms(QueryTerm newTerm)
346
    {
347
    	// currently we only handle UNION group
348
    	if (newTerm != null )
349
    	{
350
    		//System.out.println("new term is not null branch in handle new query term");
351
    		//we only handle union operator now.
352
    		try {
353
    			if (operator != null
354
						&& operator.equalsIgnoreCase(UNION)
355
						&& SystemUtil.getPathsForIndexing().contains(
356
								newTerm.getPathExpression())) {
357
					// System.out.println("in only union branch in handle new
358
					// query term");
359
					for (int i = 0; i < queryTermsInPathIndex.size(); i++) {
360
						QueryTerm term = (QueryTerm) queryTermsInPathIndex.elementAt(i);
361
						if (term != null && term.hasSameSearchValue(newTerm)) {
362
							// System.out.println("1Move a query term and add a
363
							// new query term into search value in handle new
364
							// query term");
365
							// find a target which has same search value
366
							Vector<QueryTerm> newSameValueVector = new Vector<QueryTerm>();
367
							newSameValueVector.add(term);
368
							newSameValueVector.addElement(newTerm);
369
							queryTermsWithSameValue.add(newSameValueVector);
370
							queryTermsInPathIndex.remove(i);
371
							return;
372
						}
373
					}
374
					// no same search value was found in queryTerms.
375
					// then we need search queryTermsWithSameValue
376
					for (int i = 0; i < queryTermsWithSameValue.size(); i++) {
377
						Vector sameValueVec = (Vector) queryTermsWithSameValue.elementAt(i);
378
						// we only compare the first query term
379
						QueryTerm term = (QueryTerm) sameValueVec.elementAt(0);
380
						if (term != null && term.hasSameSearchValue(newTerm)) {
381
							// System.out.println("2add a new query term into
382
							// search value in handle new query term");
383
							sameValueVec.add(newTerm);
384
							return;
385
						}
386
					}
387
					// nothing found, but the search path is still in
388
					// xml_path_index,
389
					// save it into queryTermsInPathIndex vector
390
					queryTermsInPathIndex.add(newTerm);
391
					return;
392
				}    		
393
    		} catch (MetacatUtilException ue) {
394
				logMetacat.warn("Could not get index paths: " + ue.getMessage());
395
			}
396
    		
397
    		// add this newTerm to queryTerms since we couldn't find it in xml_path_index
398
    		queryTerms.add(newTerm);
399
    	}
400
    	
401
    }
402
  }
(53-53/64)