Project

General

Profile

1 1831 tao
/**
 *  '$RCSfile$'
 *    Purpose: A Class that represents a structured query, and can be
 *             constructed from an XML serialization conforming to
 *             pathquery.dtd. The printSQL() method can be used to print
 *             a SQL serialization of the query.
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Matt Jones
 *
 *   '$Author$'
 *     '$Date$'
 * '$Revision$'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
29
30
package edu.ucsb.nceas.metacat;
31
32 6602 leinfelder
import java.util.ArrayList;
33 7407 leinfelder
import java.util.Enumeration;
34 6602 leinfelder
import java.util.List;
35 1831 tao
import java.util.Vector;
36
37 2677 sgarg
import org.apache.log4j.Logger;
38
39 7407 leinfelder
import edu.ucsb.nceas.metacat.shared.MetacatUtilException;
40
import edu.ucsb.nceas.metacat.util.SystemUtil;
41
42 1831 tao
 /** a utility class that represents a group of terms in a query */
43
public  class QueryGroup {
44
    private String operator = null;  // indicates how query terms are combined
45
    private Vector children = null;  // the list of query terms and groups
46
    private int countPercentageSearchItem = 0;
47 3225 tao
    private Vector queryTermsWithSameValue = null;//this two dimension vectors.
48
                                                  //will hold query terms which has same search value.
49 3328 tao
    private Vector queryTermsInPathIndex = null; //this vector holds query terms without same value
50
                                                                                 // and search path is in path index.
51 6602 leinfelder
    private Vector<QueryTerm> queryTerms = null;//this vector only holds query terms without same search value.
52 3328 tao
                                                             // and search path is NOT in path index.
53 3226 tao
    private Vector queryGroupsChildren = null;
54 2677 sgarg
    private static Logger logMetacat = Logger.getLogger(QueryGroup.class);
55 6035 leinfelder
    public static String UNION = "UNION";
56
    public static String INTERSECT = "INTERSECT";
57 2677 sgarg
58 6035 leinfelder
59 2366 sgarg
    /**
     * Construct a new, empty QueryGroup.
     *
     * @param operator the boolean connector used to connect the query terms
     *                 in this query group
     */
    public QueryGroup(String operator) {
        this.operator = operator;
        // every internal bucket starts out empty
        this.children = new Vector<Object>();
        this.queryGroupsChildren = new Vector<QueryGroup>();
        this.queryTerms = new Vector<QueryTerm>();
        this.queryTermsInPathIndex = new Vector<QueryTerm>();
        this.queryTermsWithSameValue = new Vector<Vector<QueryTerm>>();
    }
73
74 2366 sgarg
    /**
75 1831 tao
     * Add a child QueryGroup to this QueryGroup
76
     *
77
     * @param qgroup the query group to be added to the list of terms
78
     */
79
    public void addChild(QueryGroup qgroup) {
80 3237 tao
      children.add((Object)qgroup);
81 3226 tao
      queryGroupsChildren.add(qgroup);
82 1831 tao
    }
83
84
    /**
85
     * Add a child QueryTerm to this QueryGroup
86
     *
87
     * @param qterm the query term to be added to the list of terms
88
     */
89
    public void addChild(QueryTerm qterm) {
90 3237 tao
      children.add((Object)qterm);
91 3225 tao
      handleNewQueryTerms(qterm);
92
93 1831 tao
    }
94
95 3237 tao
    /*
96 1831 tao
     * Retrieve an Enumeration of query terms for this QueryGroup
97
     */
98 3237 tao
    private Enumeration getChildren() {
99 1831 tao
      return children.elements();
100
    }
101 2366 sgarg
102 1831 tao
    public int getPercentageSymbolCount()
103
    {
104
      return countPercentageSearchItem;
105
    }
106 2366 sgarg
107 1831 tao
    /**
108
     * create a SQL serialization of the query that this instance represents
109
     */
110 6602 leinfelder
    public String printSQL(boolean useXMLIndex, List<Object> parameterValues) {
111
112 1831 tao
      StringBuffer self = new StringBuffer();
113 2373 sgarg
      StringBuffer queryString = new StringBuffer();
114
115 1831 tao
      boolean first = true;
116 2677 sgarg
117 3430 tao
      if (!queryTermsWithSameValue.isEmpty() || !queryTermsInPathIndex.isEmpty())
118 3237 tao
      {
119 6602 leinfelder
    	  // keep track of the values we add as prepared statement question marks (?)
120
    	  List<Object> groupValues = new ArrayList<Object>();
121
    	  String pathIndexQueryString = printSQLStringInPathIndex(groupValues);
122
    	  parameterValues.addAll(groupValues);
123 3328 tao
    	  queryString.append(pathIndexQueryString);
124 3245 tao
    	  if (queryString != null)
125
    	  {
126
    		  first = false;
127
    	  }
128 3237 tao
      }
129
130
      for (int i=0; i<queryGroupsChildren.size(); i++)
131
      {
132
133 6602 leinfelder
    	  // keep track of the values we add as prepared statement question marks (?)
134
    	  List<Object> childrenValues = new ArrayList<Object>();
135
    	  // get the group
136
    	  QueryGroup qg = (QueryGroup) queryGroupsChildren.elementAt(i);
137
    	  String queryGroupSQL = qg.printSQL(useXMLIndex, childrenValues);
138
    	  logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: " + queryGroupSQL);
139 2634 sgarg
        	if (first) {
140
        		first = false;
141
        	} else {
142
        		if(!queryString.toString().equals("") && queryGroupSQL != null &&!queryGroupSQL.equals("")){
143 2503 sgarg
                    queryString.append(" " + operator + " ");
144 2677 sgarg
        		}
145
        	}
146 6602 leinfelder
        	// add the sql
147 2677 sgarg
   		  	queryString.append(queryGroupSQL);
148 6602 leinfelder
   		  	// add the parameter values
149
   		  	parameterValues.addAll(childrenValues);
150 2677 sgarg
151 6602 leinfelder
   		  	// count percentage number
152 2677 sgarg
   		  	int count = qg.getPercentageSymbolCount();
153
   		  	countPercentageSearchItem = countPercentageSearchItem + count;
154 3237 tao
      }
155
156
      for (int i=0; i<queryTerms.size(); i++)
157
      {
158 6602 leinfelder
    	  // keep track of the values we add as prepared statement question marks (?)
159
    	  List<Object> termValues = new ArrayList<Object>();
160
    	  // get the term
161
    	  QueryTerm qt = (QueryTerm)queryTerms.elementAt(i);
162
    	  String termQueryString = qt.printSQL(useXMLIndex, termValues);
163
    	  logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: " + termQueryString);
164
           if (!(qt.getSearchMode().equals("contains") && qt.getValue().equals("%"))){
165 2634 sgarg
        	   if (first) {
166 2503 sgarg
                   first = false;
167
               } else {
168
                   if(!queryString.toString().equals("")){
169
                       queryString.append(" " + operator + " ");
170
                   }
171
               }
172 6602 leinfelder
        	   // include the sql
173 2373 sgarg
               queryString.append(termQueryString);
174 6602 leinfelder
               // include the parameter values
175
               parameterValues.addAll(termValues);
176
177 1831 tao
           // count percerntage number
178
           int count = qt.getPercentageSymbolCount();
179
           countPercentageSearchItem = countPercentageSearchItem + count;
180 3237 tao
        }
181 1831 tao
      }
182 2373 sgarg
183
      if(!queryString.toString().equals("")){
184
          self.append("(");
185
          self.append(queryString.toString());
186
          self.append(")");
187
      }
188 2677 sgarg
189
      logMetacat.info("In QueryGroup.printSQL.. final query returned is: "
190
			+ self.toString());
191 1831 tao
      return self.toString();
192
    }
193 3226 tao
194
195 3237 tao
196
197 3226 tao
    /*
198 3328 tao
     * If every query term in a queryGroup share a search value and search path
199
     * is in xml_path_index, we should use a new query to replace the original query term query in order to
200
     * improve performance. Also if even the term doesn't share any value with other term
201
     * we still use "OR" to replace UNION action (we only handle union operator in the query group).
202
     *
203 3226 tao
     */
204 6602 leinfelder
    private String printSQLStringInPathIndex(List<Object> parameterValues)
205 3226 tao
    {
206
    	String sql ="";
207
    	String value ="";
208
    	StringBuffer sqlBuff = new StringBuffer();
209
    	int index =0;
210 3328 tao
    	if (queryTermsWithSameValue != null && queryTermsInPathIndex != null)
211 3226 tao
    	{
212 3328 tao
213
    		sqlBuff.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
214
    		if (!queryTermsWithSameValue.isEmpty())
215 3226 tao
    		{
216 3328 tao
    			boolean firstVector = true;
217
	    		for (int j=0; j<queryTermsWithSameValue.size(); j++)
218 3237 tao
	    		{
219 3328 tao
	    	   		Vector queryTermVector = (Vector)queryTermsWithSameValue.elementAt(j);
220
		    		QueryTerm term1 = (QueryTerm)queryTermVector.elementAt(0);
221
		        	value = term1.getValue();
222
		        	boolean first = true;
223
		        	if (firstVector)
224
		        	{
225
					  firstVector = false;
226
		        	}
227
		        	else
228
		        	{
229
		        		sqlBuff.append(" "+"OR"+" ");
230
		        	}
231 6361 leinfelder
232
					sqlBuff.append(" (");
233
234 6602 leinfelder
					// keep track of parameter values
235
			        List<Object> searchValues = new ArrayList<Object>();
236
237 6361 leinfelder
		        	// get the general search criteria (no path info)
238 6602 leinfelder
		        	String searchTermSQL = term1.printSearchExprSQL(searchValues);
239
240
		        	// add the SQL
241 6361 leinfelder
					sqlBuff.append(searchTermSQL);
242
243 6602 leinfelder
					// add parameter values
244
					parameterValues.addAll(searchValues);
245
246
					sqlBuff.append("AND path IN ( ");
247 6361 leinfelder
248 3328 tao
		    		//gets every path in query term object
249
		    		for (int i=0; i<queryTermVector.size(); i++)
250
		    		{
251
		    			QueryTerm term = (QueryTerm)queryTermVector.elementAt(i);
252
		    			value = term.getValue();
253
		    			String path = term.getPathExpression();
254
		    			if (path != null && !path.equals(""))
255
		    			{
256
		    				if (first)
257
		    				{
258
		    					first = false;
259 6602 leinfelder
		    					sqlBuff.append("?");
260
		    					parameterValues.add(path);
261 3328 tao
		    				}
262
		    				else
263
		    				{
264 6602 leinfelder
		    					sqlBuff.append(", ?");
265
		    					parameterValues.add(path);
266 3328 tao
		    				}
267
		    				index++;
268
		     				if (value != null && (value.equals("%") || value.equals("%%%")))
269
		                    {
270
		    				  countPercentageSearchItem++;
271
		                    }
272
	    			     }
273
	    		    }
274 6602 leinfelder
	    		    sqlBuff.append(" ))");
275 3328 tao
276
	    	    }
277
	    	}
278
    		if (!queryTermsInPathIndex.isEmpty())
279
    		{
280
    			for (int j=0; j<queryTermsInPathIndex.size(); j++)
281
    			{
282
    				QueryTerm term = (QueryTerm)queryTermsInPathIndex.elementAt(j);
283
    				if (term != null)
284
    				{
285
	    				term.setInUnionGroup(true);
286
		    			 if (index > 0)
287
		    			 {
288
		    				 sqlBuff.append(" "+"OR"+" ");
289
		    			 }
290
		    			 sqlBuff.append("(");
291 6602 leinfelder
		    			 // keep track of the parameter values for this sql
292
		    			 List<Object> termParameterValues = new ArrayList<Object>();
293
		    			 String termSQL = term.printSQL(true, termParameterValues);
294
	    				 sqlBuff.append(termSQL);
295 3328 tao
	    				 sqlBuff.append(")");
296 6602 leinfelder
	    				 // add the param values
297
	    				 parameterValues.addAll(termParameterValues);
298 3328 tao
	    				 index++;
299
	    			}
300
    			}
301
    		}
302 3226 tao
    	}
303
    	if (index >0)
304
    	{
305
    		sql = sqlBuff.toString();
306
    	}
307
    	return sql;
308
    }
309 1831 tao
310
    /**
311
     * create a String description of the query that this instance represents.
312
     * This should become a way to get the XML serialization of the query.
313
     */
314
    public String toString() {
315
      StringBuffer self = new StringBuffer();
316
317
      self.append("  (Query group operator=" + operator + "\n");
318
      Enumeration en= getChildren();
319
      while (en.hasMoreElements()) {
320
        Object qobject = en.nextElement();
321
        self.append(qobject);
322
      }
323
      self.append("  )\n");
324
      return self.toString();
325
    }
326 3225 tao
327
    /*
328
     * When a new QueryTerm come, first we need to compare it to
329
     * the queryTerm vector, which contains queryTerm that doesn't
330
     * have same search value to any other queryTerm. Here is algorithm.
331
     * 1) If new QueryTerm find a QueryTerm in queryTerms which has same search value,
332
     *    them create a new vector which contain both QueryTerms and add the new vector
333
     *    to two-dimention vector queryTermsWithSameValue, and remove the QueryTerm which
334
     *    was in queryTerm.
335
     * 2) If new QueryTerm couldn't find a QueryTerm in queryTerms which has same search value,
336
     *    then search queryTermsWithSameValue, to see if this vector already has the search value.
337
     *    2.1) if has the search value, add the new QueryTerm to the queryTermsWithSameValue.
338
     *    2.2) if hasn't, add the new QueryTerm to queryTerms vector.
339
     */
340
    private void handleNewQueryTerms(QueryTerm newTerm)
341
    {
342 3237 tao
    	// currently we only handle UNION group
343
    	if (newTerm != null )
344 3225 tao
    	{
345 3239 tao
    		//System.out.println("new term is not null branch in handle new query term");
346 3237 tao
    		//we only handle union operator now.
347 4812 daigle
    		try {
348
    			if (operator != null
349
						&& operator.equalsIgnoreCase(UNION)
350
						&& SystemUtil.getPathsForIndexing().contains(
351
								newTerm.getPathExpression())) {
352
					// System.out.println("in only union branch in handle new
353
					// query term");
354
					for (int i = 0; i < queryTermsInPathIndex.size(); i++) {
355
						QueryTerm term = (QueryTerm) queryTermsInPathIndex.elementAt(i);
356
						if (term != null && term.hasSameSearchValue(newTerm)) {
357
							// System.out.println("1Move a query term and add a
358
							// new query term into search value in handle new
359
							// query term");
360
							// find a target which has same search value
361 6602 leinfelder
							Vector<QueryTerm> newSameValueVector = new Vector<QueryTerm>();
362 4812 daigle
							newSameValueVector.add(term);
363
							newSameValueVector.addElement(newTerm);
364
							queryTermsWithSameValue.add(newSameValueVector);
365
							queryTermsInPathIndex.remove(i);
366
							return;
367
						}
368
					}
369
					// no same search value was found in queryTerms.
370
					// then we need search queryTermsWithSameValue
371
					for (int i = 0; i < queryTermsWithSameValue.size(); i++) {
372 6602 leinfelder
						Vector sameValueVec = (Vector) queryTermsWithSameValue.elementAt(i);
373 4812 daigle
						// we only compare the first query term
374
						QueryTerm term = (QueryTerm) sameValueVec.elementAt(0);
375
						if (term != null && term.hasSameSearchValue(newTerm)) {
376
							// System.out.println("2add a new query term into
377
							// search value in handle new query term");
378
							sameValueVec.add(newTerm);
379
							return;
380
						}
381
					}
382
					// nothing found, but the search path is still in
383
					// xml_path_index,
384
					// save it into queryTermsInPathIndex vector
385
					queryTermsInPathIndex.add(newTerm);
386
					return;
387
				}
388 4854 daigle
    		} catch (MetacatUtilException ue) {
389 4812 daigle
				logMetacat.warn("Could not get index paths: " + ue.getMessage());
390
			}
391 3328 tao
392
    		// add this newTerm to queryTerms since we couldn't find it in xml_path_index
393 3225 tao
    		queryTerms.add(newTerm);
394
    	}
395
396
    }
397 1831 tao
  }