Project

General

Profile

1 1831 tao
/**
2
 *  '$RCSfile$'
3 2366 sgarg
 *    Purpose: A Class that represents a structured query, and can be
4
 *             constructed from an XML serialization conforming to
5
 *             pathquery.dtd. The printSQL() method can be used to print
6 1831 tao
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *
11
 *   '$Author$'
12
 *     '$Date$'
13
 * '$Revision$'
14
 *
15
 * This program is free software; you can redistribute it and/or modify
16
 * it under the terms of the GNU General Public License as published by
17
 * the Free Software Foundation; either version 2 of the License, or
18
 * (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU General Public License
26
 * along with this program; if not, write to the Free Software
27
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28
 */
29
30
package edu.ucsb.nceas.metacat;
31
32
import edu.ucsb.nceas.dbadapter.*;
33 5015 daigle
import edu.ucsb.nceas.metacat.shared.MetacatUtilException;
34 4698 daigle
import edu.ucsb.nceas.metacat.util.MetacatUtil;
35 4812 daigle
import edu.ucsb.nceas.metacat.util.SystemUtil;
36 1831 tao
37
import java.io.*;
38
import java.util.Hashtable;
39
import java.util.Stack;
40
import java.util.Vector;
41
import java.util.Enumeration;
42
43 2677 sgarg
import org.apache.log4j.Logger;
44
45 1831 tao
 /** a utility class that represents a group of terms in a query */
46
public  class QueryGroup {
47
    private String operator = null;  // indicates how query terms are combined
48
    private Vector children = null;  // the list of query terms and groups
49
    private int countPercentageSearchItem = 0;
50 3225 tao
    private Vector queryTermsWithSameValue = null;//this two dimension vectors.
51
                                                  //will hold query terms which has same search value.
52 3328 tao
    private Vector queryTermsInPathIndex = null; //this vector holds query terms without same value
53
                                                                                 // and search path is in path index.
54 3225 tao
    private Vector queryTerms = null;//this vector only holds query terms without same search value.
55 3328 tao
                                                             // and search path is NOT in path index.
56 3226 tao
    private Vector queryGroupsChildren = null;
57 2677 sgarg
    private static Logger logMetacat = Logger.getLogger(QueryGroup.class);
58 6035 leinfelder
    public static String UNION = "UNION";
59
    public static String INTERSECT = "INTERSECT";
60 2677 sgarg
61 6035 leinfelder
62 2366 sgarg
    /**
     * Creates an empty query group.
     *
     * @param operator the boolean connector used to join the query terms
     *                 and sub-groups held by this group
     */
    public QueryGroup(String operator) {
        // All containers start out empty; the addChild() overloads fill them.
        this.children = new Vector();
        this.queryGroupsChildren = new Vector();
        this.queryTerms = new Vector();
        this.queryTermsInPathIndex = new Vector();
        this.queryTermsWithSameValue = new Vector();
        this.operator = operator;
    }
76
77 2366 sgarg
    /**
78 1831 tao
     * Add a child QueryGroup to this QueryGroup
79
     *
80
     * @param qgroup the query group to be added to the list of terms
81
     */
82
    public void addChild(QueryGroup qgroup) {
83 3237 tao
      children.add((Object)qgroup);
84 3226 tao
      queryGroupsChildren.add(qgroup);
85 1831 tao
    }
86
87
    /**
88
     * Add a child QueryTerm to this QueryGroup
89
     *
90
     * @param qterm the query term to be added to the list of terms
91
     */
92
    public void addChild(QueryTerm qterm) {
93 3237 tao
      children.add((Object)qterm);
94 3225 tao
      handleNewQueryTerms(qterm);
95
96 1831 tao
    }
97
98 3237 tao
    /*
99 1831 tao
     * Retrieve an Enumeration of query terms for this QueryGroup
100
     */
101 3237 tao
    private Enumeration getChildren() {
102 1831 tao
      return children.elements();
103
    }
104 2366 sgarg
105 1831 tao
    public int getPercentageSymbolCount()
106
    {
107
      return countPercentageSearchItem;
108
    }
109 2366 sgarg
110 1831 tao
    /**
111
     * create a SQL serialization of the query that this instance represents
112
     */
113
    public String printSQL(boolean useXMLIndex) {
114
      StringBuffer self = new StringBuffer();
115 2373 sgarg
      StringBuffer queryString = new StringBuffer();
116
117 1831 tao
      boolean first = true;
118 2677 sgarg
119 3430 tao
      if (!queryTermsWithSameValue.isEmpty() || !queryTermsInPathIndex.isEmpty())
120 3237 tao
      {
121 3328 tao
    	  String pathIndexQueryString = printSQLStringInPathIndex();
122
    	  queryString.append(pathIndexQueryString);
123 3245 tao
    	  if (queryString != null)
124
    	  {
125
    		  first = false;
126
    	  }
127 3237 tao
      }
128
129
      for (int i=0; i<queryGroupsChildren.size(); i++)
130
      {
131
132
133
            QueryGroup qg = (QueryGroup)queryGroupsChildren.elementAt(i);
134 2634 sgarg
        	String queryGroupSQL = qg.printSQL(useXMLIndex);
135 2677 sgarg
        	logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: "
136 3237 tao
        			+ queryGroupSQL);
137 2634 sgarg
        	if (first) {
138
        		first = false;
139
        	} else {
140
        		if(!queryString.toString().equals("") && queryGroupSQL != null &&!queryGroupSQL.equals("")){
141 2503 sgarg
                    queryString.append(" " + operator + " ");
142 2677 sgarg
        		}
143
        	}
144
   		  	queryString.append(queryGroupSQL);
145
146
   		  	// count percerntage number
147
   		  	int count = qg.getPercentageSymbolCount();
148
   		  	countPercentageSearchItem = countPercentageSearchItem + count;
149 3237 tao
      }
150
151
      for (int i=0; i<queryTerms.size(); i++)
152
      {
153
           QueryTerm qt = (QueryTerm)queryTerms.elementAt(i);
154 2373 sgarg
           String termQueryString = qt.printSQL(useXMLIndex);
155 2677 sgarg
       	   logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: "
156
        			+ termQueryString);
157 2366 sgarg
           if(!(qt.getSearchMode().equals("contains") && qt.getValue().equals("%"))){
158 2634 sgarg
        	   if (first) {
159 2503 sgarg
                   first = false;
160
               } else {
161
                   if(!queryString.toString().equals("")){
162
                       queryString.append(" " + operator + " ");
163
                   }
164
               }
165 2373 sgarg
               queryString.append(termQueryString);
166 3237 tao
167 1831 tao
           // count percerntage number
168
           int count = qt.getPercentageSymbolCount();
169
           countPercentageSearchItem = countPercentageSearchItem + count;
170 3237 tao
        }
171 1831 tao
      }
172 2373 sgarg
173
      if(!queryString.toString().equals("")){
174
          self.append("(");
175
          self.append(queryString.toString());
176
          self.append(")");
177
      }
178 2677 sgarg
179
      logMetacat.info("In QueryGroup.printSQL.. final query returned is: "
180
			+ self.toString());
181 1831 tao
      return self.toString();
182
    }
183 3226 tao
184
185 3237 tao
186
187 3226 tao
    /*
188 3328 tao
     * If every query term in a queryGroup share a search value and search path
189
     * is in xml_path_index, we should use a new query to replace the original query term query in order to
190
     * improve performance. Also if even the term doesn't share any value with other term
191
     * we still use "OR" to replace UNION action (we only handle union operator in the query group).
192
     *
193 3226 tao
     */
194 3328 tao
    private String printSQLStringInPathIndex()
195 3226 tao
    {
196
    	String sql ="";
197
    	String value ="";
198 4325 leinfelder
    	boolean casesensitive = false;
199 3226 tao
    	StringBuffer sqlBuff = new StringBuffer();
200
    	Vector pathVector = new Vector();
201
    	int index =0;
202 3328 tao
    	if (queryTermsWithSameValue != null && queryTermsInPathIndex != null)
203 3226 tao
    	{
204 3328 tao
205
    		sqlBuff.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
206
    		if (!queryTermsWithSameValue.isEmpty())
207 3226 tao
    		{
208 3328 tao
    			boolean firstVector = true;
209
	    		for (int j=0; j<queryTermsWithSameValue.size(); j++)
210 3237 tao
	    		{
211 3328 tao
	    	   		Vector queryTermVector = (Vector)queryTermsWithSameValue.elementAt(j);
212
		    		QueryTerm term1 = (QueryTerm)queryTermVector.elementAt(0);
213
		        	value = term1.getValue();
214 4325 leinfelder
		        	casesensitive = term1.isCaseSensitive();
215 3328 tao
		        	boolean first = true;
216
		        	if (firstVector)
217
		        	{
218
					  firstVector = false;
219
		        	}
220
		        	else
221
		        	{
222
		        		sqlBuff.append(" "+"OR"+" ");
223
		        	}
224 4325 leinfelder
		        	if (casesensitive) {
225
			        	sqlBuff.append(" (nodedata");
226
		        	} else {
227
		        		sqlBuff.append(" (UPPER(nodedata)");
228
		        	}
229
		        	sqlBuff.append(" LIKE '%");
230
		        	if (value != null && !casesensitive)
231 3328 tao
		        	{
232
		        	    sqlBuff.append(value.toUpperCase());
233
		        	}
234
		        	else
235
		        	{
236
		        		sqlBuff.append(value);
237
		        	}
238
					sqlBuff.append("%' AND path IN (");
239
		    		//gets every path in query term object
240
		    		for (int i=0; i<queryTermVector.size(); i++)
241
		    		{
242
		    			QueryTerm term = (QueryTerm)queryTermVector.elementAt(i);
243
		    			value = term.getValue();
244
		    			String path = term.getPathExpression();
245
		    			if (path != null && !path.equals(""))
246
		    			{
247
		    				if (first)
248
		    				{
249
		    					first = false;
250
		    					sqlBuff.append("'");
251
		    					sqlBuff.append(path);
252
		    					sqlBuff.append("'");
253
254
		    				}
255
		    				else
256
		    				{
257
		    					sqlBuff.append(",'");
258
		    					sqlBuff.append(path);
259
		    					sqlBuff.append("'");
260
		    				}
261
		    				index++;
262
		     				if (value != null && (value.equals("%") || value.equals("%%%")))
263
		                    {
264
		    				  countPercentageSearchItem++;
265
		                    }
266
	    			     }
267
	    		    }
268
	    		    sqlBuff.append("))");
269
270
	    	    }
271
	    	}
272
    		if (!queryTermsInPathIndex.isEmpty())
273
    		{
274
    			for (int j=0; j<queryTermsInPathIndex.size(); j++)
275
    			{
276
    				QueryTerm term = (QueryTerm)queryTermsInPathIndex.elementAt(j);
277
    				if (term != null)
278
    				{
279
	    				term.setInUnionGroup(true);
280
		    			 if (index > 0)
281
		    			 {
282
		    				 sqlBuff.append(" "+"OR"+" ");
283
		    			 }
284
		    			 sqlBuff.append("(");
285
	    				 sqlBuff.append(term.printSQL(true));
286
	    				 sqlBuff.append(")");
287
	    				 index++;
288
	    			}
289
    			}
290
    		}
291 3226 tao
    	}
292
    	if (index >0)
293
    	{
294
    		sql = sqlBuff.toString();
295
    	}
296
    	return sql;
297
    }
298 1831 tao
299
    /**
300
     * create a String description of the query that this instance represents.
301
     * This should become a way to get the XML serialization of the query.
302
     */
303
    public String toString() {
304
      StringBuffer self = new StringBuffer();
305
306
      self.append("  (Query group operator=" + operator + "\n");
307
      Enumeration en= getChildren();
308
      while (en.hasMoreElements()) {
309
        Object qobject = en.nextElement();
310
        self.append(qobject);
311
      }
312
      self.append("  )\n");
313
      return self.toString();
314
    }
315 3225 tao
316
    /*
317
     * When a new QueryTerm come, first we need to compare it to
318
     * the queryTerm vector, which contains queryTerm that doesn't
319
     * have same search value to any other queryTerm. Here is algorithm.
320
     * 1) If new QueryTerm find a QueryTerm in queryTerms which has same search value,
321
     *    them create a new vector which contain both QueryTerms and add the new vector
322
     *    to two-dimention vector queryTermsWithSameValue, and remove the QueryTerm which
323
     *    was in queryTerm.
324
     * 2) If new QueryTerm couldn't find a QueryTerm in queryTerms which has same search value,
325
     *    then search queryTermsWithSameValue, to see if this vector already has the search value.
326
     *    2.1) if has the search value, add the new QueryTerm to the queryTermsWithSameValue.
327
     *    2.2) if hasn't, add the new QueryTerm to queryTerms vector.
328
     */
329
    private void handleNewQueryTerms(QueryTerm newTerm)
330
    {
331 3237 tao
    	// currently we only handle UNION group
332
    	if (newTerm != null )
333 3225 tao
    	{
334 3239 tao
    		//System.out.println("new term is not null branch in handle new query term");
335 3237 tao
    		//we only handle union operator now.
336 4812 daigle
    		try {
337
    			if (operator != null
338
						&& operator.equalsIgnoreCase(UNION)
339
						&& SystemUtil.getPathsForIndexing().contains(
340
								newTerm.getPathExpression())) {
341
					// System.out.println("in only union branch in handle new
342
					// query term");
343
					for (int i = 0; i < queryTermsInPathIndex.size(); i++) {
344
						QueryTerm term = (QueryTerm) queryTermsInPathIndex.elementAt(i);
345
						if (term != null && term.hasSameSearchValue(newTerm)) {
346
							// System.out.println("1Move a query term and add a
347
							// new query term into search value in handle new
348
							// query term");
349
							// find a target which has same search value
350
							Vector newSameValueVector = new Vector();
351
							newSameValueVector.add(term);
352
							newSameValueVector.addElement(newTerm);
353
							queryTermsWithSameValue.add(newSameValueVector);
354
							queryTermsInPathIndex.remove(i);
355
							return;
356
						}
357
					}
358
					// no same search value was found in queryTerms.
359
					// then we need search queryTermsWithSameValue
360
					for (int i = 0; i < queryTermsWithSameValue.size(); i++) {
361
						Vector sameValueVec = (Vector) queryTermsWithSameValue
362
								.elementAt(i);
363
						// we only compare the first query term
364
						QueryTerm term = (QueryTerm) sameValueVec.elementAt(0);
365
						if (term != null && term.hasSameSearchValue(newTerm)) {
366
							// System.out.println("2add a new query term into
367
							// search value in handle new query term");
368
							sameValueVec.add(newTerm);
369
							return;
370
						}
371
					}
372
					// nothing found, but the search path is still in
373
					// xml_path_index,
374
					// save it into queryTermsInPathIndex vector
375
					queryTermsInPathIndex.add(newTerm);
376
					return;
377
				}
378 4854 daigle
    		} catch (MetacatUtilException ue) {
379 4812 daigle
				logMetacat.warn("Could not get index paths: " + ue.getMessage());
380
			}
381 3328 tao
382
    		// add this newTerm to queryTerms since we couldn't find it in xml_path_index
383 3225 tao
    		queryTerms.add(newTerm);
384
    	}
385
386
    }
387 1831 tao
  }