Project

General

Profile

1 1831 tao
/**
2
 *  '$RCSfile$'
3 2366 sgarg
 *    Purpose: A Class that represents a structured query, and can be
4
 *             constructed from an XML serialization conforming to
5
 *             pathquery.dtd. The printSQL() method can be used to print
6 1831 tao
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *
11
 *   '$Author$'
12
 *     '$Date$'
13
 * '$Revision$'
14
 *
15
 * This program is free software; you can redistribute it and/or modify
16
 * it under the terms of the GNU General Public License as published by
17
 * the Free Software Foundation; either version 2 of the License, or
18
 * (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU General Public License
26
 * along with this program; if not, write to the Free Software
27
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28
 */
29
30
package edu.ucsb.nceas.metacat;
31
32
import edu.ucsb.nceas.dbadapter.*;
33
34
import java.io.*;
35
import java.util.Hashtable;
36
import java.util.Stack;
37
import java.util.Vector;
38
import java.util.Enumeration;
39
40 2677 sgarg
import org.apache.log4j.Logger;
41
42 1831 tao
 /** a utility class that represents a group of terms in a query */
43
public  class QueryGroup {
44
    private String operator = null;  // indicates how query terms are combined
45
    private Vector children = null;  // the list of query terms and groups
46
    private int countPercentageSearchItem = 0;
47 3225 tao
    private Vector queryTermsWithSameValue = null;//this two dimension vectors.
48
                                                  //will hold query terms which has same search value.
49 3328 tao
    private Vector queryTermsInPathIndex = null; //this vector holds query terms without same value
50
                                                                                 // and search path is in path index.
51 3225 tao
    private Vector queryTerms = null;//this vector only holds query terms without same search value.
52 3328 tao
                                                             // and search path is NOT in path index.
53 3226 tao
    private Vector queryGroupsChildren = null;
54 2677 sgarg
    private static Logger logMetacat = Logger.getLogger(QueryGroup.class);
55 3237 tao
    private static String UNION = "UNION";
56 2677 sgarg
57 2366 sgarg
    /**
     * Construct a new, empty QueryGroup.
     *
     * @param operator the boolean connector used to connect the query terms
     *                 and sub-groups in this query group
     */
    public QueryGroup(String operator) {
        // start every bookkeeping collection out empty
        this.queryGroupsChildren = new Vector();
        this.queryTerms = new Vector();
        this.queryTermsInPathIndex = new Vector();
        this.queryTermsWithSameValue = new Vector();
        this.children = new Vector();

        this.operator = operator;
    }
71
72 2366 sgarg
    /**
73 1831 tao
     * Add a child QueryGroup to this QueryGroup
74
     *
75
     * @param qgroup the query group to be added to the list of terms
76
     */
77
    public void addChild(QueryGroup qgroup) {
78 3237 tao
      children.add((Object)qgroup);
79 3226 tao
      queryGroupsChildren.add(qgroup);
80 1831 tao
    }
81
82
    /**
83
     * Add a child QueryTerm to this QueryGroup
84
     *
85
     * @param qterm the query term to be added to the list of terms
86
     */
87
    public void addChild(QueryTerm qterm) {
88 3237 tao
      children.add((Object)qterm);
89 3225 tao
      handleNewQueryTerms(qterm);
90
91 1831 tao
    }
92
93 3237 tao
    /*
94 1831 tao
     * Retrieve an Enumeration of query terms for this QueryGroup
95
     */
96 3237 tao
    private Enumeration getChildren() {
97 1831 tao
      return children.elements();
98
    }
99 2366 sgarg
100 1831 tao
    public int getPercentageSymbolCount()
101
    {
102
      return countPercentageSearchItem;
103
    }
104 2366 sgarg
105 1831 tao
    /**
106
     * create a SQL serialization of the query that this instance represents
107
     */
108
    public String printSQL(boolean useXMLIndex) {
109
      StringBuffer self = new StringBuffer();
110 2373 sgarg
      StringBuffer queryString = new StringBuffer();
111
112 1831 tao
      boolean first = true;
113 2677 sgarg
114 3430 tao
      if (!queryTermsWithSameValue.isEmpty() || !queryTermsInPathIndex.isEmpty())
115 3237 tao
      {
116 3328 tao
    	  String pathIndexQueryString = printSQLStringInPathIndex();
117
    	  queryString.append(pathIndexQueryString);
118 3245 tao
    	  if (queryString != null)
119
    	  {
120
    		  first = false;
121
    	  }
122 3237 tao
      }
123
124
      for (int i=0; i<queryGroupsChildren.size(); i++)
125
      {
126
127
128
            QueryGroup qg = (QueryGroup)queryGroupsChildren.elementAt(i);
129 2634 sgarg
        	String queryGroupSQL = qg.printSQL(useXMLIndex);
130 2677 sgarg
        	logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: "
131 3237 tao
        			+ queryGroupSQL);
132 2634 sgarg
        	if (first) {
133
        		first = false;
134
        	} else {
135
        		if(!queryString.toString().equals("") && queryGroupSQL != null &&!queryGroupSQL.equals("")){
136 2503 sgarg
                    queryString.append(" " + operator + " ");
137 2677 sgarg
        		}
138
        	}
139
   		  	queryString.append(queryGroupSQL);
140
141
   		  	// count percerntage number
142
   		  	int count = qg.getPercentageSymbolCount();
143
   		  	countPercentageSearchItem = countPercentageSearchItem + count;
144 3237 tao
      }
145
146
      for (int i=0; i<queryTerms.size(); i++)
147
      {
148
           QueryTerm qt = (QueryTerm)queryTerms.elementAt(i);
149 2373 sgarg
           String termQueryString = qt.printSQL(useXMLIndex);
150 2677 sgarg
       	   logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: "
151
        			+ termQueryString);
152 2366 sgarg
           if(!(qt.getSearchMode().equals("contains") && qt.getValue().equals("%"))){
153 2634 sgarg
        	   if (first) {
154 2503 sgarg
                   first = false;
155
               } else {
156
                   if(!queryString.toString().equals("")){
157
                       queryString.append(" " + operator + " ");
158
                   }
159
               }
160 2373 sgarg
               queryString.append(termQueryString);
161 3237 tao
162 1831 tao
           // count percerntage number
163
           int count = qt.getPercentageSymbolCount();
164
           countPercentageSearchItem = countPercentageSearchItem + count;
165 3237 tao
        }
166 1831 tao
      }
167 2373 sgarg
168
      if(!queryString.toString().equals("")){
169
          self.append("(");
170
          self.append(queryString.toString());
171
          self.append(")");
172
      }
173 2677 sgarg
174
      logMetacat.info("In QueryGroup.printSQL.. final query returned is: "
175
			+ self.toString());
176 1831 tao
      return self.toString();
177
    }
178 3226 tao
179
180 3237 tao
181
182 3226 tao
    /*
183 3328 tao
     * If every query term in a queryGroup share a search value and search path
184
     * is in xml_path_index, we should use a new query to replace the original query term query in order to
185
     * improve performance. Also if even the term doesn't share any value with other term
186
     * we still use "OR" to replace UNION action (we only handle union operator in the query group).
187
     *
188 3226 tao
     */
189 3328 tao
    private String printSQLStringInPathIndex()
190 3226 tao
    {
191
    	String sql ="";
192
    	String value ="";
193
    	StringBuffer sqlBuff = new StringBuffer();
194
    	Vector pathVector = new Vector();
195
    	int index =0;
196 3328 tao
    	if (queryTermsWithSameValue != null && queryTermsInPathIndex != null)
197 3226 tao
    	{
198 3328 tao
199
    		sqlBuff.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
200
    		if (!queryTermsWithSameValue.isEmpty())
201 3226 tao
    		{
202 3328 tao
    			boolean firstVector = true;
203
	    		for (int j=0; j<queryTermsWithSameValue.size(); j++)
204 3237 tao
	    		{
205 3328 tao
	    	   		Vector queryTermVector = (Vector)queryTermsWithSameValue.elementAt(j);
206
		    		QueryTerm term1 = (QueryTerm)queryTermVector.elementAt(0);
207
		        	value = term1.getValue();
208
		        	boolean first = true;
209
		        	if (firstVector)
210
		        	{
211
					  firstVector = false;
212
		        	}
213
		        	else
214
		        	{
215
		        		sqlBuff.append(" "+"OR"+" ");
216
		        	}
217
		        	sqlBuff.append(" (UPPER(nodedata) LIKE '%");
218
		        	if (value != null)
219
		        	{
220
		        	    sqlBuff.append(value.toUpperCase());
221
		        	}
222
		        	else
223
		        	{
224
		        		sqlBuff.append(value);
225
		        	}
226
					sqlBuff.append("%' AND path IN (");
227
		    		//gets every path in query term object
228
		    		for (int i=0; i<queryTermVector.size(); i++)
229
		    		{
230
		    			QueryTerm term = (QueryTerm)queryTermVector.elementAt(i);
231
		    			value = term.getValue();
232
		    			String path = term.getPathExpression();
233
		    			if (path != null && !path.equals(""))
234
		    			{
235
		    				if (first)
236
		    				{
237
		    					first = false;
238
		    					sqlBuff.append("'");
239
		    					sqlBuff.append(path);
240
		    					sqlBuff.append("'");
241
242
		    				}
243
		    				else
244
		    				{
245
		    					sqlBuff.append(",'");
246
		    					sqlBuff.append(path);
247
		    					sqlBuff.append("'");
248
		    				}
249
		    				index++;
250
		     				if (value != null && (value.equals("%") || value.equals("%%%")))
251
		                    {
252
		    				  countPercentageSearchItem++;
253
		                    }
254
	    			     }
255
	    		    }
256
	    		    sqlBuff.append("))");
257
258
	    	    }
259
	    	}
260
    		if (!queryTermsInPathIndex.isEmpty())
261
    		{
262
    			for (int j=0; j<queryTermsInPathIndex.size(); j++)
263
    			{
264
    				QueryTerm term = (QueryTerm)queryTermsInPathIndex.elementAt(j);
265
    				if (term != null)
266
    				{
267
	    				term.setInUnionGroup(true);
268
		    			 if (index > 0)
269
		    			 {
270
		    				 sqlBuff.append(" "+"OR"+" ");
271
		    			 }
272
		    			 sqlBuff.append("(");
273
	    				 sqlBuff.append(term.printSQL(true));
274
	    				 sqlBuff.append(")");
275
	    				 index++;
276
	    			}
277
    			}
278
    		}
279 3226 tao
    	}
280
    	if (index >0)
281
    	{
282
    		sql = sqlBuff.toString();
283
    	}
284
    	return sql;
285
    }
286 1831 tao
287
    /**
288
     * create a String description of the query that this instance represents.
289
     * This should become a way to get the XML serialization of the query.
290
     */
291
    public String toString() {
292
      StringBuffer self = new StringBuffer();
293
294
      self.append("  (Query group operator=" + operator + "\n");
295
      Enumeration en= getChildren();
296
      while (en.hasMoreElements()) {
297
        Object qobject = en.nextElement();
298
        self.append(qobject);
299
      }
300
      self.append("  )\n");
301
      return self.toString();
302
    }
303 3225 tao
304
    /*
305
     * When a new QueryTerm come, first we need to compare it to
306
     * the queryTerm vector, which contains queryTerm that doesn't
307
     * have same search value to any other queryTerm. Here is algorithm.
308
     * 1) If new QueryTerm find a QueryTerm in queryTerms which has same search value,
309
     *    them create a new vector which contain both QueryTerms and add the new vector
310
     *    to two-dimention vector queryTermsWithSameValue, and remove the QueryTerm which
311
     *    was in queryTerm.
312
     * 2) If new QueryTerm couldn't find a QueryTerm in queryTerms which has same search value,
313
     *    then search queryTermsWithSameValue, to see if this vector already has the search value.
314
     *    2.1) if has the search value, add the new QueryTerm to the queryTermsWithSameValue.
315
     *    2.2) if hasn't, add the new QueryTerm to queryTerms vector.
316
     */
317
    private void handleNewQueryTerms(QueryTerm newTerm)
318
    {
319 3237 tao
    	// currently we only handle UNION group
320
    	if (newTerm != null )
321 3225 tao
    	{
322 3239 tao
    		//System.out.println("new term is not null branch in handle new query term");
323 3237 tao
    		//we only handle union operator now.
324 3245 tao
    		if (operator != null && operator.equalsIgnoreCase(UNION) &&
325
    				MetaCatUtil.pathsForIndexing.contains(newTerm.getPathExpression()))
326 3237 tao
    	    {
327 3239 tao
    			//System.out.println("in only union branch in handle new query term");
328 3328 tao
	    		for (int i=0; i<queryTermsInPathIndex.size(); i++)
329 3237 tao
	    		{
330 3328 tao
	    			QueryTerm term = (QueryTerm)queryTermsInPathIndex.elementAt(i);
331 3237 tao
	    			if (term != null && term.hasSameSearchValue(newTerm))
332
	    			{
333 3239 tao
	    				//System.out.println("1Move a query term and add a new query term into search value in handle new query term");
334 3237 tao
	    				// find a target which has same search value
335
	    				Vector newSameValueVector = new Vector();
336
	    				newSameValueVector.add(term);
337
	    				newSameValueVector.addElement(newTerm);
338
	    				queryTermsWithSameValue.add(newSameValueVector);
339 3328 tao
	    				queryTermsInPathIndex.remove(i);
340 3237 tao
	    				return;
341
	    			}
342
	    		}
343
	    		// no same search value was found in queryTerms.
344
	    		// then we need search queryTermsWithSameValue
345
	    		for (int i=0; i<queryTermsWithSameValue.size(); i++)
346
	    		{
347
	    			Vector sameValueVec = (Vector)queryTermsWithSameValue.elementAt(i);
348
	    			// we only compare the first query term
349
	    			QueryTerm term = (QueryTerm)sameValueVec.elementAt(0);
350
	    			if (term != null && term.hasSameSearchValue(newTerm))
351
	    			{
352 3239 tao
	    				//System.out.println("2add a new query term into search value in handle new query term");
353 3237 tao
	    				sameValueVec.add(newTerm);
354
	    				return;
355
	    			}
356
	    		}
357 3328 tao
	    		//nothing found, but the search path is still in xml_path_index,
358
	    		// save it into queryTermsInPathIndex vector
359
	    		queryTermsInPathIndex.add(newTerm);
360
	    		return;
361 3237 tao
    	    }
362 3328 tao
363
    		// add this newTerm to queryTerms since we couldn't find it in xml_path_index
364 3225 tao
    		queryTerms.add(newTerm);
365
    	}
366
367
    }
368 1831 tao
  }