Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be
4
 *             constructed from an XML serialization conforming to
5
 *             pathquery.dtd. The printSQL() method can be used to print
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *
11
 *   '$Author: leinfelder $'
12
 *     '$Date: 2008-08-27 10:18:24 -0700 (Wed, 27 Aug 2008) $'
13
 * '$Revision: 4325 $'
14
 *
15
 * This program is free software; you can redistribute it and/or modify
16
 * it under the terms of the GNU General Public License as published by
17
 * the Free Software Foundation; either version 2 of the License, or
18
 * (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU General Public License
26
 * along with this program; if not, write to the Free Software
27
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28
 */
29

    
30
package edu.ucsb.nceas.metacat;
31

    
32
import edu.ucsb.nceas.dbadapter.*;
33
import edu.ucsb.nceas.metacat.util.MetaCatUtil;
34

    
35
import java.io.*;
36
import java.util.Hashtable;
37
import java.util.Stack;
38
import java.util.Vector;
39
import java.util.Enumeration;
40

    
41
import org.apache.log4j.Logger;
42

    
43
 /** a utility class that represents a group of terms in a query */
44
public  class QueryGroup {
45
    private String operator = null;  // indicates how query terms are combined
46
    private Vector children = null;  // the list of query terms and groups
47
    private int countPercentageSearchItem = 0;
48
    private Vector queryTermsWithSameValue = null;//this two dimension vectors.
49
                                                  //will hold query terms which has same search value.
50
    private Vector queryTermsInPathIndex = null; //this vector holds query terms without same value
51
                                                                                 // and search path is in path index.
52
    private Vector queryTerms = null;//this vector only holds query terms without same search value.
53
                                                             // and search path is NOT in path index.
54
    private Vector queryGroupsChildren = null;
55
    private static Logger logMetacat = Logger.getLogger(QueryGroup.class);
56
    private static String UNION = "UNION";
57

    
58
    /**
59
     * construct a new QueryGroup
60
     *
61
     * @param operator the boolean conector used to connect query terms
62
     *                    in this query group
63
     */
64
    public QueryGroup(String operator) {
65
      this.operator = operator;
66
      children = new Vector();
67
      queryTermsWithSameValue = new Vector();
68
      queryTermsInPathIndex = new Vector(); 
69
      queryTerms = new Vector();
70
      queryGroupsChildren = new Vector();
71
    }
72

    
73
    /**
74
     * Add a child QueryGroup to this QueryGroup
75
     *
76
     * @param qgroup the query group to be added to the list of terms
77
     */
78
    public void addChild(QueryGroup qgroup) {
79
      children.add((Object)qgroup);
80
      queryGroupsChildren.add(qgroup);
81
    }
82

    
83
    /**
84
     * Add a child QueryTerm to this QueryGroup
85
     *
86
     * @param qterm the query term to be added to the list of terms
87
     */
88
    public void addChild(QueryTerm qterm) {
89
      children.add((Object)qterm);
90
      handleNewQueryTerms(qterm);
91
      
92
    }
93

    
94
    /*
95
     * Retrieve an Enumeration of query terms for this QueryGroup
96
     */
97
    private Enumeration getChildren() {
98
      return children.elements();
99
    }
100

    
101
    public int getPercentageSymbolCount()
102
    {
103
      return countPercentageSearchItem;
104
    }
105

    
106
    /**
107
     * create a SQL serialization of the query that this instance represents
108
     */
109
    public String printSQL(boolean useXMLIndex) {
110
      StringBuffer self = new StringBuffer();
111
      StringBuffer queryString = new StringBuffer();
112

    
113
      boolean first = true;
114
      
115
      if (!queryTermsWithSameValue.isEmpty() || !queryTermsInPathIndex.isEmpty())
116
      {
117
    	  String pathIndexQueryString = printSQLStringInPathIndex();
118
    	  queryString.append(pathIndexQueryString);
119
    	  if (queryString != null)
120
    	  {
121
    		  first = false;
122
    	  }
123
      }
124
      
125
      for (int i=0; i<queryGroupsChildren.size(); i++)
126
      {
127
      
128
        
129
            QueryGroup qg = (QueryGroup)queryGroupsChildren.elementAt(i);
130
        	String queryGroupSQL = qg.printSQL(useXMLIndex);
131
        	logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: " 
132
        			+ queryGroupSQL);       	
133
        	if (first) {
134
        		first = false;
135
        	} else {
136
        		if(!queryString.toString().equals("") && queryGroupSQL != null &&!queryGroupSQL.equals("")){
137
                    queryString.append(" " + operator + " ");
138
        		}
139
        	}
140
   		  	queryString.append(queryGroupSQL);
141
   		  	
142
   		  	// count percerntage number
143
   		  	int count = qg.getPercentageSymbolCount();
144
   		  	countPercentageSearchItem = countPercentageSearchItem + count;
145
      }
146
      
147
      for (int i=0; i<queryTerms.size(); i++)
148
      {
149
           QueryTerm qt = (QueryTerm)queryTerms.elementAt(i);
150
           String termQueryString = qt.printSQL(useXMLIndex);
151
       	   logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: " 
152
        			+ termQueryString);
153
           if(!(qt.getSearchMode().equals("contains") && qt.getValue().equals("%"))){
154
        	   if (first) {
155
                   first = false;
156
               } else {
157
                   if(!queryString.toString().equals("")){
158
                       queryString.append(" " + operator + " ");
159
                   }
160
               }
161
               queryString.append(termQueryString);
162
           
163
           // count percerntage number
164
           int count = qt.getPercentageSymbolCount();
165
           countPercentageSearchItem = countPercentageSearchItem + count;
166
        } 
167
      }
168

    
169
      if(!queryString.toString().equals("")){
170
          self.append("(");
171
          self.append(queryString.toString());
172
          self.append(")");
173
      }
174
      
175
      logMetacat.info("In QueryGroup.printSQL.. final query returned is: " 
176
			+ self.toString());
177
      return self.toString();
178
    }
179
    
180
    
181
    
182
    
183
    /*
184
     * If every query term in a queryGroup share a search value and search path
185
     * is in xml_path_index, we should use a new query to replace the original query term query in order to
186
     * improve performance. Also if even the term doesn't share any value with other term
187
     * we still use "OR" to replace UNION action (we only handle union operator in the query group).
188
     * 
189
     */
190
    private String printSQLStringInPathIndex()
191
    {
192
    	String sql ="";
193
    	String value ="";
194
    	boolean casesensitive = false;
195
    	StringBuffer sqlBuff = new StringBuffer();
196
    	Vector pathVector = new Vector();
197
    	int index =0;
198
    	if (queryTermsWithSameValue != null && queryTermsInPathIndex != null)
199
    	{
200
    		
201
    		sqlBuff.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
202
    		if (!queryTermsWithSameValue.isEmpty())
203
    		{
204
    			boolean firstVector = true;
205
	    		for (int j=0; j<queryTermsWithSameValue.size(); j++)
206
	    		{
207
	    	   		Vector queryTermVector = (Vector)queryTermsWithSameValue.elementAt(j);
208
		    		QueryTerm term1 = (QueryTerm)queryTermVector.elementAt(0);
209
		        	value = term1.getValue();
210
		        	casesensitive = term1.isCaseSensitive();
211
		        	boolean first = true;
212
		        	if (firstVector)
213
		        	{
214
					  firstVector = false;
215
		        	}
216
		        	else
217
		        	{
218
		        		sqlBuff.append(" "+"OR"+" ");
219
		        	}
220
		        	if (casesensitive) {
221
			        	sqlBuff.append(" (nodedata");
222
		        	} else {
223
		        		sqlBuff.append(" (UPPER(nodedata)");
224
		        	}
225
		        	sqlBuff.append(" LIKE '%");
226
		        	if (value != null && !casesensitive)
227
		        	{
228
		        	    sqlBuff.append(value.toUpperCase());
229
		        	}
230
		        	else
231
		        	{
232
		        		sqlBuff.append(value);
233
		        	}
234
					sqlBuff.append("%' AND path IN (");
235
		    		//gets every path in query term object
236
		    		for (int i=0; i<queryTermVector.size(); i++)
237
		    		{
238
		    			QueryTerm term = (QueryTerm)queryTermVector.elementAt(i);
239
		    			value = term.getValue();
240
		    			String path = term.getPathExpression();
241
		    			if (path != null && !path.equals(""))
242
		    			{
243
		    				if (first)
244
		    				{
245
		    					first = false;
246
		    					sqlBuff.append("'");
247
		    					sqlBuff.append(path);
248
		    					sqlBuff.append("'");
249
		    					
250
		    				}
251
		    				else
252
		    				{
253
		    					sqlBuff.append(",'");
254
		    					sqlBuff.append(path);
255
		    					sqlBuff.append("'");
256
		    				}
257
		    				index++;
258
		     				if (value != null && (value.equals("%") || value.equals("%%%")))
259
		                    {
260
		    				  countPercentageSearchItem++;
261
		                    }
262
	    			     }
263
	    		    }
264
	    		    sqlBuff.append("))");
265
	    	
266
	    	    }
267
	    	}
268
    		if (!queryTermsInPathIndex.isEmpty())
269
    		{
270
    			for (int j=0; j<queryTermsInPathIndex.size(); j++)
271
    			{
272
    				QueryTerm term = (QueryTerm)queryTermsInPathIndex.elementAt(j);
273
    				if (term != null)
274
    				{
275
	    				term.setInUnionGroup(true);
276
		    			 if (index > 0)
277
		    			 {
278
		    				 sqlBuff.append(" "+"OR"+" ");
279
		    			 }
280
		    			 sqlBuff.append("(");
281
	    				 sqlBuff.append(term.printSQL(true));
282
	    				 sqlBuff.append(")");
283
	    				 index++;
284
	    			}
285
    			}
286
    		}
287
    	}
288
    	if (index >0)
289
    	{
290
    		sql = sqlBuff.toString();
291
    	}
292
    	return sql;
293
    }
294

    
295
    /**
296
     * create a String description of the query that this instance represents.
297
     * This should become a way to get the XML serialization of the query.
298
     */
299
    public String toString() {
300
      StringBuffer self = new StringBuffer();
301

    
302
      self.append("  (Query group operator=" + operator + "\n");
303
      Enumeration en= getChildren();
304
      while (en.hasMoreElements()) {
305
        Object qobject = en.nextElement();
306
        self.append(qobject);
307
      }
308
      self.append("  )\n");
309
      return self.toString();
310
    }
311
    
312
    /*
313
     * When a new QueryTerm come, first we need to compare it to
314
     * the queryTerm vector, which contains queryTerm that doesn't
315
     * have same search value to any other queryTerm. Here is algorithm.
316
     * 1) If new QueryTerm find a QueryTerm in queryTerms which has same search value,
317
     *    them create a new vector which contain both QueryTerms and add the new vector
318
     *    to two-dimention vector queryTermsWithSameValue, and remove the QueryTerm which
319
     *    was in queryTerm.
320
     * 2) If new QueryTerm couldn't find a QueryTerm in queryTerms which has same search value,
321
     *    then search queryTermsWithSameValue, to see if this vector already has the search value.
322
     *    2.1) if has the search value, add the new QueryTerm to the queryTermsWithSameValue.
323
     *    2.2) if hasn't, add the new QueryTerm to queryTerms vector.
324
     */
325
    private void handleNewQueryTerms(QueryTerm newTerm)
326
    {
327
    	// currently we only handle UNION group
328
    	if (newTerm != null )
329
    	{
330
    		//System.out.println("new term is not null branch in handle new query term");
331
    		//we only handle union operator now.
332
    		if (operator != null && operator.equalsIgnoreCase(UNION) && 
333
    				MetaCatUtil.pathsForIndexing.contains(newTerm.getPathExpression()))
334
    	    {
335
    			//System.out.println("in only union branch in handle new query term");
336
	    		for (int i=0; i<queryTermsInPathIndex.size(); i++)
337
	    		{
338
	    			QueryTerm term = (QueryTerm)queryTermsInPathIndex.elementAt(i);
339
	    			if (term != null && term.hasSameSearchValue(newTerm))
340
	    			{
341
	    				//System.out.println("1Move a query term and add a new query term into search value in handle new query term");
342
	    				// find a target which has same search value
343
	    				Vector newSameValueVector = new Vector();
344
	    				newSameValueVector.add(term);
345
	    				newSameValueVector.addElement(newTerm);
346
	    				queryTermsWithSameValue.add(newSameValueVector);
347
	    				queryTermsInPathIndex.remove(i);
348
	    				return;
349
	    			}
350
	    		}
351
	    		// no same search value was found in queryTerms.
352
	    		// then we need search queryTermsWithSameValue
353
	    		for (int i=0; i<queryTermsWithSameValue.size(); i++)
354
	    		{
355
	    			Vector sameValueVec = (Vector)queryTermsWithSameValue.elementAt(i);
356
	    			// we only compare the first query term
357
	    			QueryTerm term = (QueryTerm)sameValueVec.elementAt(0);
358
	    			if (term != null && term.hasSameSearchValue(newTerm))
359
	    			{
360
	    				//System.out.println("2add a new query term into search value in handle new query term");
361
	    				sameValueVec.add(newTerm);
362
	    				return;
363
	    			}
364
	    		}
365
	    		//nothing found, but the search path is still in xml_path_index,
366
	    		// save it into queryTermsInPathIndex vector
367
	    		queryTermsInPathIndex.add(newTerm);
368
	    		return;
369
    	    }
370
    		
371
    		// add this newTerm to queryTerms since we couldn't find it in xml_path_index
372
    		queryTerms.add(newTerm);
373
    	}
374
    	
375
    }
376
  }
(55-55/69)