Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that represents a structured query, and can be
4
 *             constructed from an XML serialization conforming to
5
 *             pathquery.dtd. The printSQL() method can be used to print
6
 *             a SQL serialization of the query.
7
 *  Copyright: 2000 Regents of the University of California and the
8
 *             National Center for Ecological Analysis and Synthesis
9
 *    Authors: Matt Jones
10
 *
11
 *   '$Author: daigle $'
12
 *     '$Date: 2009-08-04 14:32:58 -0700 (Tue, 04 Aug 2009) $'
13
 * '$Revision: 5015 $'
14
 *
15
 * This program is free software; you can redistribute it and/or modify
16
 * it under the terms of the GNU General Public License as published by
17
 * the Free Software Foundation; either version 2 of the License, or
18
 * (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU General Public License
26
 * along with this program; if not, write to the Free Software
27
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28
 */
29

    
30
package edu.ucsb.nceas.metacat;
31

    
32
import edu.ucsb.nceas.dbadapter.*;
33
import edu.ucsb.nceas.metacat.shared.MetacatUtilException;
34
import edu.ucsb.nceas.metacat.util.MetacatUtil;
35
import edu.ucsb.nceas.metacat.util.SystemUtil;
36

    
37
import java.io.*;
38
import java.util.Hashtable;
39
import java.util.Stack;
40
import java.util.Vector;
41
import java.util.Enumeration;
42

    
43
import org.apache.log4j.Logger;
44

    
45
 /** a utility class that represents a group of terms in a query */
46
public  class QueryGroup {
47
    private String operator = null;  // indicates how query terms are combined
48
    private Vector children = null;  // the list of query terms and groups
49
    private int countPercentageSearchItem = 0;
50
    private Vector queryTermsWithSameValue = null;//this two dimension vectors.
51
                                                  //will hold query terms which has same search value.
52
    private Vector queryTermsInPathIndex = null; //this vector holds query terms without same value
53
                                                                                 // and search path is in path index.
54
    private Vector queryTerms = null;//this vector only holds query terms without same search value.
55
                                                             // and search path is NOT in path index.
56
    private Vector queryGroupsChildren = null;
57
    private static Logger logMetacat = Logger.getLogger(QueryGroup.class);
58
    private static String UNION = "UNION";
59

    
60
    /**
     * Construct a new, empty QueryGroup.
     *
     * @param operator the boolean connector used to connect the query terms
     *                 and sub-groups in this query group
     */
    public QueryGroup(String operator) {
      // Start with empty containers so terms and groups can be added right away.
      this.children = new Vector();
      this.queryGroupsChildren = new Vector();
      this.queryTerms = new Vector();
      this.queryTermsInPathIndex = new Vector();
      this.queryTermsWithSameValue = new Vector();
      this.operator = operator;
    }
74

    
75
    /**
76
     * Add a child QueryGroup to this QueryGroup
77
     *
78
     * @param qgroup the query group to be added to the list of terms
79
     */
80
    public void addChild(QueryGroup qgroup) {
81
      children.add((Object)qgroup);
82
      queryGroupsChildren.add(qgroup);
83
    }
84

    
85
    /**
86
     * Add a child QueryTerm to this QueryGroup
87
     *
88
     * @param qterm the query term to be added to the list of terms
89
     */
90
    public void addChild(QueryTerm qterm) {
91
      children.add((Object)qterm);
92
      handleNewQueryTerms(qterm);
93
      
94
    }
95

    
96
    /*
97
     * Retrieve an Enumeration of query terms for this QueryGroup
98
     */
99
    private Enumeration getChildren() {
100
      return children.elements();
101
    }
102

    
103
    public int getPercentageSymbolCount()
104
    {
105
      return countPercentageSearchItem;
106
    }
107

    
108
    /**
109
     * create a SQL serialization of the query that this instance represents
110
     */
111
    public String printSQL(boolean useXMLIndex) {
112
      StringBuffer self = new StringBuffer();
113
      StringBuffer queryString = new StringBuffer();
114

    
115
      boolean first = true;
116
      
117
      if (!queryTermsWithSameValue.isEmpty() || !queryTermsInPathIndex.isEmpty())
118
      {
119
    	  String pathIndexQueryString = printSQLStringInPathIndex();
120
    	  queryString.append(pathIndexQueryString);
121
    	  if (queryString != null)
122
    	  {
123
    		  first = false;
124
    	  }
125
      }
126
      
127
      for (int i=0; i<queryGroupsChildren.size(); i++)
128
      {
129
      
130
        
131
            QueryGroup qg = (QueryGroup)queryGroupsChildren.elementAt(i);
132
        	String queryGroupSQL = qg.printSQL(useXMLIndex);
133
        	logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: " 
134
        			+ queryGroupSQL);       	
135
        	if (first) {
136
        		first = false;
137
        	} else {
138
        		if(!queryString.toString().equals("") && queryGroupSQL != null &&!queryGroupSQL.equals("")){
139
                    queryString.append(" " + operator + " ");
140
        		}
141
        	}
142
   		  	queryString.append(queryGroupSQL);
143
   		  	
144
   		  	// count percerntage number
145
   		  	int count = qg.getPercentageSymbolCount();
146
   		  	countPercentageSearchItem = countPercentageSearchItem + count;
147
      }
148
      
149
      for (int i=0; i<queryTerms.size(); i++)
150
      {
151
           QueryTerm qt = (QueryTerm)queryTerms.elementAt(i);
152
           String termQueryString = qt.printSQL(useXMLIndex);
153
       	   logMetacat.info("In QueryGroup.printSQL.. found a QueryGroup: " 
154
        			+ termQueryString);
155
           if(!(qt.getSearchMode().equals("contains") && qt.getValue().equals("%"))){
156
        	   if (first) {
157
                   first = false;
158
               } else {
159
                   if(!queryString.toString().equals("")){
160
                       queryString.append(" " + operator + " ");
161
                   }
162
               }
163
               queryString.append(termQueryString);
164
           
165
           // count percerntage number
166
           int count = qt.getPercentageSymbolCount();
167
           countPercentageSearchItem = countPercentageSearchItem + count;
168
        } 
169
      }
170

    
171
      if(!queryString.toString().equals("")){
172
          self.append("(");
173
          self.append(queryString.toString());
174
          self.append(")");
175
      }
176
      
177
      logMetacat.info("In QueryGroup.printSQL.. final query returned is: " 
178
			+ self.toString());
179
      return self.toString();
180
    }
181
    
182
    
183
    
184
    
185
    /*
186
     * If every query term in a queryGroup share a search value and search path
187
     * is in xml_path_index, we should use a new query to replace the original query term query in order to
188
     * improve performance. Also if even the term doesn't share any value with other term
189
     * we still use "OR" to replace UNION action (we only handle union operator in the query group).
190
     * 
191
     */
192
    private String printSQLStringInPathIndex()
193
    {
194
    	String sql ="";
195
    	String value ="";
196
    	boolean casesensitive = false;
197
    	StringBuffer sqlBuff = new StringBuffer();
198
    	Vector pathVector = new Vector();
199
    	int index =0;
200
    	if (queryTermsWithSameValue != null && queryTermsInPathIndex != null)
201
    	{
202
    		
203
    		sqlBuff.append("SELECT DISTINCT docid FROM xml_path_index WHERE ");
204
    		if (!queryTermsWithSameValue.isEmpty())
205
    		{
206
    			boolean firstVector = true;
207
	    		for (int j=0; j<queryTermsWithSameValue.size(); j++)
208
	    		{
209
	    	   		Vector queryTermVector = (Vector)queryTermsWithSameValue.elementAt(j);
210
		    		QueryTerm term1 = (QueryTerm)queryTermVector.elementAt(0);
211
		        	value = term1.getValue();
212
		        	casesensitive = term1.isCaseSensitive();
213
		        	boolean first = true;
214
		        	if (firstVector)
215
		        	{
216
					  firstVector = false;
217
		        	}
218
		        	else
219
		        	{
220
		        		sqlBuff.append(" "+"OR"+" ");
221
		        	}
222
		        	if (casesensitive) {
223
			        	sqlBuff.append(" (nodedata");
224
		        	} else {
225
		        		sqlBuff.append(" (UPPER(nodedata)");
226
		        	}
227
		        	sqlBuff.append(" LIKE '%");
228
		        	if (value != null && !casesensitive)
229
		        	{
230
		        	    sqlBuff.append(value.toUpperCase());
231
		        	}
232
		        	else
233
		        	{
234
		        		sqlBuff.append(value);
235
		        	}
236
					sqlBuff.append("%' AND path IN (");
237
		    		//gets every path in query term object
238
		    		for (int i=0; i<queryTermVector.size(); i++)
239
		    		{
240
		    			QueryTerm term = (QueryTerm)queryTermVector.elementAt(i);
241
		    			value = term.getValue();
242
		    			String path = term.getPathExpression();
243
		    			if (path != null && !path.equals(""))
244
		    			{
245
		    				if (first)
246
		    				{
247
		    					first = false;
248
		    					sqlBuff.append("'");
249
		    					sqlBuff.append(path);
250
		    					sqlBuff.append("'");
251
		    					
252
		    				}
253
		    				else
254
		    				{
255
		    					sqlBuff.append(",'");
256
		    					sqlBuff.append(path);
257
		    					sqlBuff.append("'");
258
		    				}
259
		    				index++;
260
		     				if (value != null && (value.equals("%") || value.equals("%%%")))
261
		                    {
262
		    				  countPercentageSearchItem++;
263
		                    }
264
	    			     }
265
	    		    }
266
	    		    sqlBuff.append("))");
267
	    	
268
	    	    }
269
	    	}
270
    		if (!queryTermsInPathIndex.isEmpty())
271
    		{
272
    			for (int j=0; j<queryTermsInPathIndex.size(); j++)
273
    			{
274
    				QueryTerm term = (QueryTerm)queryTermsInPathIndex.elementAt(j);
275
    				if (term != null)
276
    				{
277
	    				term.setInUnionGroup(true);
278
		    			 if (index > 0)
279
		    			 {
280
		    				 sqlBuff.append(" "+"OR"+" ");
281
		    			 }
282
		    			 sqlBuff.append("(");
283
	    				 sqlBuff.append(term.printSQL(true));
284
	    				 sqlBuff.append(")");
285
	    				 index++;
286
	    			}
287
    			}
288
    		}
289
    	}
290
    	if (index >0)
291
    	{
292
    		sql = sqlBuff.toString();
293
    	}
294
    	return sql;
295
    }
296

    
297
    /**
298
     * create a String description of the query that this instance represents.
299
     * This should become a way to get the XML serialization of the query.
300
     */
301
    public String toString() {
302
      StringBuffer self = new StringBuffer();
303

    
304
      self.append("  (Query group operator=" + operator + "\n");
305
      Enumeration en= getChildren();
306
      while (en.hasMoreElements()) {
307
        Object qobject = en.nextElement();
308
        self.append(qobject);
309
      }
310
      self.append("  )\n");
311
      return self.toString();
312
    }
313
    
314
    /*
315
     * When a new QueryTerm come, first we need to compare it to
316
     * the queryTerm vector, which contains queryTerm that doesn't
317
     * have same search value to any other queryTerm. Here is algorithm.
318
     * 1) If new QueryTerm find a QueryTerm in queryTerms which has same search value,
319
     *    them create a new vector which contain both QueryTerms and add the new vector
320
     *    to two-dimention vector queryTermsWithSameValue, and remove the QueryTerm which
321
     *    was in queryTerm.
322
     * 2) If new QueryTerm couldn't find a QueryTerm in queryTerms which has same search value,
323
     *    then search queryTermsWithSameValue, to see if this vector already has the search value.
324
     *    2.1) if has the search value, add the new QueryTerm to the queryTermsWithSameValue.
325
     *    2.2) if hasn't, add the new QueryTerm to queryTerms vector.
326
     */
327
    private void handleNewQueryTerms(QueryTerm newTerm)
328
    {
329
    	// currently we only handle UNION group
330
    	if (newTerm != null )
331
    	{
332
    		//System.out.println("new term is not null branch in handle new query term");
333
    		//we only handle union operator now.
334
    		try {
335
    			if (operator != null
336
						&& operator.equalsIgnoreCase(UNION)
337
						&& SystemUtil.getPathsForIndexing().contains(
338
								newTerm.getPathExpression())) {
339
					// System.out.println("in only union branch in handle new
340
					// query term");
341
					for (int i = 0; i < queryTermsInPathIndex.size(); i++) {
342
						QueryTerm term = (QueryTerm) queryTermsInPathIndex.elementAt(i);
343
						if (term != null && term.hasSameSearchValue(newTerm)) {
344
							// System.out.println("1Move a query term and add a
345
							// new query term into search value in handle new
346
							// query term");
347
							// find a target which has same search value
348
							Vector newSameValueVector = new Vector();
349
							newSameValueVector.add(term);
350
							newSameValueVector.addElement(newTerm);
351
							queryTermsWithSameValue.add(newSameValueVector);
352
							queryTermsInPathIndex.remove(i);
353
							return;
354
						}
355
					}
356
					// no same search value was found in queryTerms.
357
					// then we need search queryTermsWithSameValue
358
					for (int i = 0; i < queryTermsWithSameValue.size(); i++) {
359
						Vector sameValueVec = (Vector) queryTermsWithSameValue
360
								.elementAt(i);
361
						// we only compare the first query term
362
						QueryTerm term = (QueryTerm) sameValueVec.elementAt(0);
363
						if (term != null && term.hasSameSearchValue(newTerm)) {
364
							// System.out.println("2add a new query term into
365
							// search value in handle new query term");
366
							sameValueVec.add(newTerm);
367
							return;
368
						}
369
					}
370
					// nothing found, but the search path is still in
371
					// xml_path_index,
372
					// save it into queryTermsInPathIndex vector
373
					queryTermsInPathIndex.add(newTerm);
374
					return;
375
				}    		
376
    		} catch (MetacatUtilException ue) {
377
				logMetacat.warn("Could not get index paths: " + ue.getMessage());
378
			}
379
    		
380
    		// add this newTerm to queryTerms since we couldn't find it in xml_path_index
381
    		queryTerms.add(newTerm);
382
    	}
383
    	
384
    }
385
  }
(51-51/62)