1
|
/**
|
2
|
* '$RCSfile$'
|
3
|
* Purpose: A Class that searches a relational DB for elements and
|
4
|
* attributes that have free text matches a query string,
|
5
|
* or structured query matches to a path specified node in the
|
6
|
* XML hierarchy. It returns a result set consisting of the
|
7
|
* document ID for each document that satisfies the query
|
8
|
* Copyright: 2000 Regents of the University of California and the
|
9
|
* National Center for Ecological Analysis and Synthesis
|
10
|
* Authors: Matt Jones
|
11
|
* Release: @release@
|
12
|
*
|
13
|
* '$Author: berkley $'
|
14
|
* '$Date: 2000-09-05 13:50:56 -0700 (Tue, 05 Sep 2000) $'
|
15
|
* '$Revision: 436 $'
|
16
|
*/
|
17
|
|
18
|
package edu.ucsb.nceas.metacat;
|
19
|
|
20
|
import java.io.*;
|
21
|
import java.util.Vector;
|
22
|
import java.net.URL;
|
23
|
import java.net.MalformedURLException;
|
24
|
import java.sql.*;
|
25
|
import java.util.Stack;
|
26
|
import java.util.Hashtable;
|
27
|
import java.util.Enumeration;
|
28
|
|
29
|
/**
 * A class that searches a relational DB for elements and
 * attributes whose free text matches a query string,
 * or that match a structured query against a path-specified node in the
 * XML hierarchy. It returns a result set consisting of the
 * document ID for each document that satisfies the query.
 */
|
36
|
public class DBQuery {
|
37
|
|
38
|
private Connection conn = null;
|
39
|
private String parserName = null;
|
40
|
|
41
|
/**
|
42
|
* the main routine used to test the DBQuery utility.
|
43
|
* <p>
|
44
|
* Usage: java DBQuery <xmlfile>
|
45
|
*
|
46
|
* @param xmlfile the filename of the xml file containing the query
|
47
|
*/
|
48
|
static public void main(String[] args) {
|
49
|
|
50
|
if (args.length < 1)
|
51
|
{
|
52
|
System.err.println("Wrong number of arguments!!!");
|
53
|
System.err.println("USAGE: java DBQuery <xmlfile>");
|
54
|
return;
|
55
|
} else {
|
56
|
try {
|
57
|
|
58
|
String xmlfile = args[0];
|
59
|
|
60
|
// Open a connection to the database
|
61
|
MetaCatUtil util = new MetaCatUtil();
|
62
|
Connection dbconn = util.openDBConnection();
|
63
|
|
64
|
// Execute the query
|
65
|
DBQuery queryobj = new DBQuery(dbconn, util.getOption("saxparser"));
|
66
|
FileReader xml = new FileReader(new File(xmlfile));
|
67
|
Hashtable nodelist = null;
|
68
|
nodelist = queryobj.findDocuments(xml);
|
69
|
|
70
|
// Print the reulting document listing
|
71
|
StringBuffer result = new StringBuffer();
|
72
|
String document = null;
|
73
|
String docid = null;
|
74
|
result.append("<?xml version=\"1.0\"?>\n");
|
75
|
result.append("<resultset>\n");
|
76
|
// following line removed by Dan Higgins to avoid insertion of query XML inside returned XML doc
|
77
|
// result.append(" <query>" + xmlfile + "</query>\n");
|
78
|
Enumeration doclist = nodelist.keys();
|
79
|
while (doclist.hasMoreElements()) {
|
80
|
docid = (String)doclist.nextElement();
|
81
|
document = (String)nodelist.get(docid);
|
82
|
result.append(" <document>\n " + document +
|
83
|
"\n </document>\n");
|
84
|
}
|
85
|
result.append("</resultset>\n");
|
86
|
|
87
|
System.out.println(result);
|
88
|
|
89
|
} catch (Exception e) {
|
90
|
System.err.println("EXCEPTION HANDLING REQUIRED");
|
91
|
System.err.println(e.getMessage());
|
92
|
e.printStackTrace(System.err);
|
93
|
}
|
94
|
}
|
95
|
}
|
96
|
|
97
|
/**
 * Construct an instance of the DBQuery class.
 *
 * <p>The constructor only remembers its two arguments; no database or
 * parser work happens here. Generally, one would call the findDocuments()
 * routine after creating an instance to specify the search query.</p>
 *
 * @param conn the JDBC connection that we use for the query
 * @param parserName the fully qualified name of a Java class implementing
 *                   the org.xml.sax.XMLReader interface
 * @throws IOException declared in the signature; not raised by the body
 * @throws SQLException declared in the signature; not raised by the body
 * @throws ClassNotFoundException declared in the signature; not raised by
 *                                the body
 */
public DBQuery( Connection conn, String parserName )
                throws IOException, SQLException, ClassNotFoundException {
  // Keep the parser class name and the connection for later queries
  this.parserName = parserName;
  this.conn = conn;
}
|
114
|
|
115
|
/**
|
116
|
* routine to search the elements and attributes looking to match query
|
117
|
*
|
118
|
* @param xmlquery the xml serialization of the query (@see pathquery.dtd)
|
119
|
*/
|
120
|
public Hashtable findDocuments(Reader xmlquery) {
|
121
|
Hashtable docListResult = new Hashtable();
|
122
|
PreparedStatement pstmt;
|
123
|
String docid = null;
|
124
|
String docname = null;
|
125
|
String doctype = null;
|
126
|
String doctitle = null;
|
127
|
String createDate = null;
|
128
|
String updateDate = null;
|
129
|
String fieldname = null;
|
130
|
String fielddata = null;
|
131
|
StringBuffer document = null;
|
132
|
|
133
|
try {
|
134
|
// Get the XML query and covert it into a SQL statment
|
135
|
QuerySpecification qspec = new QuerySpecification(xmlquery,
|
136
|
parserName);
|
137
|
//System.out.println(qspec.printSQL());
|
138
|
pstmt = conn.prepareStatement( qspec.printSQL() );
|
139
|
|
140
|
// Execute the SQL query using the JDBC connection
|
141
|
pstmt.execute();
|
142
|
ResultSet rs = pstmt.getResultSet();
|
143
|
boolean tableHasRows = rs.next();
|
144
|
while (tableHasRows) {
|
145
|
docid = rs.getString(1);
|
146
|
docname = rs.getString(2);
|
147
|
doctype = rs.getString(3);
|
148
|
doctitle = rs.getString(4);
|
149
|
createDate = rs.getString(5);
|
150
|
updateDate = rs.getString(6);
|
151
|
|
152
|
document = new StringBuffer();
|
153
|
|
154
|
document.append("<docid>").append(docid).append("</docid>");
|
155
|
if (docname != null) {
|
156
|
document.append("<docname>" + docname + "</docname>");
|
157
|
}
|
158
|
if (doctype != null) {
|
159
|
document.append("<doctype>" + doctype + "</doctype>");
|
160
|
}
|
161
|
if (doctitle != null) {
|
162
|
document.append("<doctitle>" + doctitle + "</doctitle>");
|
163
|
}
|
164
|
if(createDate != null) {
|
165
|
document.append("<createdate>" + createDate + "</createdate>");
|
166
|
}
|
167
|
if(updateDate != null) {
|
168
|
document.append("<updatedate>" + updateDate + "</updatedate>");
|
169
|
}
|
170
|
|
171
|
// Store the document id and the root node id
|
172
|
docListResult.put(docid,(String)document.toString());
|
173
|
|
174
|
// Advance to the next record in the cursor
|
175
|
tableHasRows = rs.next();
|
176
|
}
|
177
|
|
178
|
if(qspec.containsExtendedSQL())
|
179
|
{
|
180
|
Vector extendedFields = new Vector(qspec.getReturnFieldList());
|
181
|
Vector results = new Vector();
|
182
|
pstmt = conn.prepareStatement(qspec.printExtendedSQL());
|
183
|
pstmt.execute();
|
184
|
rs = pstmt.getResultSet();
|
185
|
tableHasRows = rs.next();
|
186
|
while(tableHasRows)
|
187
|
{
|
188
|
docid = rs.getString(1);
|
189
|
fieldname = rs.getString(2);
|
190
|
fielddata = rs.getString(3);
|
191
|
|
192
|
document = new StringBuffer();
|
193
|
|
194
|
document.append("<param name=\"");
|
195
|
document.append(fieldname);
|
196
|
document.append("\">");
|
197
|
document.append(fielddata);
|
198
|
document.append("</param>");
|
199
|
|
200
|
tableHasRows = rs.next();
|
201
|
if(docListResult.containsKey(docid))
|
202
|
{
|
203
|
String removedelement = (String)docListResult.remove(docid);
|
204
|
docListResult.put(docid, removedelement + document.toString());
|
205
|
}
|
206
|
else
|
207
|
{
|
208
|
docListResult.put(docid, document.toString());
|
209
|
}
|
210
|
}
|
211
|
}
|
212
|
pstmt.close();
|
213
|
} catch (SQLException e) {
|
214
|
System.err.println("Error getting id: " + e.getMessage());
|
215
|
} catch (IOException ioe) {
|
216
|
System.err.println("Error printing qspec:");
|
217
|
System.err.println(ioe.getMessage());
|
218
|
}
|
219
|
//System.out.println("docListResult: ");
|
220
|
//System.out.println(docListResult.toString());
|
221
|
return docListResult;
|
222
|
}
|
223
|
|
224
|
/**
|
225
|
* returns a string array of the contents of a particular node.
|
226
|
* If the node appears more than once, the contents are returned
|
227
|
* in the order in which they appearred in the document.
|
228
|
* @param nodename the name or path of the particular node.
|
229
|
* @param docid the docid of the document you want the node from.
|
230
|
* @param conn a database connection-this allows this method to be static
|
231
|
*/
|
232
|
public static Object[] getNodeContent(String nodename, String docid,
|
233
|
Connection conn)
|
234
|
{
|
235
|
StringBuffer query = new StringBuffer();
|
236
|
Vector result = new Vector();
|
237
|
PreparedStatement pstmt;
|
238
|
query.append("select nodedata from xml_nodes where parentnodeid in ");
|
239
|
query.append("(select nodeid from xml_index where path like '");
|
240
|
query.append(nodename);
|
241
|
query.append("' and docid like '").append(docid).append("')");
|
242
|
try
|
243
|
{
|
244
|
pstmt = conn.prepareStatement(query.toString());
|
245
|
|
246
|
// Execute the SQL query using the JDBC connection
|
247
|
pstmt.execute();
|
248
|
ResultSet rs = pstmt.getResultSet();
|
249
|
boolean tableHasRows = rs.next();
|
250
|
while (tableHasRows)
|
251
|
{
|
252
|
result.add(rs.getString(1));
|
253
|
System.out.println(rs.getString(1));
|
254
|
tableHasRows = rs.next();
|
255
|
}
|
256
|
}
|
257
|
catch (SQLException e)
|
258
|
{
|
259
|
System.err.println("Error getting id: " + e.getMessage());
|
260
|
}
|
261
|
|
262
|
return result.toArray();
|
263
|
}
|
264
|
|
265
|
/**
|
266
|
* format a structured query as an XML document that conforms
|
267
|
* to the pathquery.dtd and is appropriate for submission to the DBQuery
|
268
|
* structured query engine
|
269
|
*
|
270
|
* @param params The list of parameters that should be included in the query
|
271
|
*/
|
272
|
public static String createSQuery(Hashtable params)
|
273
|
{
|
274
|
StringBuffer query = new StringBuffer();
|
275
|
Enumeration elements;
|
276
|
Enumeration keys;
|
277
|
String doctype = null;
|
278
|
String casesensitive = null;
|
279
|
String searchmode = null;
|
280
|
Object nextkey;
|
281
|
Object nextelement;
|
282
|
//add the xml headers
|
283
|
query.append("<?xml version=\"1.0\"?>\n");
|
284
|
query.append("<pathquery version=\"1.0\"><meta_file_id>");
|
285
|
|
286
|
if(params.containsKey("meta_file_id"))
|
287
|
{
|
288
|
query.append( ((String[])params.get("meta_file_id"))[0]);
|
289
|
query.append("</meta_file_id>");
|
290
|
}
|
291
|
else
|
292
|
{
|
293
|
query.append("unspecified</meta_file_id>");
|
294
|
}
|
295
|
|
296
|
query.append("<querytitle>");
|
297
|
if(params.containsKey("querytitle"))
|
298
|
{
|
299
|
query.append(((String[])params.get("querytitle"))[0]);
|
300
|
query.append("</querytitle>");
|
301
|
}
|
302
|
else
|
303
|
{
|
304
|
query.append("unspecified</querytitle>");
|
305
|
}
|
306
|
|
307
|
if(params.containsKey("doctype"))
|
308
|
{
|
309
|
doctype = ((String[])params.get("doctype"))[0];
|
310
|
}
|
311
|
else
|
312
|
{
|
313
|
doctype = "ANY";
|
314
|
}
|
315
|
|
316
|
if(params.containsKey("returnfield"))
|
317
|
{
|
318
|
String[] returnfield = ((String[])params.get("returnfield"));
|
319
|
for(int i=0; i<returnfield.length; i++)
|
320
|
{
|
321
|
query.append("<returnfield>").append(returnfield[i]);
|
322
|
query.append("</returnfield>");
|
323
|
}
|
324
|
}
|
325
|
|
326
|
//if you don't limit the query by doctype, then it just creates
|
327
|
//an empty returndoctype tag.
|
328
|
if (!doctype.equals("any") &&
|
329
|
!doctype.equals("ANY") &&
|
330
|
!doctype.equals("") )
|
331
|
{
|
332
|
query.append("<returndoctype>");
|
333
|
query.append(doctype).append("</returndoctype>");
|
334
|
}
|
335
|
else
|
336
|
{
|
337
|
query.append("<returndoctype></returndoctype>");
|
338
|
}
|
339
|
|
340
|
//allows the dynamic switching of boolean operators
|
341
|
if(params.containsKey("operator"))
|
342
|
{
|
343
|
query.append("<querygroup operator=\"" +
|
344
|
((String[])params.get("operator"))[0] + "\">");
|
345
|
}
|
346
|
else
|
347
|
{ //the default operator is UNION
|
348
|
query.append("<querygroup operator=\"UNION\">");
|
349
|
}
|
350
|
|
351
|
if(params.containsKey("casesensitive"))
|
352
|
{
|
353
|
casesensitive = ((String[])params.get("casesensitive"))[0];
|
354
|
}
|
355
|
else
|
356
|
{
|
357
|
casesensitive = "false";
|
358
|
}
|
359
|
|
360
|
if(params.containsKey("searchmode"))
|
361
|
{
|
362
|
searchmode = ((String[])params.get("searchmode"))[0];
|
363
|
}
|
364
|
else
|
365
|
{
|
366
|
searchmode = "contains";
|
367
|
}
|
368
|
|
369
|
//anyfield is a special case because it does a
|
370
|
//free text search. It does not have a <pathexpr>
|
371
|
//tag. This allows for a free text search within the structured
|
372
|
//query. This is useful if the INTERSECT operator is used.
|
373
|
if(params.containsKey("anyfield"))
|
374
|
{
|
375
|
String[] anyfield = ((String[])params.get("anyfield"));
|
376
|
//allow for more than one value for anyfield
|
377
|
for(int i=0; i<anyfield.length; i++)
|
378
|
{
|
379
|
if(!anyfield[i].equals(""))
|
380
|
{
|
381
|
query.append("<queryterm casesensitive=\"" + casesensitive +
|
382
|
"\" " + "searchmode=\"" + searchmode + "\"><value>" +
|
383
|
anyfield[i] +
|
384
|
"</value></queryterm>");
|
385
|
}
|
386
|
}
|
387
|
}
|
388
|
|
389
|
//this while loop finds the rest of the parameters
|
390
|
//and attempts to query for the field specified
|
391
|
//by the parameter.
|
392
|
elements = params.elements();
|
393
|
keys = params.keys();
|
394
|
while(keys.hasMoreElements() && elements.hasMoreElements())
|
395
|
{
|
396
|
nextkey = keys.nextElement();
|
397
|
nextelement = elements.nextElement();
|
398
|
|
399
|
//make sure we aren't querying for any of these
|
400
|
//parameters since the are already in the query
|
401
|
//in one form or another.
|
402
|
if(!nextkey.toString().equals("doctype") &&
|
403
|
!nextkey.toString().equals("action") &&
|
404
|
!nextkey.toString().equals("qformat") &&
|
405
|
!nextkey.toString().equals("anyfield") &&
|
406
|
!nextkey.toString().equals("returnfield") &&
|
407
|
!nextkey.toString().equals("operator") )
|
408
|
{
|
409
|
//allow for more than value per field name
|
410
|
for(int i=0; i<((String[])nextelement).length; i++)
|
411
|
{
|
412
|
if(!((String[])nextelement)[i].equals(""))
|
413
|
{
|
414
|
query.append("<queryterm casesensitive=\"" + casesensitive +"\" " +
|
415
|
"searchmode=\"" + searchmode + "\">" +
|
416
|
"<value>" +
|
417
|
//add the query value
|
418
|
((String[])nextelement)[i] +
|
419
|
"</value><pathexpr>" +
|
420
|
//add the path to query by
|
421
|
nextkey.toString() +
|
422
|
"</pathexpr></queryterm>");
|
423
|
}
|
424
|
}
|
425
|
}
|
426
|
}
|
427
|
query.append("</querygroup></pathquery>");
|
428
|
//append on the end of the xml and return the result as a string
|
429
|
return query.toString();
|
430
|
}
|
431
|
|
432
|
/**
|
433
|
* format a simple free-text value query as an XML document that conforms
|
434
|
* to the pathquery.dtd and is appropriate for submission to the DBQuery
|
435
|
* structured query engine
|
436
|
*
|
437
|
* @param value the text string to search for in the xml catalog
|
438
|
* @param doctype the type of documents to include in the result set -- use
|
439
|
* "any" or "ANY" for unfiltered result sets
|
440
|
*/
|
441
|
public static String createQuery(String value, String doctype) {
|
442
|
StringBuffer xmlquery = new StringBuffer();
|
443
|
xmlquery.append("<?xml version=\"1.0\"?>\n");
|
444
|
xmlquery.append("<pathquery version=\"1.0\">");
|
445
|
xmlquery.append("<meta_file_id>Unspecified</meta_file_id>");
|
446
|
xmlquery.append("<querytitle>Unspecified</querytitle>");
|
447
|
|
448
|
if (!doctype.equals("any") && !doctype.equals("ANY")) {
|
449
|
xmlquery.append("<returndoctype>");
|
450
|
xmlquery.append(doctype).append("</returndoctype>");
|
451
|
}
|
452
|
|
453
|
xmlquery.append("<querygroup operator=\"UNION\">");
|
454
|
//chad added - 8/14
|
455
|
//the if statement allows a query to gracefully handle a null
|
456
|
//query. Without this if a nullpointerException is thrown.
|
457
|
if(!value.equals(""))
|
458
|
{
|
459
|
xmlquery.append("<queryterm casesensitive=\"false\" ");
|
460
|
xmlquery.append("searchmode=\"contains\">");
|
461
|
xmlquery.append("<value>").append(value).append("</value>");
|
462
|
xmlquery.append("</queryterm>");
|
463
|
}
|
464
|
xmlquery.append("</querygroup>");
|
465
|
xmlquery.append("</pathquery>");
|
466
|
|
467
|
|
468
|
return (xmlquery.toString());
|
469
|
}
|
470
|
|
471
|
/**
|
472
|
* format a simple free-text value query as an XML document that conforms
|
473
|
* to the pathquery.dtd and is appropriate for submission to the DBQuery
|
474
|
* structured query engine
|
475
|
*
|
476
|
* @param value the text string to search for in the xml catalog
|
477
|
*/
|
478
|
public static String createQuery(String value) {
|
479
|
return createQuery(value, "any");
|
480
|
}
|
481
|
}
|
482
|
|
483
|
/**
|
484
|
* '$Log$
|
485
|
* 'Revision 1.17 2000/08/31 21:20:39 berkley
|
486
|
* 'changed xslf for new returnfield scheme. the returnfields are now returned as <param name="<returnfield>"> tags.
|
487
|
* 'The sql for the returnfield query was redone to fix a previous problem with slow queries
|
488
|
* '
|
489
|
* 'Revision 1.16 2000/08/23 22:55:25 berkley
|
490
|
* 'changed the field names to be case-sensitive in the returnfields
|
491
|
* '
|
492
|
* 'Revision 1.15 2000/08/23 17:22:07 berkley
|
493
|
* 'added support for the returnfield parameter
|
494
|
* '-added the dynamic parameters to the returned hash table of documents
|
495
|
* '
|
496
|
* 'Revision 1.14 2000/08/17 16:02:34 berkley
|
497
|
* 'Made changes to createSQuery to allow for multiple parameters of the same name. Also changed the param list to include only "Hashtable params" without a "String doctype" since the doctype is already contained in the params.
|
498
|
* '
|
499
|
* 'Revision 1.13 2000/08/14 21:26:12 berkley
|
500
|
* 'Added createSQuery() to handle structured queries of an arbitrary number of parameters. Also modified createQuery() to handle a null query in a graceful manner.
|
501
|
* '
|
502
|
* 'Revision 1.12 2000/08/14 20:53:33 jones
|
503
|
* 'Added "release" keyword to all metacat source files so that the release
|
504
|
* 'number will be evident in software distributions.
|
505
|
* '
|
506
|
* 'Revision 1.11 2000/08/11 18:26:07 berkley
|
507
|
* 'added createSQuery
|
508
|
* '
|
509
|
* 'Revision 1.10 2000/07/26 20:40:41 higgins
|
510
|
* 'no message
|
511
|
* '
|
512
|
* 'Revision 1.9 2000/06/26 10:35:04 jones
|
513
|
* 'Merged in substantial changes to DBWriter and associated classes and to
|
514
|
* 'the MetaCatServlet in order to accommodate the new UPDATE and DELETE
|
515
|
* 'functions. The command line tools and the parameters for the
|
516
|
* 'servlet have changed substantially.
|
517
|
* '
|
518
|
* 'Revision 1.8.2.2 2000/06/25 23:38:16 jones
|
519
|
* 'Added RCSfile keyword
|
520
|
* '
|
521
|
* 'Revision 1.8.2.1 2000/06/25 23:34:17 jones
|
522
|
* 'Changed documentation formatting, added log entries at bottom of source files
|
523
|
* ''
|
524
|
*/
|