1 |
17
|
jones
|
/**
|
2 |
203
|
jones
|
* '$RCSfile$'
|
3 |
|
|
* Purpose: A Class that handles the SAX XML events as they
|
4 |
|
|
* are generated from XML documents
|
5 |
|
|
* Copyright: 2000 Regents of the University of California and the
|
6 |
|
|
* National Center for Ecological Analysis and Synthesis
|
7 |
|
|
* Authors: Matt Jones, Jivka Bojilova
|
8 |
349
|
jones
|
* Release: @release@
|
9 |
17
|
jones
|
*
|
10 |
203
|
jones
|
* '$Author$'
|
11 |
|
|
* '$Date$'
|
12 |
|
|
* '$Revision$'
|
13 |
17
|
jones
|
*/
|
14 |
|
|
|
15 |
75
|
jones
|
package edu.ucsb.nceas.metacat;
|
16 |
51
|
jones
|
|
17 |
17
|
jones
|
import java.sql.*;
|
18 |
|
|
import java.util.Stack;
|
19 |
18
|
jones
|
import java.util.EmptyStackException;
|
20 |
17
|
jones
|
|
21 |
185
|
jones
|
import org.xml.sax.Attributes;
|
22 |
|
|
import org.xml.sax.SAXException;
|
23 |
204
|
jones
|
import org.xml.sax.SAXParseException;
|
24 |
186
|
jones
|
import org.xml.sax.ext.DeclHandler;
|
25 |
185
|
jones
|
import org.xml.sax.ext.LexicalHandler;
|
26 |
|
|
import org.xml.sax.helpers.DefaultHandler;
|
27 |
17
|
jones
|
|
28 |
31
|
jones
|
/**
|
29 |
|
|
* A database aware Class implementing callback bethods for the SAX parser to
|
30 |
|
|
* call when processing the XML stream and generating events
|
31 |
|
|
*/
|
32 |
186
|
jones
|
public class DBSAXHandler extends DefaultHandler
|
33 |
|
|
implements LexicalHandler, DeclHandler {
|
34 |
17
|
jones
|
|
35 |
204
|
jones
|
private boolean atFirstElement;
|
36 |
243
|
jones
|
private boolean processingDTD;
|
37 |
204
|
jones
|
private String docname = null;
|
38 |
135
|
jones
|
private String doctype;
|
39 |
|
|
private String systemid;
|
40 |
17
|
jones
|
private boolean stackCreated = false;
|
41 |
137
|
jones
|
private Stack nodeStack;
|
42 |
17
|
jones
|
private Connection conn = null;
|
43 |
142
|
jones
|
private DBSAXDocument currentDocument;
|
44 |
185
|
jones
|
private DBSAXNode rootNode;
|
45 |
203
|
jones
|
private String action = null;
|
46 |
|
|
private String docid = null;
|
47 |
17
|
jones
|
|
48 |
220
|
jones
|
private static final int MAXDATACHARS = 4000;
|
49 |
|
|
|
50 |
31
|
jones
|
/** Construct an instance of the handler class
|
51 |
|
|
*
|
52 |
|
|
* @param conn the JDBC connection to which information is written
|
53 |
|
|
*/
|
54 |
122
|
jones
|
public DBSAXHandler(Connection conn) {
|
55 |
185
|
jones
|
this.conn = conn;
|
56 |
204
|
jones
|
this.atFirstElement = true;
|
57 |
243
|
jones
|
this.processingDTD = false;
|
58 |
17
|
jones
|
|
59 |
185
|
jones
|
// Create the stack for keeping track of node context
|
60 |
|
|
// if it doesn't already exist
|
61 |
|
|
if (!stackCreated) {
|
62 |
|
|
nodeStack = new Stack();
|
63 |
|
|
stackCreated = true;
|
64 |
|
|
}
|
65 |
17
|
jones
|
}
|
66 |
|
|
|
67 |
203
|
jones
|
/** Construct an instance of the handler class
|
68 |
|
|
*
|
69 |
|
|
* @param conn the JDBC connection to which information is written
|
70 |
|
|
*/
|
71 |
|
|
public DBSAXHandler(Connection conn, String action, String docid) {
|
72 |
|
|
this(conn);
|
73 |
|
|
this.action = action;
|
74 |
|
|
this.docid = docid;
|
75 |
|
|
}
|
76 |
|
|
|
77 |
72
|
bojilova
|
/** SAX Handler that receives notification of beginning of the document */
|
78 |
122
|
jones
|
public void startDocument() throws SAXException {
|
79 |
203
|
jones
|
MetaCatUtil.debugMessage("start Document");
|
80 |
|
|
|
81 |
185
|
jones
|
// Create the document node representation as root
|
82 |
|
|
rootNode = new DBSAXNode(conn, docname);
|
83 |
204
|
jones
|
MetaCatUtil.debugMessage("PRINTING DOCNAME FROM ROOTNODE: " +
|
84 |
|
|
rootNode.getTagName());
|
85 |
185
|
jones
|
// Add the node to the stack, so that any text data can be
|
86 |
|
|
// added as it is encountered
|
87 |
|
|
nodeStack.push(rootNode);
|
88 |
72
|
bojilova
|
}
|
89 |
|
|
|
90 |
|
|
/** SAX Handler that receives notification of end of the document */
|
91 |
122
|
jones
|
public void endDocument() throws SAXException {
|
92 |
185
|
jones
|
currentDocument.setTitleFromChildElement();
|
93 |
203
|
jones
|
MetaCatUtil.debugMessage("end Document");
|
94 |
|
|
if ((docid != null) && (!docid.equals(currentDocument.getDocID()))) {
|
95 |
|
|
throw (new SAXException("New document ID generated:",
|
96 |
204
|
jones
|
new AccessionNumberGeneratedException(currentDocument.getDocID())));
|
97 |
203
|
jones
|
} else {
|
98 |
|
|
throw (new SAXException("New document ID generated:",
|
99 |
204
|
jones
|
new AccessionNumberGeneratedException(currentDocument.getDocID())));
|
100 |
122
|
jones
|
}
|
101 |
72
|
bojilova
|
}
|
102 |
|
|
|
103 |
185
|
jones
|
/** SAX Handler that is called at the start of each XML element */
|
104 |
|
|
public void startElement(String uri, String localName,
|
105 |
|
|
String qName, Attributes atts)
|
106 |
|
|
throws SAXException {
|
107 |
203
|
jones
|
MetaCatUtil.debugMessage("Start ELEMENT " + localName);
|
108 |
72
|
bojilova
|
|
109 |
203
|
jones
|
DBSAXNode parentNode = null;
|
110 |
|
|
DBSAXNode currentNode = null;
|
111 |
17
|
jones
|
|
112 |
203
|
jones
|
// Get a reference to the parent node for the id
|
113 |
|
|
try {
|
114 |
|
|
parentNode = (DBSAXNode)nodeStack.peek();
|
115 |
|
|
} catch (EmptyStackException e) {
|
116 |
|
|
}
|
117 |
18
|
jones
|
|
118 |
203
|
jones
|
// Document representation that points to the root document node
|
119 |
204
|
jones
|
if (atFirstElement) {
|
120 |
|
|
atFirstElement = false;
|
121 |
203
|
jones
|
// If no DOCTYPE declaration: docname = root element name
|
122 |
|
|
if (docname == null) {
|
123 |
|
|
docname = localName;
|
124 |
204
|
jones
|
doctype = docname;
|
125 |
|
|
MetaCatUtil.debugMessage("DOCNAME-a: " + docname);
|
126 |
|
|
MetaCatUtil.debugMessage("DOCTYPE-a: " + doctype);
|
127 |
203
|
jones
|
} else if (doctype == null) {
|
128 |
204
|
jones
|
doctype = docname;
|
129 |
|
|
//doctype = DBEntityResolver.doctype;
|
130 |
|
|
MetaCatUtil.debugMessage("DOCTYPE-b: " + doctype);
|
131 |
203
|
jones
|
}
|
132 |
|
|
rootNode.writeNodename(docname);
|
133 |
|
|
rootNode.writeRootNodeID(rootNode.getNodeID());
|
134 |
|
|
try {
|
135 |
|
|
currentDocument = new DBSAXDocument(conn, rootNode.getNodeID(),
|
136 |
|
|
docname, doctype, docid, action);
|
137 |
|
|
} catch (AccessionNumberException ane) {
|
138 |
204
|
jones
|
throw (new SAXException("Error with " + action, ane));
|
139 |
203
|
jones
|
}
|
140 |
|
|
rootNode.writeDocID(currentDocument.getDocID());
|
141 |
|
|
}
|
142 |
135
|
jones
|
|
143 |
203
|
jones
|
// Create the current node representation
|
144 |
313
|
bojilova
|
currentNode = new DBSAXNode(conn, localName, parentNode, currentDocument);
|
145 |
17
|
jones
|
|
146 |
203
|
jones
|
// Add all of the attributes
|
147 |
|
|
for (int i=0; i<atts.getLength(); i++) {
|
148 |
|
|
currentNode.setAttribute(atts.getLocalName(i), atts.getValue(i));
|
149 |
|
|
}
|
150 |
17
|
jones
|
|
151 |
203
|
jones
|
// Add the node to the stack, so that any text data can be
|
152 |
|
|
// added as it is encountered
|
153 |
|
|
nodeStack.push(currentNode);
|
154 |
|
|
}
|
155 |
17
|
jones
|
|
156 |
31
|
jones
|
/** SAX Handler that is called for each XML text node */
|
157 |
122
|
jones
|
public void characters(char[] cbuf, int start, int len) {
|
158 |
203
|
jones
|
MetaCatUtil.debugMessage("CHARACTERS");
|
159 |
186
|
jones
|
DBSAXNode currentNode = (DBSAXNode)nodeStack.peek();
|
160 |
220
|
jones
|
String data = null;
|
161 |
|
|
int leftover = len;
|
162 |
|
|
int offset = start;
|
163 |
|
|
boolean moredata = true;
|
164 |
|
|
|
165 |
|
|
// This loop deals with the case where there are more characters
|
166 |
|
|
// than can fit in a single database text field (limit is
|
167 |
|
|
// MAXDATACHARS). If the text to be inserted exceeds MAXDATACHARS,
|
168 |
|
|
// write a series of nodes that are MAXDATACHARS long, and then the
|
169 |
|
|
// final node contains the remainder
|
170 |
|
|
while (moredata) {
|
171 |
|
|
if (leftover > MAXDATACHARS) {
|
172 |
|
|
data = new String(cbuf, offset, MAXDATACHARS);
|
173 |
|
|
leftover -= MAXDATACHARS;
|
174 |
|
|
offset += MAXDATACHARS;
|
175 |
|
|
} else {
|
176 |
|
|
data = new String(cbuf, offset, leftover);
|
177 |
|
|
moredata = false;
|
178 |
|
|
}
|
179 |
122
|
jones
|
|
180 |
220
|
jones
|
// Write the content of the node to the database
|
181 |
|
|
currentNode.writeChildNodeToDB("TEXT", null, data);
|
182 |
|
|
}
|
183 |
17
|
jones
|
}
|
184 |
|
|
|
185 |
31
|
jones
|
/**
|
186 |
|
|
* SAX Handler that is called for each XML text node that is Ignorable
|
187 |
|
|
* white space
|
188 |
|
|
*/
|
189 |
122
|
jones
|
public void ignorableWhitespace(char[] cbuf, int start, int len) {
|
190 |
203
|
jones
|
MetaCatUtil.debugMessage("IGNORABLEWHITESPACE");
|
191 |
17
|
jones
|
}
|
192 |
|
|
|
193 |
122
|
jones
|
/**
|
194 |
|
|
* SAX Handler called once for each processing instruction found:
|
195 |
|
|
* node that PI may occur before or after the root element.
|
196 |
|
|
*/
|
197 |
|
|
public void processingInstruction(String target, String data)
|
198 |
|
|
throws SAXException {
|
199 |
203
|
jones
|
MetaCatUtil.debugMessage("PI");
|
200 |
186
|
jones
|
DBSAXNode currentNode = (DBSAXNode)nodeStack.peek();
|
201 |
|
|
currentNode.writeChildNodeToDB("PI", target, data);
|
202 |
92
|
bojilova
|
}
|
203 |
72
|
bojilova
|
|
204 |
31
|
jones
|
/** SAX Handler that is called at the end of each XML element */
|
205 |
185
|
jones
|
public void endElement(String uri, String localName,
|
206 |
|
|
String qName) throws SAXException {
|
207 |
203
|
jones
|
MetaCatUtil.debugMessage("End ELEMENT " + localName);
|
208 |
17
|
jones
|
|
209 |
185
|
jones
|
// Get the node from the stack
|
210 |
|
|
DBSAXNode currentNode = (DBSAXNode)nodeStack.pop();
|
211 |
17
|
jones
|
}
|
212 |
|
|
|
213 |
185
|
jones
|
//
|
214 |
|
|
// the next section implements the LexicalHandler interface
|
215 |
|
|
//
|
216 |
|
|
|
217 |
|
|
/** SAX Handler that receives notification of DOCTYPE. Sets the DTD */
|
218 |
|
|
public void startDTD(String name, String publicId, String systemId)
|
219 |
|
|
throws SAXException {
|
220 |
|
|
docname = name;
|
221 |
|
|
doctype = publicId;
|
222 |
|
|
systemid = systemId;
|
223 |
|
|
|
224 |
204
|
jones
|
MetaCatUtil.debugMessage("Start DTD");
|
225 |
203
|
jones
|
MetaCatUtil.debugMessage("DOCNAME: " + docname);
|
226 |
|
|
MetaCatUtil.debugMessage("DOCTYPE: " + doctype);
|
227 |
|
|
MetaCatUtil.debugMessage(" SYSID: " + systemid);
|
228 |
185
|
jones
|
}
|
229 |
|
|
|
230 |
|
|
/**
|
231 |
|
|
* SAX Handler that receives notification of end of DTD
|
232 |
|
|
*/
|
233 |
|
|
public void endDTD() throws SAXException {
|
234 |
204
|
jones
|
MetaCatUtil.debugMessage("end DTD");
|
235 |
185
|
jones
|
}
|
236 |
|
|
|
237 |
|
|
/**
|
238 |
|
|
* SAX Handler that receives notification of comments in the DTD
|
239 |
|
|
*/
|
240 |
|
|
public void comment(char[] ch, int start, int length) throws SAXException {
|
241 |
203
|
jones
|
MetaCatUtil.debugMessage("COMMENT");
|
242 |
186
|
jones
|
DBSAXNode currentNode = (DBSAXNode)nodeStack.peek();
|
243 |
|
|
currentNode.writeChildNodeToDB("COMMENT", null, new String(ch));
|
244 |
185
|
jones
|
}
|
245 |
|
|
|
246 |
|
|
/**
|
247 |
|
|
* SAX Handler that receives notification of the start of CDATA sections
|
248 |
|
|
*/
|
249 |
|
|
public void startCDATA() throws SAXException {
|
250 |
203
|
jones
|
MetaCatUtil.debugMessage("start CDATA");
|
251 |
185
|
jones
|
}
|
252 |
|
|
|
253 |
|
|
/**
|
254 |
|
|
* SAX Handler that receives notification of the end of CDATA sections
|
255 |
|
|
*/
|
256 |
|
|
public void endCDATA() throws SAXException {
|
257 |
203
|
jones
|
MetaCatUtil.debugMessage("end CDATA");
|
258 |
185
|
jones
|
}
|
259 |
|
|
|
260 |
|
|
/**
|
261 |
|
|
* SAX Handler that receives notification of the start of entities
|
262 |
|
|
*/
|
263 |
|
|
public void startEntity(String name) throws SAXException {
|
264 |
243
|
jones
|
MetaCatUtil.debugMessage("start ENTITY: " + name);
|
265 |
|
|
if (name.equals("[dtd]")) {
|
266 |
|
|
processingDTD = true;
|
267 |
|
|
}
|
268 |
185
|
jones
|
}
|
269 |
|
|
|
270 |
|
|
/**
|
271 |
|
|
* SAX Handler that receives notification of the end of entities
|
272 |
|
|
*/
|
273 |
|
|
public void endEntity(String name) throws SAXException {
|
274 |
243
|
jones
|
MetaCatUtil.debugMessage("end ENTITY: " + name);
|
275 |
|
|
if (name.equals("[dtd]")) {
|
276 |
|
|
processingDTD = false;
|
277 |
|
|
}
|
278 |
185
|
jones
|
}
|
279 |
186
|
jones
|
|
280 |
|
|
/**
|
281 |
|
|
* SAX Handler that receives notification of element declarations
|
282 |
|
|
*/
|
283 |
|
|
public void elementDecl(String name, String model)
|
284 |
|
|
throws org.xml.sax.SAXException {
|
285 |
243
|
jones
|
MetaCatUtil.debugMessage("ELEMENTDECL: " + name + " " + model);
|
286 |
186
|
jones
|
}
|
287 |
|
|
|
288 |
|
|
/**
|
289 |
|
|
* SAX Handler that receives notification of attribute declarations
|
290 |
|
|
*/
|
291 |
|
|
public void attributeDecl(String eName, String aName,
|
292 |
|
|
String type, String valueDefault, String value)
|
293 |
|
|
throws org.xml.sax.SAXException {
|
294 |
243
|
jones
|
MetaCatUtil.debugMessage("ATTRIBUTEDECL: " + eName + " "
|
295 |
|
|
+ aName + " " + type + " " + valueDefault + " "
|
296 |
|
|
+ value);
|
297 |
186
|
jones
|
}
|
298 |
|
|
|
299 |
|
|
/**
|
300 |
|
|
* SAX Handler that receives notification of internal entity declarations
|
301 |
|
|
*/
|
302 |
|
|
public void internalEntityDecl(String name, String value)
|
303 |
|
|
throws org.xml.sax.SAXException {
|
304 |
243
|
jones
|
MetaCatUtil.debugMessage("INTERNENTITYDECL: " + name + " " + value);
|
305 |
186
|
jones
|
}
|
306 |
|
|
|
307 |
|
|
/**
|
308 |
|
|
* SAX Handler that receives notification of external entity declarations
|
309 |
|
|
*/
|
310 |
|
|
public void externalEntityDecl(String name, String publicId,
|
311 |
|
|
String systemId)
|
312 |
|
|
throws org.xml.sax.SAXException {
|
313 |
243
|
jones
|
MetaCatUtil.debugMessage("EXTERNENTITYDECL: " + name + " " + publicId
|
314 |
|
|
+ " " + systemId);
|
315 |
186
|
jones
|
}
|
316 |
|
|
|
317 |
204
|
jones
|
//
|
318 |
|
|
// the next section implements the ErrorHandler interface
|
319 |
|
|
//
|
320 |
186
|
jones
|
|
321 |
204
|
jones
|
/**
|
322 |
|
|
* SAX Handler that receives notification of fatal parsing errors
|
323 |
|
|
*/
|
324 |
|
|
public void fatalError(SAXParseException exception) throws SAXException {
|
325 |
|
|
MetaCatUtil.debugMessage("FATALERROR");
|
326 |
|
|
throw (new SAXException("Fatal processing error.", exception));
|
327 |
|
|
}
|
328 |
|
|
|
329 |
|
|
/**
|
330 |
|
|
* SAX Handler that receives notification of recoverable parsing errors
|
331 |
|
|
*/
|
332 |
|
|
public void error(SAXParseException exception) throws SAXException {
|
333 |
|
|
MetaCatUtil.debugMessage("ERROR");
|
334 |
|
|
}
|
335 |
|
|
|
336 |
|
|
/**
|
337 |
|
|
* SAX Handler that receives notification of warnings
|
338 |
|
|
*/
|
339 |
|
|
public void warning(SAXParseException exception) throws SAXException {
|
340 |
|
|
MetaCatUtil.debugMessage("FATALERROR");
|
341 |
|
|
}
|
342 |
|
|
|
343 |
|
|
//
|
344 |
|
|
// Helper, getter and setter methods
|
345 |
|
|
//
|
346 |
|
|
|
347 |
|
|
/**
|
348 |
|
|
* get the document name
|
349 |
|
|
*/
|
350 |
|
|
public String getDocname() {
|
351 |
|
|
return docname;
|
352 |
|
|
}
|
353 |
|
|
|
354 |
|
|
/**
|
355 |
|
|
* get the document processing state
|
356 |
|
|
*/
|
357 |
243
|
jones
|
public boolean processingDTD() {
|
358 |
|
|
return processingDTD;
|
359 |
204
|
jones
|
}
|
360 |
17
|
jones
|
}
|
361 |
203
|
jones
|
|
362 |
|
|
/**
|
363 |
|
|
* '$Log$
|
364 |
349
|
jones
|
* 'Revision 1.32 2000/08/03 23:17:03 bojilova
|
365 |
|
|
* 'Call to DBSAXNode constructor simplified
|
366 |
|
|
* '
|
367 |
313
|
bojilova
|
* 'Revision 1.31 2000/06/29 23:27:08 jones
|
368 |
|
|
* 'Fixed bug in DBEntityResolver so that it now properly delegates to
|
369 |
|
|
* 'the system id found in the database.
|
370 |
|
|
* 'Changed DBValidate to use DBEntityResolver, rather than the OASIS
|
371 |
|
|
* 'catalog, and to return validation results in XML format.
|
372 |
|
|
* '
|
373 |
243
|
jones
|
* 'Revision 1.30 2000/06/28 03:14:35 jones
|
374 |
|
|
* 'Fixed bug where TEXT nodes couldn't be longer than 4000 characters, which
|
375 |
|
|
* 'is the maximum length of a VARCHAR2 field in Oracle. Now, if text
|
376 |
|
|
* 'exceeds the field length, I break the text up into a series of TEXT
|
377 |
|
|
* 'nodes each of the max field length, and the remainder in the last
|
378 |
|
|
* 'TEXT node. The only problem with this is that our current search
|
379 |
|
|
* 'algorithms only will find phrases within a single TEXT nodes, so if
|
380 |
|
|
* 'the search term spans the node boundary, the search algorithm will not
|
381 |
|
|
* 'return a hit. I expect this is extremely rare, basically inconsequential.
|
382 |
|
|
* '
|
383 |
220
|
jones
|
* 'Revision 1.29 2000/06/27 04:31:07 jones
|
384 |
|
|
* 'Fixed bugs associated with the new UPDATE and DELETE functions of
|
385 |
|
|
* 'DBWriter. There were problematic interactions between some static
|
386 |
|
|
* 'variables used in DBEntityResolver and the way in which the
|
387 |
|
|
* 'Servlet objects are re-used across multiple client invocations.
|
388 |
|
|
* '
|
389 |
|
|
* 'Generally cleaned up error reporting. Now all errors and success
|
390 |
|
|
* 'results are reported as XML documents from MetaCatServlet. Need
|
391 |
|
|
* 'to make the command line tools do the same.
|
392 |
|
|
* '
|
393 |
204
|
jones
|
* 'Revision 1.28 2000/06/26 10:35:05 jones
|
394 |
|
|
* 'Merged in substantial changes to DBWriter and associated classes and to
|
395 |
|
|
* 'the MetaCatServlet in order to accommodate the new UPDATE and DELETE
|
396 |
|
|
* 'functions. The command line tools and the parameters for the
|
397 |
|
|
* 'servlet have changed substantially.
|
398 |
|
|
* '
|
399 |
203
|
jones
|
* 'Revision 1.27.2.6 2000/06/26 02:02:20 jones
|
400 |
|
|
* 'Continued fixing problems with exception handling that deals
|
401 |
|
|
* 'with INSERT and UPDATE actions and the docid passed to DBWriter
|
402 |
|
|
* '
|
403 |
|
|
* 'Revision 1.27.2.5 2000/06/26 00:51:06 jones
|
404 |
|
|
* 'If docid passed to DBWriter.write() is not unique, classes now generate
|
405 |
|
|
* 'an AccessionNumberException containing the new docid generated as a
|
406 |
|
|
* 'replacement. The docid is then extracted from the exception and
|
407 |
|
|
* 'returned to the calling application for user feedback or client processing.
|
408 |
|
|
* '
|
409 |
|
|
* 'Revision 1.27.2.4 2000/06/25 23:38:16 jones
|
410 |
|
|
* 'Added RCSfile keyword
|
411 |
|
|
* '
|
412 |
|
|
* 'Revision 1.27.2.3 2000/06/25 23:34:17 jones
|
413 |
|
|
* 'Changed documentation formatting, added log entries at bottom of source files
|
414 |
|
|
* ''
|
415 |
|
|
*/
|