Revision 1398
Added by Jing Tao about 22 years ago
src/edu/ucsb/nceas/metacat/EmlSAXHandler.java | ||
---|---|---|
1 |
/** |
|
2 |
* '$RCSfile$' |
|
3 |
* Purpose: A Class that handles the SAX XML events as they |
|
4 |
* are generated from XML documents |
|
5 |
* Copyright: 2000 Regents of the University of California and the |
|
6 |
* National Center for Ecological Analysis and Synthesis |
|
7 |
* Authors: Matt Jones, Jivka Bojilova |
|
8 |
* Release: @release@ |
|
9 |
* |
|
10 |
* '$Author$' |
|
11 |
* '$Date$' |
|
12 |
* '$Revision$' |
|
13 |
* |
|
14 |
* This program is free software; you can redistribute it and/or modify |
|
15 |
* it under the terms of the GNU General Public License as published by |
|
16 |
* the Free Software Foundation; either version 2 of the License, or |
|
17 |
* (at your option) any later version. |
|
18 |
* |
|
19 |
* This program is distributed in the hope that it will be useful, |
|
20 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
21 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
22 |
* GNU General Public License for more details. |
|
23 |
* |
|
24 |
* You should have received a copy of the GNU General Public License |
|
25 |
* along with this program; if not, write to the Free Software |
|
26 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
27 |
*/ |
|
28 |
|
|
29 |
package edu.ucsb.nceas.metacat; |
|
30 |
|
|
31 |
import java.sql.*; |
|
32 |
import java.io.StringReader; |
|
33 |
import java.util.Stack; |
|
34 |
import java.util.Vector; |
|
35 |
import java.util.Hashtable; |
|
36 |
import java.util.Enumeration; |
|
37 |
import java.util.EmptyStackException; |
|
38 |
|
|
39 |
import org.xml.sax.Attributes; |
|
40 |
import org.xml.sax.SAXException; |
|
41 |
import org.xml.sax.SAXParseException; |
|
42 |
import org.xml.sax.ext.DeclHandler; |
|
43 |
import org.xml.sax.ext.LexicalHandler; |
|
44 |
import org.xml.sax.helpers.DefaultHandler; |
|
45 |
|
|
46 |
/** |
|
47 |
* A database aware Class implementing callback bethods for the SAX parser to |
|
48 |
* call when processing the XML stream and generating events |
|
49 |
*/ |
|
50 |
public class EmlSAXHandler extends DBSAXHandler implements |
|
51 |
AccessControlInterface |
|
52 |
{ |
|
53 |
private Vector allowRules = new Vector(); |
|
54 |
private Vector denyRules = new Vector(); |
|
55 |
private String documentId = null; |
|
56 |
private Vector subDocumentIdList = new Vector(); |
|
57 |
|
|
58 |
// Constant |
|
59 |
private static final String DESCRIBES = "describes"; |
|
60 |
|
|
61 |
/** Construct an instance of the handler class |
|
62 |
* |
|
63 |
* @param conn the JDBC connection to which information is written |
|
64 |
* @param action - "INSERT" or "UPDATE" |
|
65 |
* @param docid to be inserted or updated into JDBC connection |
|
66 |
* @param user the user connected to MetaCat servlet and owns the document |
|
67 |
* @param groups the groups to which user belongs |
|
68 |
* @param pub flag for public "read" access on document |
|
69 |
* @param serverCode the serverid from xml_replication on which this document |
|
70 |
* resides. |
|
71 |
* |
|
72 |
*/ |
|
73 |
public EmlSAXHandler(DBConnection conn, String action, String docid, |
|
74 |
String user, String[] groups, String pub, int serverCode) |
|
75 |
{ |
|
76 |
super(conn, action, docid, user, groups, pub, serverCode); |
|
77 |
} |
|
78 |
|
|
79 |
/** SAX Handler that is called at the start of each XML element */ |
|
80 |
public void startElement(String uri, String localName, |
|
81 |
String qName, Attributes atts) |
|
82 |
throws SAXException |
|
83 |
{ |
|
84 |
// for element <eml:eml...> qname is "eml:eml", local name is "eml" |
|
85 |
// for element <acl....> both qname and local name is "eml" |
|
86 |
// uri is namesapce |
|
87 |
MetaCatUtil.debugMessage("Start ELEMENT(qName) " + qName, 50); |
|
88 |
MetaCatUtil.debugMessage("Start ELEMENT(localName) " + localName, 50); |
|
89 |
MetaCatUtil.debugMessage("Start ELEMENT(uri) " + uri, 50); |
|
90 |
|
|
91 |
|
|
92 |
DBSAXNode parentNode = null; |
|
93 |
DBSAXNode currentNode = null; |
|
94 |
|
|
95 |
// Get a reference to the parent node for the id |
|
96 |
try { |
|
97 |
parentNode = (DBSAXNode)nodeStack.peek(); |
|
98 |
} catch (EmptyStackException e) { |
|
99 |
parentNode = null; |
|
100 |
} |
|
101 |
|
|
102 |
// Document representation that points to the root document node |
|
103 |
if (atFirstElement) |
|
104 |
{ |
|
105 |
atFirstElement = false; |
|
106 |
// If no DOCTYPE declaration: docname = root element |
|
107 |
// doctype = root element name or name space |
|
108 |
if (docname == null) |
|
109 |
{ |
|
110 |
docname = localName; |
|
111 |
// if uri isn't null doctype = uri(namespace) |
|
112 |
// othewise root element |
|
113 |
if (uri != null && !(uri.trim()).equals("")) |
|
114 |
{ |
|
115 |
doctype = uri; |
|
116 |
} |
|
117 |
else |
|
118 |
{ |
|
119 |
doctype = docname; |
|
120 |
} |
|
121 |
MetaCatUtil.debugMessage("DOCNAME-a: " + docname, 30); |
|
122 |
MetaCatUtil.debugMessage("DOCTYPE-a: " + doctype, 30); |
|
123 |
} |
|
124 |
else if (doctype == null) |
|
125 |
{ |
|
126 |
// because docname is not null and it is declared in dtd |
|
127 |
// so could not be in schema, no namespace |
|
128 |
doctype = docname; |
|
129 |
MetaCatUtil.debugMessage("DOCTYPE-b: " + doctype, 30); |
|
130 |
} |
|
131 |
rootNode.writeNodename(docname); |
|
132 |
try { |
|
133 |
// for validated XML Documents store a reference to XML DB Catalog |
|
134 |
// Because this is select statement and it needn't to roll back if |
|
135 |
// insert document action fialed. |
|
136 |
// In order to decrease DBConnection usage count, we get a new |
|
137 |
// dbconnection from pool |
|
138 |
String catalogid = null; |
|
139 |
DBConnection dbConn = null; |
|
140 |
int serialNumber = -1; |
|
141 |
|
|
142 |
if ( systemid != null ) { |
|
143 |
try |
|
144 |
{ |
|
145 |
// Get dbconnection |
|
146 |
dbConn=DBConnectionPool.getDBConnection |
|
147 |
("DBSAXHandler.startElement"); |
|
148 |
serialNumber=dbConn.getCheckOutSerialNumber(); |
|
149 |
|
|
150 |
Statement stmt = dbConn.createStatement(); |
|
151 |
ResultSet rs = stmt.executeQuery( |
|
152 |
"SELECT catalog_id FROM xml_catalog " + |
|
153 |
"WHERE entry_type = 'DTD' " + |
|
154 |
"AND public_id = '" + doctype + "'"); |
|
155 |
boolean hasRow = rs.next(); |
|
156 |
if ( hasRow ) { |
|
157 |
catalogid = rs.getString(1); |
|
158 |
} |
|
159 |
stmt.close(); |
|
160 |
}//try |
|
161 |
finally |
|
162 |
{ |
|
163 |
// Return dbconnection |
|
164 |
DBConnectionPool.returnDBConnection(dbConn, serialNumber); |
|
165 |
}//finally |
|
166 |
} |
|
167 |
|
|
168 |
//create documentImpl object by the constructor which can specify |
|
169 |
//the revision |
|
170 |
currentDocument = new DocumentImpl(connection, rootNode.getNodeID(), |
|
171 |
docname, doctype, docid, revision, action, user, |
|
172 |
this.pub, catalogid, this.serverCode); |
|
173 |
|
|
174 |
|
|
175 |
} catch (Exception ane) { |
|
176 |
throw (new SAXException("Error in DBSaxHandler.startElement " + |
|
177 |
action, ane)); |
|
178 |
} |
|
179 |
} |
|
180 |
|
|
181 |
// Create the current node representation |
|
182 |
currentNode = new DBSAXNode(connection, qName, localName, parentNode, |
|
183 |
currentDocument.getRootNodeID(),docid, |
|
184 |
currentDocument.getDoctype()); |
|
185 |
|
|
186 |
// Add all of the namespaces |
|
187 |
String prefix; |
|
188 |
String nsuri; |
|
189 |
Enumeration prefixes = namespaces.keys(); |
|
190 |
while ( prefixes.hasMoreElements() ) { |
|
191 |
prefix = (String)prefixes.nextElement(); |
|
192 |
nsuri = (String)namespaces.get(prefix); |
|
193 |
currentNode.setNamespace(prefix, nsuri, docid); |
|
194 |
} |
|
195 |
namespaces = null; |
|
196 |
namespaces = new Hashtable(); |
|
197 |
|
|
198 |
// Add all of the attributes |
|
199 |
for (int i=0; i<atts.getLength(); i++) |
|
200 |
{ |
|
201 |
String attributeName = atts.getQName(i); |
|
202 |
String attributeValue = atts.getValue(i); |
|
203 |
currentNode.setAttribute(attributeName, attributeValue, docid); |
|
204 |
|
|
205 |
// To handle name space and schema location if the attribute name is |
|
206 |
// xsi:schemaLocation. If the name space is in not in catalog table |
|
207 |
// it will be regeistered. |
|
208 |
if (attributeName != null && |
|
209 |
attributeName.indexOf(MetaCatServlet.SCHEMALOCATIONKEYWORD) != -1) |
|
210 |
{ |
|
211 |
SchemaLocationResolver resolver = |
|
212 |
new SchemaLocationResolver(attributeValue); |
|
213 |
resolver.resolveNameSpace(); |
|
214 |
|
|
215 |
} |
|
216 |
} |
|
217 |
|
|
218 |
// handle access stuff |
|
219 |
if (localName.equals(ACCESS)) |
|
220 |
{ |
|
221 |
// if it is in addtionalmetacat |
|
222 |
if (currentNode.getTagName() == DESCRIBES) |
|
223 |
{ |
|
224 |
|
|
225 |
// get the value in current |
|
226 |
} |
|
227 |
} |
|
228 |
// Add the node to the stack, so that any text data can be |
|
229 |
// added as it is encountered |
|
230 |
nodeStack.push(currentNode); |
|
231 |
// Add the node to the vector used by thread for writing XML Index |
|
232 |
nodeIndex.addElement(currentNode); |
|
233 |
|
|
234 |
} |
|
235 |
|
|
236 |
/* The run method of xmlIndex thread. It writes XML Index for the document. */ |
|
237 |
public void run () |
|
238 |
{ |
|
239 |
DBSAXNode currNode = null; |
|
240 |
DBSAXNode prevNode = null; |
|
241 |
DBConnection dbConn = null; |
|
242 |
int serialNumber = -1; |
|
243 |
String doctype = currentDocument.getDoctype(); |
|
244 |
int step = 0; |
|
245 |
int counter = 0; |
|
246 |
|
|
247 |
try |
|
248 |
{ |
|
249 |
|
|
250 |
// Opening separate db connection for writing XML Index |
|
251 |
dbConn=DBConnectionPool.getDBConnection("DBSAXHandler.run"); |
|
252 |
serialNumber=dbConn.getCheckOutSerialNumber(); |
|
253 |
dbConn.setAutoCommit(false); |
|
254 |
|
|
255 |
//the following while loop construct checks to make sure that the docid |
|
256 |
//of the document that we are trying to index is already |
|
257 |
//in the xml_documents table. if this is not the case, the foreign |
|
258 |
//key relationship between xml_documents and xml_index is temporarily |
|
259 |
//broken causing multiple problems. |
|
260 |
boolean inxmldoc = false; |
|
261 |
long startTime = System.currentTimeMillis(); |
|
262 |
while(!inxmldoc) |
|
263 |
{ |
|
264 |
String xmlDocumentsCheck = "select distinct docid from xml_documents"; |
|
265 |
PreparedStatement xmlDocCheck = |
|
266 |
dbConn.prepareStatement(xmlDocumentsCheck); |
|
267 |
// Increase usage count |
|
268 |
dbConn.increaseUsageCount(1); |
|
269 |
xmlDocCheck.execute(); |
|
270 |
ResultSet doccheckRS = xmlDocCheck.getResultSet(); |
|
271 |
boolean tableHasRows = doccheckRS.next(); |
|
272 |
Vector docids = new Vector(); |
|
273 |
while(tableHasRows) |
|
274 |
{ |
|
275 |
docids.add(doccheckRS.getString(1).trim()); |
|
276 |
tableHasRows = doccheckRS.next(); |
|
277 |
} |
|
278 |
|
|
279 |
for(int i=0; i<docids.size(); i++) |
|
280 |
{ |
|
281 |
String d = ((String)docids.elementAt(i)).trim(); |
|
282 |
if(docid.trim().equals(d)) |
|
283 |
{ |
|
284 |
inxmldoc = true; |
|
285 |
} |
|
286 |
} |
|
287 |
doccheckRS.close(); |
|
288 |
xmlDocCheck.close(); |
|
289 |
// make sure the while loop will be ended in reseaonable time |
|
290 |
long stopTime = System.currentTimeMillis(); |
|
291 |
if ((stopTime - startTime) > INDEXDELAY) |
|
292 |
{ |
|
293 |
throw new Exception("Couldn't find the docid for index build in" + |
|
294 |
"reseaonable time!"); |
|
295 |
} |
|
296 |
} |
|
297 |
|
|
298 |
// Going through the elements of the document and writing its Index |
|
299 |
Enumeration nodes = nodeIndex.elements(); |
|
300 |
while ( nodes.hasMoreElements() ) { |
|
301 |
currNode = (DBSAXNode)nodes.nextElement(); |
|
302 |
currNode.updateNodeIndex(dbConn, docid, doctype); |
|
303 |
} |
|
304 |
dbConn.commit(); |
|
305 |
} |
|
306 |
catch (Exception e) |
|
307 |
{ |
|
308 |
try |
|
309 |
{ |
|
310 |
dbConn.rollback(); |
|
311 |
|
|
312 |
} |
|
313 |
catch (SQLException sqle) |
|
314 |
{} |
|
315 |
MetaCatUtil.debugMessage("Error in DBSAXHandler.run " + |
|
316 |
e.getMessage(), 30); |
|
317 |
|
|
318 |
} |
|
319 |
finally |
|
320 |
{ |
|
321 |
DBConnectionPool.returnDBConnection(dbConn, serialNumber); |
|
322 |
}//finally |
|
323 |
}//run |
|
324 |
|
|
325 |
|
|
326 |
} |
|
0 | 327 |
Also available in: Unified diff
A new sax parser will handle eml2 (not finished yet)