Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that handles the SAX XML events as they
4
 *             are generated from XML documents
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Matt Jones, Jivka Bojilova
8
 *    Release: @release@
9
 *
10
 *   '$Author: tao $'
11
 *     '$Date: 2002-05-08 16:03:42 -0700 (Wed, 08 May 2002) $'
12
 * '$Revision: 1062 $'
13
 *
14
 * This program is free software; you can redistribute it and/or modify
15
 * it under the terms of the GNU General Public License as published by
16
 * the Free Software Foundation; either version 2 of the License, or
17
 * (at your option) any later version.
18
 *
19
 * This program is distributed in the hope that it will be useful,
20
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 * GNU General Public License for more details.
23
 *
24
 * You should have received a copy of the GNU General Public License
25
 * along with this program; if not, write to the Free Software
26
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27
 */
28

    
29
package edu.ucsb.nceas.metacat;
30

    
31
import java.sql.*;
32
import java.io.StringReader;
33
import java.util.Stack;
34
import java.util.Vector;
35
import java.util.Hashtable;
36
import java.util.Enumeration;
37
import java.util.EmptyStackException;
38

    
39
import org.xml.sax.Attributes;
40
import org.xml.sax.SAXException;
41
import org.xml.sax.SAXParseException;
42
import org.xml.sax.ext.DeclHandler;
43
import org.xml.sax.ext.LexicalHandler;
44
import org.xml.sax.helpers.DefaultHandler;
45

    
46
/** 
47
 * A database aware Class implementing callback methods for the SAX parser to
48
 * call when processing the XML stream and generating events
49
 */
50
public class DBSAXHandler extends DefaultHandler 
51
                          implements LexicalHandler, DeclHandler, Runnable {
52

    
53
   private boolean	atFirstElement;
54
   private boolean	processingDTD;
55
   private String 	docname = null;
56
   private String 	doctype;
57
   private String 	systemid;
58
   private boolean 	stackCreated = false;
59
   private Stack 	  nodeStack;
60
   private Vector   nodeIndex;
61
   private Connection	  conn = null;
62
   private DocumentImpl currentDocument;
63
   private DBSAXNode    rootNode;
64
   private String   action = null;
65
   private String   docid = null;
66
   private String   revision = null;
67
   private String   user = null;
68
   private String[] groups = null;
69
   private String   pub = null;
70
   private Thread   xmlIndex;
71
   private boolean endDocument = false;
72
   private int serverCode = 1;
73
   private Hashtable namespaces = new Hashtable();
74

    
75
   private static final int MAXDATACHARS = 4000;
76
// DOCTITLE attr cleared from the db
77
//   private static final int MAXTITLELEN = 1000;
78

    
79
   /**
    * Construct a handler bound to a database connection. The handler starts
    * out positioned before the first element and outside of any DTD, and the
    * node stack and node index are allocated.
    *
    * @param conn the JDBC connection to which information is written
    */
   public DBSAXHandler(Connection conn) {
     this.conn = conn;
     atFirstElement = true;
     processingDTD = false;

     // Nothing left to do when the node containers already exist
     if (stackCreated) {
       return;
     }

     // Containers that keep track of node context while parsing
     nodeStack = new Stack();
     nodeIndex = new Vector();
     stackCreated = true;
   }
96
  
97
  /**
   * Construct a handler for inserting or updating a document without
   * naming a specific revision. Equivalent to the revision-aware
   * constructor called with a null revision.
   *
   * @param conn the JDBC connection to which information is written
   * @param action - "INSERT" or "UPDATE"
   * @param docid to be inserted or updated into JDBC connection
   * @param user the user connected to MetaCat servlet and owns the document
   * @param groups the groups to which user belongs
   * @param pub flag for public "read" access on document
   * @param serverCode the serverid from xml_replication on which this
   *        document resides.
   */
  public DBSAXHandler(Connection conn, String action, String docid,
                      String user, String[] groups, String pub, int serverCode)
  {
    // The cast documents that the revision slot is intentionally empty
    this(conn, action, docid, (String) null, user, groups, pub, serverCode);
  }
121
  
122
  /** Construct an instance of the handler class
123
    * In this constructor, user can specify the version need to upadate
124
    *
125
    * @param conn the JDBC connection to which information is written
126
    * @param action - "INSERT" or "UPDATE"
127
    * @param docid to be inserted or updated into JDBC connection
128
    * @param revision, the user specified the revision need to be update
129
    * @param user the user connected to MetaCat servlet and owns the document
130
    * @param groups the groups to which user belongs
131
    * @param pub flag for public "read" access on document
132
    * @param serverCode the serverid from xml_replication on which this document
133
    *        resides.
134
    *
135
    */
136
   public DBSAXHandler(Connection conn, String action, String docid, 
137
     String revision, String user, String[] groups, String pub, int serverCode)
138
   {
139
     this(conn);
140
     this.action = action;
141
     this.docid = docid;
142
     this.revision = revision;
143
     this.user = user;
144
     this.groups = groups;
145
     this.pub = pub;
146
     this.serverCode = serverCode;
147
     this.xmlIndex = new Thread(this);
148
   }
149
 
150
   /** SAX Handler that receives notification of beginning of the document */
151
   public void startDocument() throws SAXException {
152
     MetaCatUtil.debugMessage("start Document", 50);
153

    
154
     // Create the document node representation as root
155
     rootNode = new DBSAXNode(conn, this.docid);
156
     // Add the node to the stack, so that any text data can be 
157
     // added as it is encountered
158
     nodeStack.push(rootNode);
159
   }
160

    
161
   /** SAX Handler that receives notification of end of the document */
162
   public void endDocument() throws SAXException {
163
     MetaCatUtil.debugMessage("end Document", 50);
164
     // Starting new thread for writing XML Index.
165
     // It calls the run method of the thread.
166
     try {
167
       xmlIndex.start();
168
     } catch (NullPointerException e) {
169
       xmlIndex = null;
170
       throw new 
171
       SAXException("Problem with starting thread for writing XML Index. " +
172
                    e.getMessage());
173
     }
174
   }
175

    
176
   /** SAX Handler that is called at the start of Namespace */
177
   public void startPrefixMapping(String prefix, String uri) 
178
                                          throws SAXException
179
   {
180
    MetaCatUtil.debugMessage("NAMESPACE", 50);
181

    
182
    namespaces.put(prefix, uri);
183
   }
184
   
185
   /** SAX Handler that is called at the start of each XML element */
186
   public void startElement(String uri, String localName,
187
                            String qName, Attributes atts) 
188
               throws SAXException {
189
     MetaCatUtil.debugMessage("Start ELEMENT " + qName, 50);
190
 
191
     DBSAXNode parentNode = null;
192
     DBSAXNode currentNode = null;
193

    
194
     // Get a reference to the parent node for the id
195
     try {
196
       parentNode = (DBSAXNode)nodeStack.peek();
197
     } catch (EmptyStackException e) {
198
       parentNode = null;
199
     }
200

    
201
     // Document representation that points to the root document node
202
     if (atFirstElement) {
203
       atFirstElement = false;
204
       // If no DOCTYPE declaration: docname = root element name 
205
       if (docname == null) {
206
         docname = localName;
207
         doctype = docname;
208
         MetaCatUtil.debugMessage("DOCNAME-a: " + docname, 50);
209
         MetaCatUtil.debugMessage("DOCTYPE-a: " + doctype, 50);
210
       } else if (doctype == null) {
211
         doctype = docname;
212
         MetaCatUtil.debugMessage("DOCTYPE-b: " + doctype, 50);
213
       }
214
       rootNode.writeNodename(docname);
215
       try {
216
         // for validated XML Documents store a reference to XML DB Catalog
217
         String catalogid = null;
218
         if ( systemid != null ) {
219
           Statement stmt = conn.createStatement();
220
           ResultSet rs = stmt.executeQuery(
221
                          "SELECT catalog_id FROM xml_catalog " +
222
                          "WHERE entry_type = 'DTD' " + 
223
                          "AND public_id = '" + doctype + "'");
224
           boolean hasRow = rs.next();
225
           if ( hasRow ) {
226
            catalogid = rs.getString(1);
227
           }
228
           stmt.close();
229
         }
230
         
231
         //create documentImpl object by the constructor which can specify
232
         //the revision
233
         currentDocument = new DocumentImpl(conn, rootNode.getNodeID(), 
234
                               docname, doctype, docid, revision, action, user, 
235
                               this.pub, catalogid, this.serverCode);
236
         
237
         
238
       } catch (Exception ane) {
239
         throw (new SAXException("Error in DBSaxHandler.startElement " + 
240
                                 action, ane));
241
       }
242
     }      
243

    
244
     // Create the current node representation
245
     currentNode = new DBSAXNode(conn, qName, localName, parentNode,
246
                                 currentDocument.getRootNodeID(),docid,
247
                                 currentDocument.getDoctype());
248
                               
249
     // Add all of the namespaces
250
     String prefix;
251
     String nsuri;
252
     Enumeration prefixes = namespaces.keys();
253
     while ( prefixes.hasMoreElements() ) {
254
       prefix = (String)prefixes.nextElement();
255
       nsuri = (String)namespaces.get(prefix);
256
       currentNode.setNamespace(prefix, nsuri, docid);
257
     }
258
     namespaces = null;
259
     namespaces = new Hashtable();
260

    
261
     // Add all of the attributes
262
     for (int i=0; i<atts.getLength(); i++) {
263
       currentNode.setAttribute(atts.getQName(i), atts.getValue(i), docid);
264
     }      
265

    
266
     // Add the node to the stack, so that any text data can be 
267
     // added as it is encountered
268
     nodeStack.push(currentNode);
269
     // Add the node to the vector used by thread for writing XML Index
270
     nodeIndex.addElement(currentNode);
271

    
272
  }
273
  
274
  /* The run method of xmlIndex thread. It writes XML Index for the document. */
275
  public void run () {
276
    DBSAXNode currNode = null;
277
    DBSAXNode prevNode = null;
278
    Connection dbconn = null;
279
    String doctype = currentDocument.getDoctype();
280
    int step = 0;
281
    int counter = 0;
282

    
283
    try {
284
      // Opening separate db connection for writing XML Index
285
      MetaCatUtil util = new MetaCatUtil();
286
      dbconn = util.openDBConnection();
287
      dbconn.setAutoCommit(false);
288
      
289
      //the following while loop construct checks to make sure that the docid
290
      //of the document that we are trying to index is already
291
      //in the xml_documents table.  if this is not the case, the foreign
292
      //key relationship between xml_documents and xml_index is temporarily
293
      //broken causing multiple problems.
294
      boolean inxmldoc = false;
295
      while(!inxmldoc)
296
      {
297
        String xmlDocumentsCheck = "select distinct docid from xml_documents";
298
        PreparedStatement xmlDocCheck = 
299
                                  dbconn.prepareStatement(xmlDocumentsCheck);
300
        xmlDocCheck.execute();
301
        ResultSet doccheckRS = xmlDocCheck.getResultSet();
302
        boolean tableHasRows = doccheckRS.next();
303
        Vector docids = new Vector();
304
        while(tableHasRows) 
305
        {
306
          docids.add(doccheckRS.getString(1).trim());
307
          tableHasRows = doccheckRS.next();
308
        }
309
        
310
        for(int i=0; i<docids.size(); i++)
311
        {
312
          String d = ((String)docids.elementAt(i)).trim();
313
          if(docid.trim().equals(d))
314
          {
315
            inxmldoc = true;
316
          }
317
        }
318
        xmlDocCheck.close();
319
      }
320
      
321
      // Going through the elements of the document and writing its Index
322
      Enumeration nodes = nodeIndex.elements();
323
      while ( nodes.hasMoreElements() ) {
324
        currNode = (DBSAXNode)nodes.nextElement();
325
        currNode.updateNodeIndex(dbconn, docid, doctype);
326
      }
327
    
328
      dbconn.commit();
329
         
330
      //if this is a package file
331
      String packagedoctype = util.getOption("packagedoctype");
332
      Vector packagedoctypes = new Vector();
333
      
334
      packagedoctypes = MetaCatUtil.getOptionList(packagedoctype);
335
      
336
      if ( packagedoctypes.contains(doctype) )
337
      {
338
        // write the package info to xml_relation table
339
        RelationHandler rth = new RelationHandler(docid, dbconn);
340
        // from the relations get the access file id for that package
341
        String aclid = rth.getAccessFileID(docid);
342
        // if there are access file, write ACL for that package
343
        if ( aclid != null ) {
344
          runAccessControlList(dbconn, aclid);
345
        }
346
      }
347
      // if it is an access file
348
      else if ( MetaCatUtil.getOptionList(
349
                            util.getOption("accessdoctype")).contains(doctype) )
350
      {
351
        // write ACL for the package
352
        runAccessControlList(dbconn, docid);
353
      }
354
      
355
      dbconn.close();
356

    
357
    } catch (Exception e) {
358
      try {
359
        dbconn.rollback();
360
        dbconn.close();
361
      } catch (SQLException sqle) {}
362
      System.out.println("Error in DBSAXHandler.run " + e.getMessage());
363
      e.printStackTrace();
364
    }      
365
  }
366
  
367
  // It runs in xmlIndex thread. It writes ACL for a package.
368
  private void runAccessControlList (Connection conn, String aclid)
369
                                                throws Exception
370
  {
371
    // read the access file from xml_nodes
372
    // parse the access file and store the access info into xml_access
373
    AccessControlList aclobj = 
374
    new AccessControlList(conn, aclid, //new StringReader(xml),
375
                          user, groups, serverCode);
376
    conn.commit();
377
  }
378

    
379

    
380
  /** SAX Handler that is called for each XML text node */
381
  public void characters(char[] cbuf, int start, int len) throws SAXException {
382
     MetaCatUtil.debugMessage("CHARACTERS", 50);
383
     DBSAXNode currentNode = (DBSAXNode)nodeStack.peek();
384
     String data = null;
385
     int leftover = len;
386
     int offset = start;
387
     boolean moredata = true;
388
    
389
     // This loop deals with the case where there are more characters 
390
     // than can fit in a single database text field (limit is 
391
     // MAXDATACHARS).  If the text to be inserted exceeds MAXDATACHARS,
392
     // write a series of nodes that are MAXDATACHARS long, and then the
393
     // final node contains the remainder
394
     while (moredata) {
395
       if (leftover > MAXDATACHARS) {
396
         data = new String(cbuf, offset, MAXDATACHARS);
397
         leftover -= MAXDATACHARS;
398
         offset += MAXDATACHARS;
399
       } else {
400
         data = new String(cbuf, offset, leftover);
401
         moredata = false;
402
       }
403

    
404
       // Write the content of the node to the database
405
       currentNode.writeChildNodeToDB("TEXT", null, data, docid);
406
     }
407
   }
408

    
409
   /** 
410
    * SAX Handler that is called for each XML text node that is
411
    * Ignorable white space
412
    */
413
   public void ignorableWhitespace(char[] cbuf, int start, int len)
414
               throws SAXException {
415
     // When validation is turned "on", white spaces are reported here
416
     // When validation is turned "off" white spaces are not reported here,
417
     // but through characters() callback
418
     MetaCatUtil.debugMessage("IGNORABLEWHITESPACE", 50);
419
   
420

    
421
     DBSAXNode currentNode = (DBSAXNode)nodeStack.peek();
422
     String data = null;
423
     int leftover = len;
424
     int offset = start;
425
     boolean moredata = true;
426
     
427
     // This loop deals with the case where there are more characters 
428
     // than can fit in a single database text field (limit is 
429
     // MAXDATACHARS).  If the text to be inserted exceeds MAXDATACHARS,
430
     // write a series of nodes that are MAXDATACHARS long, and then the
431
     // final node contains the remainder
432
     while (moredata) {
433
       if (leftover > MAXDATACHARS) {
434
         data = new String(cbuf, offset, MAXDATACHARS);
435
         leftover -= MAXDATACHARS;
436
         offset += MAXDATACHARS;
437
       } else {
438
         data = new String(cbuf, offset, leftover);
439
         moredata = false;
440
       }
441

    
442
       // Write the content of the node to the database
443
       currentNode.writeChildNodeToDB("TEXT", null, data, docid);
444
     }
445
   }
446

    
447
   /** 
448
    * SAX Handler called once for each processing instruction found: 
449
    * node that PI may occur before or after the root element.
450
    */
451
   public void processingInstruction(String target, String data) 
452
          throws SAXException {
453
     MetaCatUtil.debugMessage("PI", 50);
454
     DBSAXNode currentNode = (DBSAXNode)nodeStack.peek();
455
     currentNode.writeChildNodeToDB("PI", target, data, docid);
456
   }
457

    
458
   /** SAX Handler that is called at the end of each XML element */
459
   public void endElement(String uri, String localName,
460
                          String qName) throws SAXException {
461
     MetaCatUtil.debugMessage("End ELEMENT " + qName, 50);
462

    
463
     // Get the node from the stack
464
     DBSAXNode currentNode = (DBSAXNode)nodeStack.pop();
465
   }
466

    
467
   //
468
   // the next section implements the LexicalHandler interface
469
   //
470

    
471
   /** SAX Handler that receives notification of DOCTYPE. Sets the DTD */
472
   public void startDTD(String name, String publicId, String systemId) 
473
               throws SAXException {
474
     docname = name;
475
     doctype = publicId;
476
     systemid = systemId;
477

    
478
     MetaCatUtil.debugMessage("Start DTD", 50);
479
     MetaCatUtil.debugMessage("DOCNAME: " + docname, 50);
480
     MetaCatUtil.debugMessage("DOCTYPE: " + doctype, 50);
481
     MetaCatUtil.debugMessage("  SYSID: " + systemid, 50);
482
   }
483

    
484
   /** 
485
    * SAX Handler that receives notification of end of DTD 
486
    */
487
   public void endDTD() throws SAXException {
488
    
489
     MetaCatUtil.debugMessage("end DTD", 50);
490
   }
491

    
492
   /** 
493
    * SAX Handler that receives notification of comments in the DTD
494
    */
495
   public void comment(char[] ch, int start, int length) throws SAXException {
496
     MetaCatUtil.debugMessage("COMMENT", 50);
497
     if ( !processingDTD ) {
498
       DBSAXNode currentNode = (DBSAXNode)nodeStack.peek();
499
       currentNode.writeChildNodeToDB("COMMENT", null, new String(ch), docid);
500
     }
501
   }
502

    
503
   /** 
504
    * SAX Handler that receives notification of the start of CDATA sections
505
    */
506
   public void startCDATA() throws SAXException {
507
     MetaCatUtil.debugMessage("start CDATA", 50);
508
   }
509

    
510
   /** 
511
    * SAX Handler that receives notification of the end of CDATA sections
512
    */
513
   public void endCDATA() throws SAXException {
514
     MetaCatUtil.debugMessage("end CDATA", 50);
515
   }
516

    
517
   /** 
518
    * SAX Handler that receives notification of the start of entities
519
    */
520
   public void startEntity(String name) throws SAXException {
521
     MetaCatUtil.debugMessage("start ENTITY: " + name, 50);
522
//System.out.println("start ENTITY: " + name);
523
     if (name.equals("[dtd]")) {
524
       processingDTD = true;
525
     }
526
   }
527

    
528
   /** 
529
    * SAX Handler that receives notification of the end of entities
530
    */
531
   public void endEntity(String name) throws SAXException {
532
     MetaCatUtil.debugMessage("end ENTITY: " + name, 50);
533
//System.out.println("end ENTITY: " + name);
534
     if (name.equals("[dtd]")) {
535
       processingDTD = false;
536
     }
537
   }
538

    
539
   /** 
540
    * SAX Handler that receives notification of element declarations
541
    */
542
   public void elementDecl(String name, String model)
543
                        throws org.xml.sax.SAXException {
544
//System.out.println("ELEMENTDECL: " + name + " " + model);
545
     MetaCatUtil.debugMessage("ELEMENTDECL: " + name + " " + model, 50);
546
   }
547

    
548
   /** 
549
    * SAX Handler that receives notification of attribute declarations
550
    */
551
   public void attributeDecl(String eName, String aName,
552
                        String type, String valueDefault, String value)
553
                        throws org.xml.sax.SAXException {
554

    
555
//System.out.println("ATTRIBUTEDECL: " + eName + " " 
556
//                        + aName + " " + type + " " + valueDefault + " "
557
//                        + value);
558
     MetaCatUtil.debugMessage("ATTRIBUTEDECL: " + eName + " " 
559
                        + aName + " " + type + " " + valueDefault + " "
560
                        + value, 50);
561
   }
562

    
563
   /** 
564
    * SAX Handler that receives notification of internal entity declarations
565
    */
566
   public void internalEntityDecl(String name, String value)
567
                        throws org.xml.sax.SAXException {
568
//System.out.println("INTERNENTITYDECL: " + name + " " + value);
569
     MetaCatUtil.debugMessage("INTERNENTITYDECL: " + name + " " + value, 50);
570
   }
571

    
572
   /** 
573
    * SAX Handler that receives notification of external entity declarations
574
    */
575
   public void externalEntityDecl(String name, String publicId,
576
                        String systemId)
577
                        throws org.xml.sax.SAXException {
578
//System.out.println("EXTERNENTITYDECL: " + name + " " + publicId 
579
//                              + " " + systemId);
580
     MetaCatUtil.debugMessage("EXTERNENTITYDECL: " + name + " " + publicId 
581
                              + " " + systemId, 50);
582
     // it processes other external entity, not the DTD;
583
     // it doesn't signal for the DTD here
584
     processingDTD = false;
585
   }
586

    
587
   //
588
   // the next section implements the ErrorHandler interface
589
   //
590

    
591
   /** 
592
    * SAX Handler that receives notification of fatal parsing errors
593
    */
594
   public void fatalError(SAXParseException exception) throws SAXException {
595
     MetaCatUtil.debugMessage("FATALERROR", 50);
596
     throw (new SAXException("Fatal processing error.", exception));
597
   }
598

    
599
   /** 
600
    * SAX Handler that receives notification of recoverable parsing errors
601
    */
602
   public void error(SAXParseException exception) throws SAXException {
603
     MetaCatUtil.debugMessage("ERROR", 50);
604
     throw (new SAXException("Processing error.", exception));
605
   }
606

    
607
   /** 
608
    * SAX Handler that receives notification of warnings
609
    */
610
   public void warning(SAXParseException exception) throws SAXException {
611
     MetaCatUtil.debugMessage("WARNING", 50);
612
     throw (new SAXException("Warning.", exception));
613
   }
614

    
615
   // 
616
   // Helper, getter and setter methods
617
   //
618
   
619
   /**
620
    * get the document name
621
    */
622
   public String getDocname() {
623
     return docname;
624
   }
625

    
626
   /**
627
    * get the document processing state
628
    */
629
   public boolean processingDTD() {
630
     return processingDTD;
631
   }
632
}
(16-16/41)