Project

General

Profile

1 87 jones
/**
2 203 jones
 *  '$RCSfile$'
3
 *    Purpose: A Class that transforms an XML text document
4
 *             into another type using XSL
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Matt Jones
8 87 jones
 *
9 1929 brooke
 * '$Author$'
10
 * '$Date$'
11 203 jones
 * '$Revision$'
12 669 jones
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26 87 jones
 */
27
28
package edu.ucsb.nceas.metacat;
29
30
import java.io.*;
31
import java.net.URL;
32
import java.net.MalformedURLException;
33
import java.sql.*;
34 1688 tao
import java.util.Enumeration;
35 1664 tao
import java.util.Hashtable;
36 3675 barteau
import java.util.Iterator;
37
import java.util.Map;
38
import java.util.Map.Entry;
39 87 jones
import java.util.Stack;
40
41 906 berkley
import javax.xml.transform.TransformerFactory;
42
import javax.xml.transform.Transformer;
43
import javax.xml.transform.stream.StreamSource;
44
import javax.xml.transform.stream.StreamResult;
45
import javax.xml.transform.TransformerException;
46
import javax.xml.transform.TransformerConfigurationException;
47
48 2663 sgarg
import org.apache.log4j.Logger;
49 906 berkley
import org.apache.xerces.parsers.DOMParser;
50
import org.w3c.dom.Attr;
51
import org.w3c.dom.NamedNodeMap;
52
import org.w3c.dom.NodeList;
53
import org.w3c.dom.Document;
54
import org.w3c.dom.Node;
55
import org.w3c.dom.NodeList;
56
import org.w3c.dom.DocumentType;
57
import org.xml.sax.SAXException;
58
import org.xml.sax.InputSource;
59
import org.apache.xerces.dom.DocumentTypeImpl;
60
import org.apache.xpath.XPathAPI;
61
import org.w3c.dom.NamedNodeMap;
62
63
/*
64 87 jones
import oracle.xml.parser.v2.XSLStylesheet;
65
import oracle.xml.parser.v2.XSLException;
66 832 jones
import oracle.xml.parser.v2.XMLParseException;
67 87 jones
import oracle.xml.parser.v2.XSLProcessor;
68
import oracle.xml.parser.v2.XMLDocument;
69
import oracle.xml.parser.v2.DOMParser;
70 906 berkley
*/
71 832 jones
import org.w3c.dom.Document;
72
import org.w3c.dom.Node;
73
import org.w3c.dom.Element;
74
import org.xml.sax.SAXException;
75 87 jones
76 2896 sgarg
import java.util.Properties;
77 2893 sgarg
78 2912 harris
79 1716 berkley
/**
80 87 jones
 * A Class that transforms XML documents utilizing XSL style sheets
81
 */
82
public class DBTransform {
83
84 1217 tao
  //private Connection	conn = null;
85 832 jones
  private MetaCatUtil   util = null;
86
  private String 	configDir = null;
87
  private String	defaultStyle = null;
88 2663 sgarg
  private Logger logMetacat = Logger.getLogger(DBTransform.class);
89 3725 tao
  private String contextURL = null;
90 2663 sgarg
91 87 jones
  /**
92
   * construct a DBTransform instance.
93
   *
94
   * Generally, one calls transformXMLDocument() after constructing the instance
95
   *
96
   * @param conn the database connection from which to lookup the public ids
97
   */
98 1716 berkley
  public DBTransform()
99
                  throws IOException,
100
                         SQLException,
101 87 jones
                         ClassNotFoundException
102
  {
103 1217 tao
    //this.conn = conn;
104 832 jones
    util = new MetaCatUtil();
105
    configDir = util.getOption("config-dir");
106
    defaultStyle = util.getOption("default-style");
107 3725 tao
    contextURL = util.getOption("httpserver")+"/"+util.getOption("context")+"/";
108 87 jones
  }
109 1716 berkley
110 87 jones
  /**
111 1716 berkley
   * @see transformXMLDocument(String doc, String sourceType,
112
   *            String targetType, String qformat, PrintWriter pw,
113
   *            String sessionid)
114
   */
115
  public void transformXMLDocument(String doc, String sourceType,
116 1717 berkley
                String targetType, String qformat, PrintWriter pw,
117
                Hashtable param)
118 1716 berkley
  {
119 1717 berkley
    transformXMLDocument(doc, sourceType, targetType, qformat, pw, param, null);
120 1716 berkley
  }
121
122 2088 tao
   /**
123
   * @see transformXMLDocument(String doc, String sourceType,
124
   *            String targetType, String qFormat, StringWriter pw
125
   *            String sessionid)
126
   */
127
  public void transformXMLDocument(String doc, String sourceType,
128
                String targetType, String qFormat, StringWriter pw)
129
  {
130
    transformXMLDocument(doc, sourceType, targetType, qFormat, pw, null);
131
  }
132
133
134 1716 berkley
  /**
135 87 jones
   * Transform an XML document using the stylesheet reference from the db
136
   *
137
   * @param doc the document to be transformed
138
   * @param sourcetype the document type of the source
139
   * @param targettype the target document type
140 832 jones
   * @param qformat the name of the style set to use
141
   * @param pw the PrintWriter to which output is printed
142 1664 tao
   * @param params some parameters for eml2 transformation
143 87 jones
   */
144 1716 berkley
  public void transformXMLDocument(String doc, String sourceType,
145
                                   String targetType, String qformat,
146
                                   PrintWriter pw, Hashtable param,
147
                                   String sessionid)
148 1664 tao
 {
149 1716 berkley
150 87 jones
    // Look up the stylesheet for this type combination
151 941 tao
    String xslSystemId = getStyleSystemId(qformat, sourceType, targetType);
152 2088 tao
    if (xslSystemId != null)
153
    {
154
      try
155
      {// Create a stylesheet from the system id that was found
156
        doc = removeDOCTYPE(doc);
157
        StringReader xml = new StringReader(doc);
158
        StreamResult result = new StreamResult(pw);
159
        doTransform(xml, result, xslSystemId, param, qformat, sessionid);
160 87 jones
161 2088 tao
      }
162
      catch (Exception e)
163
      {
164 941 tao
        pw.println(xslSystemId + "Error transforming document in " +
165 675 berkley
                   "DBTransform.transformXMLDocument: " +
166 87 jones
                   e.getMessage());
167 1716 berkley
168 87 jones
      }
169 2088 tao
    }
170
    else
171
    {
172 1716 berkley
      // No stylesheet registered form this document type, so just return the
173 87 jones
      // XML stream we were passed
174 100 jones
      pw.print(doc);
175 87 jones
    }
176
  }
177 1716 berkley
178 941 tao
  /**
179 1716 berkley
   * Transform an XML document to StringWriter using the stylesheet reference
180 941 tao
   * from the db
181
   * @param doc the document to be transformed
182
   * @param sourceType the document type of the source
183
   * @param targetType the target document type
184
   * @param qFormat the name of the style set to use
185
   * @param pw the StringWriter to which output will be stored
186
   */
187
  public void transformXMLDocument(String doc, String sourceType,
188 1716 berkley
                String targetType, String qFormat, StringWriter pw,
189 2088 tao
                String sessionid)
190
  {
191 941 tao
192
    // Look up the stylesheet for this type combination
193
    String xslSystemId = getStyleSystemId(qFormat, sourceType, targetType);
194 2088 tao
    if (xslSystemId != null)
195
    {
196 941 tao
      // Create a stylesheet from the system id that was found
197 2088 tao
      try
198
      {
199 1903 tao
        doc = removeDOCTYPE(doc);
200 2088 tao
        StringReader xml = new StringReader(doc);
201
        StreamResult result = new StreamResult(pw);
202
        doTransform(xml, result, xslSystemId, null, qFormat, sessionid);
203
      }
204
      catch (Exception e)
205
      {
206 2663 sgarg
        logMetacat.error(xslSystemId + "Error transforming document in " +
207 941 tao
                   "DBTransform.transformXMLDocument: " +
208 2663 sgarg
                   e.getMessage());
209 1716 berkley
210 941 tao
      }
211 2088 tao
    }
212
    else
213
    {
214 1716 berkley
      // No stylesheet registered form this document type, so just return the
215 941 tao
      // XML stream we were passed
216 950 tao
      pw.write(doc);
217 941 tao
    }
218
  }
219 1716 berkley
220 906 berkley
  /**
221 2088 tao
   * Method to do transform for a string reader
222
   * @param doc the document to be transformed
223
   * @param sourcetype the document type of the source
224
   * @param targettype the target document type
225
   * @param qformat the name of the style set to use
226
   * @param pw the PrintWriter to which output is printed
227
   * @param params some parameters for eml2 transformation
228 1716 berkley
   */
229 2088 tao
   public void transformXMLDocument(StringReader docContent, String sourceType,
230
                                   String targetType, String qformat,
231
                                   PrintWriter pw, Hashtable param,
232
                                   String sessionid)
233
   {
234
     // Look up the stylesheet for this type combination
235
    String xslSystemId = getStyleSystemId(qformat, sourceType, targetType);
236
    if (xslSystemId != null)
237
    {
238
      try
239
      {// Create a stylesheet from the system id that was found
240
        StreamResult result = new StreamResult(pw);
241
        doTransform(docContent, result, xslSystemId, param, qformat, sessionid);
242
243
      }
244
      catch (Exception e)
245
      {
246
        pw.println(xslSystemId + "Error transforming document in " +
247
                   "DBTransform.transformXMLDocument: " +
248
                   e.getMessage());
249
250
      }
251
    }
252
    else
253
    {
254
      // No stylesheet registered form this document type, so just return the
255
      // XML stream we were passed
256
      pw.print(docContent);
257
    }
258
   }
259
260 3675 barteau
  /**
261
   * Reads skin's config file if it exists, and populates Transformer paramaters
262
   * with its contents.
263
   * It then adds the parameters passed to it via Hashtable param to the Transformer.
264
   * It then calls the Transformer.transform method.
265 2088 tao
   */
266 3675 barteau
  private void doTransform(StringReader docContent,
267
          StreamResult resultOutput,
268
          String xslSystemId,
269
          Hashtable param,
270
          String qformat,
271
          String sessionid)
272
          throws Exception {
273
274
      Properties skinOptions;
275
      TransformerFactory tFactory;
276
      Transformer transformer;
277
      String key, value;
278
      StreamSource xml;
279
      Enumeration en;
280
      Iterator iterIt;
281
      Map.Entry entry;
282
283
      if (xslSystemId != null) {
284
        tFactory = TransformerFactory.newInstance();
285
        transformer = tFactory.newTransformer(new StreamSource(xslSystemId));
286
287 2088 tao
        transformer.setParameter("qformat", qformat);
288 2663 sgarg
        logMetacat.warn("qformat: "+qformat);
289 2893 sgarg
290 3675 barteau
        if (MetaCatUtil.hasSkinConfig(qformat)) {
291
            skinOptions = MetaCatUtil.getSkinConfig(qformat);
292 2897 sgarg
293 3675 barteau
            iterIt = skinOptions.entrySet().iterator();
294
            while (iterIt.hasNext()) {
295
                entry = (Entry) iterIt.next();
296
                key = (String) entry.getKey();
297
                value = (String) entry.getValue();
298
                transformer.setParameter(key, value);
299
            }
300
        }
301 2893 sgarg
302 3675 barteau
        if (sessionid != null && !sessionid.equals("null")) {
303 2088 tao
          transformer.setParameter("sessid", sessionid);
304
        }
305 3675 barteau
306 3733 leinfelder
        //set up the default params (can be overridden by the passed in params)
307
        String cgiPrefix = MetaCatUtil.getOption("cgi-prefix");
308 3736 leinfelder
        logMetacat.debug("cgi-prefix=" + cgiPrefix);
309
        logMetacat.debug("contextURL=" + contextURL);
310 3733 leinfelder
        transformer.setParameter("cgi-prefix", cgiPrefix);
311
        transformer.setParameter("contextURL", contextURL);
312
313 2088 tao
        // Set up parameter for transformation
314 3675 barteau
        if ( param != null) {
315
          en = param.keys();
316
          while (en.hasMoreElements()) {
317
            key = (String) en.nextElement();
318
            value = ((String[]) (param.get(key)))[0];
319 2668 sgarg
            logMetacat.info(key+" : "+value);
320 2088 tao
            transformer.setParameter(key, value);
321
          }
322
        }
323 3675 barteau
        xml = new StreamSource(docContent);
324 2088 tao
        transformer.transform(xml,resultOutput);
325
    }
326
  }//doTransform
327
328
329 1716 berkley
  /**
330 906 berkley
   * gets the content of a tag in a given xml file with the given path
331
   * @param f the file to parse
332
   * @param path the path to get the content from
333
   */
334 1716 berkley
  public static NodeList getPathContent(File f, String path)
335 906 berkley
  {
336
    if(f == null)
337
    {
338
      return null;
339
    }
340 1716 berkley
341 906 berkley
    DOMParser parser = new DOMParser();
342
    InputSource in;
343
    FileInputStream fs;
344 1716 berkley
345 906 berkley
    try
346 1716 berkley
    {
347 906 berkley
      fs = new FileInputStream(f);
348
      in = new InputSource(fs);
349
    }
350
    catch(FileNotFoundException fnf)
351
    {
352
      fnf.printStackTrace();
353
      return null;
354
    }
355 1716 berkley
356 906 berkley
    try
357
    {
358
      parser.parse(in);
359
      fs.close();
360
    }
361
    catch(Exception e1)
362
    {
363 1716 berkley
      System.err.println("File: " + f.getPath() + " : parse threw: " +
364 906 berkley
                         e1.toString());
365
      return null;
366
    }
367 1716 berkley
368 906 berkley
    Document doc = parser.getDocument();
369 1716 berkley
370 906 berkley
    try
371
    {
372
      NodeList docNodeList = XPathAPI.selectNodeList(doc, path);
373
      return docNodeList;
374
    }
375
    catch(Exception se)
376
    {
377 1716 berkley
      System.err.println("file: " + f.getPath() + " : parse threw: " +
378 906 berkley
                         se.toString());
379
      return null;
380
    }
381
  }
382 87 jones
383
  /**
384
   * Lookup a stylesheet reference from the db catalog
385
   *
386 832 jones
   * @param qformat    the named style-set format
387
   * @param sourcetype the document type of the source
388
   * @param targettype the document type of the target
389
   */
390 1716 berkley
  public String getStyleSystemId(String qformat, String sourcetype,
391 832 jones
                String targettype) {
392
    String systemId = null;
393
394
    if ((qformat == null) || (qformat.equals("html"))) {
395
      qformat = defaultStyle;
396
    }
397
398
    // Load the style-set map for this qformat into a DOM
399
    try {
400 906 berkley
      boolean breakflag = false;
401 1929 brooke
      String filename = configDir + "/" + qformat + "/" + qformat + ".xml";
402 2663 sgarg
      logMetacat.warn("Trying style-set file: " + filename);
403 906 berkley
      File f = new File(filename);
404
      NodeList nlDoctype = getPathContent(f, "/style-set/doctype");
405
      NodeList nlDefault = getPathContent(f, "/style-set/default-style");
406
      Node nDefault = nlDefault.item(0);
407
      systemId = nDefault.getFirstChild().getNodeValue(); //set the default
408 1716 berkley
409 906 berkley
      for(int i=0; i<nlDoctype.getLength(); i++)
410
      { //look for the right sourcetype
411
        Node nDoctype = nlDoctype.item(i);
412
        NamedNodeMap atts = nDoctype.getAttributes();
413
        Node nAtt = atts.getNamedItem("publicid");
414
        String doctype = nAtt.getFirstChild().getNodeValue();
415
        if(doctype.equals(sourcetype))
416
        { //found the right sourcetype now we need to get the target type
417
          NodeList nlChildren = nDoctype.getChildNodes();
418
          for(int j=0; j<nlChildren.getLength(); j++)
419
          {
420
            Node nChild = nlChildren.item(j);
421
            String childName = nChild.getNodeName();
422
            if(childName.equals("target"))
423
            {
424
              NamedNodeMap childAtts = nChild.getAttributes();
425
              Node nTargetPublicId = childAtts.getNamedItem("publicid");
426
              String target = nTargetPublicId.getFirstChild().getNodeValue();
427
              if(target.equals(targettype))
428
              { //we found the right target type
429
                NodeList nlTarget = nChild.getChildNodes();
430
                for(int k=0; k<nlTarget.getLength(); k++)
431
                {
432
                  Node nChildText = nlTarget.item(k);
433
                  if(nChildText.getNodeType() == Node.TEXT_NODE)
434
                  { //get the text from the target node
435
                    systemId = nChildText.getNodeValue();
436
                    breakflag = true;
437
                    break;
438
                  }
439
                }
440 832 jones
              }
441
            }
442 1716 berkley
443 906 berkley
            if(breakflag)
444
            {
445
              break;
446
            }
447 832 jones
          }
448
        }
449 1716 berkley
450 906 berkley
        if(breakflag)
451
        {
452
          break;
453
        }
454 832 jones
      }
455
    }
456 906 berkley
    catch(Exception e)
457
    {
458
      System.out.println("Error parsing style-set file: " + e.getMessage());
459
      e.printStackTrace();
460
    }
461 3725 tao
462
    //Check if the systemId is relative path, add a postfix - the contextULR to systemID.
463
    if (systemId != null && systemId.indexOf("http://" ) == -1)
464
    {
465
    	systemId = contextURL+systemId;
466
    }
467 832 jones
    // Return the system ID for this particular source document type
468 2663 sgarg
    logMetacat.info("style system id is: "+systemId);
469 832 jones
    return systemId;
470
  }
471
472 1896 tao
 /* Method to modified the system id of xml input -- make sure it
473
    points to system id in xml_catalog table
474
  */
475 2088 tao
  private void modifiedXmlStreamSource(StreamSource xml, String publicId)
476 1896 tao
                                       throws Exception
477
  {
478
    // make sure the xml is not null
479
    if (xml == null || publicId == null)
480
    {
481
      return;
482 87 jones
    }
483 2663 sgarg
    logMetacat.info("public id of input stream is " +publicId);
484 1896 tao
    // Get system id from xml_catalog table
485
    String systemId = DBEntityResolver.getDTDSystemID(publicId);
486 2663 sgarg
    logMetacat.info("system id of input stream from xml_catalog"
487
                               +"table is " +systemId);
488 1896 tao
    //set system id to input stream
489
    xml.setSystemId(systemId);
490 87 jones
  }
491 2088 tao
492 1903 tao
  /*
493
   * removes the DOCTYPE element and its contents from a Sting
494 2088 tao
   * used to avoid problems with incorrect SystemIDs
495 1903 tao
   */
496 2088 tao
  private String removeDOCTYPE(String in) {
497 1903 tao
    String ret = "";
498
    int startpos = in.indexOf("<!DOCTYPE");
499
    if (startpos>-1) {
500 2088 tao
      int stoppos = in.indexOf(">", startpos + 8);
501 1903 tao
      ret = in.substring(0,startpos) + in.substring(stoppos+1,in.length());
502
    } else {
503
      return in;
504
    }
505 2088 tao
    return ret;
506 1903 tao
  }
507 99 jones
508
  /**
509
   * the main routine used to test the transform utility.
510
   *
511 184 jones
   * Usage: java DBTransform
512 99 jones
   */
513
  static public void main(String[] args) {
514 1716 berkley
515 184 jones
     if (args.length > 0)
516 99 jones
     {
517
        System.err.println("Wrong number of arguments!!!");
518 184 jones
        System.err.println("USAGE: java DBTransform");
519 99 jones
        return;
520
     } else {
521
        try {
522 1716 berkley
523 99 jones
          // Open a connection to the database
524 1217 tao
          /*MetaCatUtil   util = new MetaCatUtil();
525
          Connection dbconn = util.openDBConnection();*/
526 99 jones
527
          // Create a test document
528
          StringBuffer testdoc = new StringBuffer();
529
          testdoc.append("<?xml version=\"1.0\"?>");
530
          testdoc.append("<eml-dataset><metafile_id>NCEAS-0001</metafile_id>");
531
          testdoc.append("<dataset_id>DS001</dataset_id>");
532
          testdoc.append("<title>My test doc</title></eml-dataset>");
533
534
          // Transform the document to the new doctype
535 1217 tao
          DBTransform dbt = new DBTransform();
536 1716 berkley
          dbt.transformXMLDocument(testdoc.toString(),
537
                                   "-//NCEAS//eml-dataset//EN",
538
                                   "-//W3C//HTML//EN",
539 832 jones
                                   "knb",
540 1664 tao
                                   new PrintWriter(System.out), null);
541 99 jones
542
        } catch (Exception e) {
543
          System.err.println("EXCEPTION HANDLING REQUIRED");
544
          System.err.println(e.getMessage());
545
          e.printStackTrace(System.err);
546
        }
547
     }
548
  }
549 1716 berkley
550 184 jones
  private void dbg(int position) {
551 99 jones
    System.err.println("Debug flag: " + position);
552
  }
553
554 87 jones
}