Project

General

Profile

1 87 jones
/**
2 203 jones
 *  '$RCSfile$'
3
 *    Purpose: A Class that transforms an XML text document
4
 *             into another type using XSL
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Matt Jones
8 87 jones
 *
9 1929 brooke
 * '$Author$'
10
 * '$Date$'
11 203 jones
 * '$Revision$'
12 669 jones
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26 87 jones
 */
27
28
package edu.ucsb.nceas.metacat;
29
30
import java.io.*;
31
import java.net.URL;
32
import java.net.MalformedURLException;
33
import java.sql.*;
34 1688 tao
import java.util.Enumeration;
35 1664 tao
import java.util.Hashtable;
36 3675 barteau
import java.util.Iterator;
37
import java.util.Map;
38
import java.util.Map.Entry;
39 87 jones
import java.util.Stack;
40
41 906 berkley
import javax.xml.transform.TransformerFactory;
42
import javax.xml.transform.Transformer;
43
import javax.xml.transform.stream.StreamSource;
44
import javax.xml.transform.stream.StreamResult;
45
import javax.xml.transform.TransformerException;
46
import javax.xml.transform.TransformerConfigurationException;
47
48 2663 sgarg
import org.apache.log4j.Logger;
49 906 berkley
import org.apache.xerces.parsers.DOMParser;
50
import org.w3c.dom.Attr;
51
import org.w3c.dom.NamedNodeMap;
52
import org.w3c.dom.NodeList;
53
import org.w3c.dom.Document;
54
import org.w3c.dom.Node;
55
import org.w3c.dom.DocumentType;
56
import org.xml.sax.SAXException;
57
import org.xml.sax.InputSource;
58
import org.apache.xerces.dom.DocumentTypeImpl;
59
import org.apache.xpath.XPathAPI;
60
61
/*
62 87 jones
import oracle.xml.parser.v2.XSLStylesheet;
63
import oracle.xml.parser.v2.XSLException;
64 832 jones
import oracle.xml.parser.v2.XMLParseException;
65 87 jones
import oracle.xml.parser.v2.XSLProcessor;
66
import oracle.xml.parser.v2.XMLDocument;
67
import oracle.xml.parser.v2.DOMParser;
68 906 berkley
*/
69 832 jones
import org.w3c.dom.Document;
70
import org.w3c.dom.Node;
71
import org.w3c.dom.Element;
72
import org.xml.sax.SAXException;
73 87 jones
74 4080 daigle
import edu.ucsb.nceas.metacat.service.PropertyService;
75 4327 leinfelder
import edu.ucsb.nceas.metacat.service.SkinPropertyService;
76 4080 daigle
import edu.ucsb.nceas.metacat.util.MetaCatUtil;
77
import edu.ucsb.nceas.metacat.util.SystemUtil;
78
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
79 4327 leinfelder
import edu.ucsb.nceas.utilities.SortedProperties;
80 4080 daigle
81 2896 sgarg
import java.util.Properties;
82 2893 sgarg
83 2912 harris
84 1716 berkley
/**
85 87 jones
 * A Class that transforms XML documents utilizing XSL style sheets
86
 */
87
public class DBTransform {
88
89 1217 tao
  //private Connection	conn = null;
90 832 jones
  private MetaCatUtil   util = null;
91
  private String 	configDir = null;
92
  private String	defaultStyle = null;
93 2663 sgarg
  private Logger logMetacat = Logger.getLogger(DBTransform.class);
94 3780 daigle
  private String httpServer = null;
95 3725 tao
  private String contextURL = null;
96 3780 daigle
  private String servletURL = null;
97 2663 sgarg
98 87 jones
  /**
   * Construct a DBTransform instance.
   *
   * The constructor caches the style skins directory, the value of the
   * "application.default-style" property and the server, context and
   * servlet URLs so that later transformations can reuse them.
   *
   * Generally, one calls transformXMLDocument() after constructing the
   * instance.
   *
   * @throws PropertyNotFoundException presumably when a required
   *         configuration property is missing -- confirm against
   *         PropertyService / SystemUtil
   * @throws IOException declared for callers of this constructor
   * @throws SQLException declared for callers of this constructor
   * @throws ClassNotFoundException declared for callers of this constructor
   */
  public DBTransform()
      throws IOException, SQLException, ClassNotFoundException,
             PropertyNotFoundException
  {
    this.configDir = SystemUtil.getStyleSkinsDir();
    this.defaultStyle = PropertyService.getProperty("application.default-style");
    this.httpServer = SystemUtil.getServerURL();
    this.contextURL = SystemUtil.getContextURL();
    this.servletURL = SystemUtil.getServletURL();
  }
117 1716 berkley
118 87 jones
  /**
119 1716 berkley
   * @see transformXMLDocument(String doc, String sourceType,
120
   *            String targetType, String qformat, PrintWriter pw,
121
   *            String sessionid)
122
   */
123
  public void transformXMLDocument(String doc, String sourceType,
124 1717 berkley
                String targetType, String qformat, PrintWriter pw,
125
                Hashtable param)
126 1716 berkley
  {
127 1717 berkley
    transformXMLDocument(doc, sourceType, targetType, qformat, pw, param, null);
128 1716 berkley
  }
129
130 2088 tao
   /**
131
   * @see transformXMLDocument(String doc, String sourceType,
132
   *            String targetType, String qFormat, StringWriter pw
133
   *            String sessionid)
134
   */
135
  public void transformXMLDocument(String doc, String sourceType,
136
                String targetType, String qFormat, StringWriter pw)
137
  {
138
    transformXMLDocument(doc, sourceType, targetType, qFormat, pw, null);
139
  }
140
141
142 1716 berkley
  /**
143 87 jones
   * Transform an XML document using the stylesheet reference from the db
144
   *
145
   * @param doc the document to be transformed
146
   * @param sourcetype the document type of the source
147
   * @param targettype the target document type
148 832 jones
   * @param qformat the name of the style set to use
149
   * @param pw the PrintWriter to which output is printed
150 1664 tao
   * @param params some parameters for eml2 transformation
151 87 jones
   */
152 1716 berkley
  public void transformXMLDocument(String doc, String sourceType,
153
                                   String targetType, String qformat,
154
                                   PrintWriter pw, Hashtable param,
155
                                   String sessionid)
156 1664 tao
 {
157 1716 berkley
158 87 jones
    // Look up the stylesheet for this type combination
159 941 tao
    String xslSystemId = getStyleSystemId(qformat, sourceType, targetType);
160 2088 tao
    if (xslSystemId != null)
161
    {
162
      try
163
      {// Create a stylesheet from the system id that was found
164
        doc = removeDOCTYPE(doc);
165
        StringReader xml = new StringReader(doc);
166
        StreamResult result = new StreamResult(pw);
167
        doTransform(xml, result, xslSystemId, param, qformat, sessionid);
168 87 jones
169 2088 tao
      }
170
      catch (Exception e)
171
      {
172 4080 daigle
        pw.println(xslSystemId + ": Error transforming document in " +
173 675 berkley
                   "DBTransform.transformXMLDocument: " +
174 87 jones
                   e.getMessage());
175 1716 berkley
176 87 jones
      }
177 2088 tao
    }
178
    else
179
    {
180 1716 berkley
      // No stylesheet registered form this document type, so just return the
181 87 jones
      // XML stream we were passed
182 100 jones
      pw.print(doc);
183 87 jones
    }
184
  }
185 1716 berkley
186 941 tao
  /**
187 1716 berkley
   * Transform an XML document to StringWriter using the stylesheet reference
188 941 tao
   * from the db
189
   * @param doc the document to be transformed
190
   * @param sourceType the document type of the source
191
   * @param targetType the target document type
192
   * @param qFormat the name of the style set to use
193
   * @param pw the StringWriter to which output will be stored
194
   */
195
  public void transformXMLDocument(String doc, String sourceType,
196 1716 berkley
                String targetType, String qFormat, StringWriter pw,
197 2088 tao
                String sessionid)
198
  {
199 941 tao
200
    // Look up the stylesheet for this type combination
201
    String xslSystemId = getStyleSystemId(qFormat, sourceType, targetType);
202 2088 tao
    if (xslSystemId != null)
203
    {
204 941 tao
      // Create a stylesheet from the system id that was found
205 2088 tao
      try
206
      {
207 1903 tao
        doc = removeDOCTYPE(doc);
208 2088 tao
        StringReader xml = new StringReader(doc);
209
        StreamResult result = new StreamResult(pw);
210
        doTransform(xml, result, xslSystemId, null, qFormat, sessionid);
211
      }
212
      catch (Exception e)
213
      {
214 3780 daigle
        logMetacat.error(xslSystemId + ": Error transforming document in " +
215 941 tao
                   "DBTransform.transformXMLDocument: " +
216 2663 sgarg
                   e.getMessage());
217 1716 berkley
218 941 tao
      }
219 2088 tao
    }
220
    else
221
    {
222 1716 berkley
      // No stylesheet registered form this document type, so just return the
223 941 tao
      // XML stream we were passed
224 950 tao
      pw.write(doc);
225 941 tao
    }
226
  }
227 1716 berkley
228 906 berkley
  /**
229 2088 tao
   * Method to do transform for a string reader
230
   * @param doc the document to be transformed
231
   * @param sourcetype the document type of the source
232
   * @param targettype the target document type
233
   * @param qformat the name of the style set to use
234
   * @param pw the PrintWriter to which output is printed
235
   * @param params some parameters for eml2 transformation
236 1716 berkley
   */
237 2088 tao
   public void transformXMLDocument(StringReader docContent, String sourceType,
238
                                   String targetType, String qformat,
239
                                   PrintWriter pw, Hashtable param,
240
                                   String sessionid)
241
   {
242
     // Look up the stylesheet for this type combination
243
    String xslSystemId = getStyleSystemId(qformat, sourceType, targetType);
244
    if (xslSystemId != null)
245
    {
246
      try
247
      {// Create a stylesheet from the system id that was found
248
        StreamResult result = new StreamResult(pw);
249
        doTransform(docContent, result, xslSystemId, param, qformat, sessionid);
250
251
      }
252
      catch (Exception e)
253
      {
254 4080 daigle
        pw.println(xslSystemId + ": Error transforming document in " +
255 2088 tao
                   "DBTransform.transformXMLDocument: " +
256
                   e.getMessage());
257
258
      }
259
    }
260
    else
261
    {
262
      // No stylesheet registered form this document type, so just return the
263
      // XML stream we were passed
264
      pw.print(docContent);
265
    }
266
   }
267
268 3675 barteau
  /**
269
   * Reads skin's config file if it exists, and populates Transformer paramaters
270
   * with its contents.
271
   * It then adds the parameters passed to it via Hashtable param to the Transformer.
272
   * It then calls the Transformer.transform method.
273 2088 tao
   */
274 3675 barteau
  private void doTransform(StringReader docContent,
275
          StreamResult resultOutput,
276
          String xslSystemId,
277
          Hashtable param,
278
          String qformat,
279
          String sessionid)
280
          throws Exception {
281
282 4327 leinfelder
      SortedProperties skinOptions;
283 3675 barteau
      TransformerFactory tFactory;
284
      Transformer transformer;
285
      String key, value;
286
      StreamSource xml;
287
      Enumeration en;
288
      Iterator iterIt;
289
      Map.Entry entry;
290
291
      if (xslSystemId != null) {
292
        tFactory = TransformerFactory.newInstance();
293
        transformer = tFactory.newTransformer(new StreamSource(xslSystemId));
294
295 2088 tao
        transformer.setParameter("qformat", qformat);
296 2663 sgarg
        logMetacat.warn("qformat: "+qformat);
297 2893 sgarg
298 4327 leinfelder
        skinOptions = SkinPropertyService.getProperties(qformat);
299
        if (skinOptions != null) {
300
            iterIt = skinOptions.getProperties().entrySet().iterator();
301 3675 barteau
            while (iterIt.hasNext()) {
302
                entry = (Entry) iterIt.next();
303
                key = (String) entry.getKey();
304
                value = (String) entry.getValue();
305 4327 leinfelder
                //only include the plain properties
306
                if (key.indexOf('.') == -1) {
307
                	transformer.setParameter(key, value);
308
                }
309 3675 barteau
            }
310
        }
311 2893 sgarg
312 3675 barteau
        if (sessionid != null && !sessionid.equals("null")) {
313 2088 tao
          transformer.setParameter("sessid", sessionid);
314
        }
315 3675 barteau
316 3733 leinfelder
        //set up the default params (can be overridden by the passed in params)
317 4080 daigle
        String cgiPrefix = SystemUtil.getCGI_URL();
318 3736 leinfelder
        logMetacat.debug("cgi-prefix=" + cgiPrefix);
319 3780 daigle
        logMetacat.debug("httpServer=" + httpServer);
320 3736 leinfelder
        logMetacat.debug("contextURL=" + contextURL);
321 3780 daigle
        logMetacat.debug("serletPath=" + servletURL);
322 3733 leinfelder
        transformer.setParameter("cgi-prefix", cgiPrefix);
323 3780 daigle
        transformer.setParameter("httpServer", httpServer);
324 3733 leinfelder
        transformer.setParameter("contextURL", contextURL);
325 3780 daigle
        transformer.setParameter("servletURL", servletURL);
326 3733 leinfelder
327 2088 tao
        // Set up parameter for transformation
328 3675 barteau
        if ( param != null) {
329
          en = param.keys();
330
          while (en.hasMoreElements()) {
331
            key = (String) en.nextElement();
332
            value = ((String[]) (param.get(key)))[0];
333 2668 sgarg
            logMetacat.info(key+" : "+value);
334 2088 tao
            transformer.setParameter(key, value);
335
          }
336
        }
337 3675 barteau
        xml = new StreamSource(docContent);
338 2088 tao
        transformer.transform(xml,resultOutput);
339
    }
340
  }//doTransform
341
342
343 1716 berkley
  /**
344 906 berkley
   * gets the content of a tag in a given xml file with the given path
345
   * @param f the file to parse
346
   * @param path the path to get the content from
347
   */
348 1716 berkley
  public static NodeList getPathContent(File f, String path)
349 906 berkley
  {
350
    if(f == null)
351
    {
352
      return null;
353
    }
354 1716 berkley
355 906 berkley
    DOMParser parser = new DOMParser();
356
    InputSource in;
357
    FileInputStream fs;
358 1716 berkley
359 906 berkley
    try
360 1716 berkley
    {
361 906 berkley
      fs = new FileInputStream(f);
362
      in = new InputSource(fs);
363
    }
364
    catch(FileNotFoundException fnf)
365
    {
366
      fnf.printStackTrace();
367
      return null;
368
    }
369 1716 berkley
370 906 berkley
    try
371
    {
372
      parser.parse(in);
373
      fs.close();
374
    }
375
    catch(Exception e1)
376
    {
377 1716 berkley
      System.err.println("File: " + f.getPath() + " : parse threw: " +
378 906 berkley
                         e1.toString());
379
      return null;
380
    }
381 1716 berkley
382 906 berkley
    Document doc = parser.getDocument();
383 1716 berkley
384 906 berkley
    try
385
    {
386
      NodeList docNodeList = XPathAPI.selectNodeList(doc, path);
387
      return docNodeList;
388
    }
389
    catch(Exception se)
390
    {
391 1716 berkley
      System.err.println("file: " + f.getPath() + " : parse threw: " +
392 906 berkley
                         se.toString());
393
      return null;
394
    }
395
  }
396 87 jones
397
  /**
398
   * Lookup a stylesheet reference from the db catalog
399
   *
400 832 jones
   * @param qformat    the named style-set format
401
   * @param sourcetype the document type of the source
402
   * @param targettype the document type of the target
403
   */
404 1716 berkley
  public String getStyleSystemId(String qformat, String sourcetype,
405 832 jones
                String targettype) {
406
    String systemId = null;
407
408
    if ((qformat == null) || (qformat.equals("html"))) {
409
      qformat = defaultStyle;
410
    }
411
412
    // Load the style-set map for this qformat into a DOM
413
    try {
414 906 berkley
      boolean breakflag = false;
415 1929 brooke
      String filename = configDir + "/" + qformat + "/" + qformat + ".xml";
416 2663 sgarg
      logMetacat.warn("Trying style-set file: " + filename);
417 906 berkley
      File f = new File(filename);
418
      NodeList nlDoctype = getPathContent(f, "/style-set/doctype");
419
      NodeList nlDefault = getPathContent(f, "/style-set/default-style");
420
      Node nDefault = nlDefault.item(0);
421
      systemId = nDefault.getFirstChild().getNodeValue(); //set the default
422 1716 berkley
423 906 berkley
      for(int i=0; i<nlDoctype.getLength(); i++)
424
      { //look for the right sourcetype
425
        Node nDoctype = nlDoctype.item(i);
426
        NamedNodeMap atts = nDoctype.getAttributes();
427
        Node nAtt = atts.getNamedItem("publicid");
428
        String doctype = nAtt.getFirstChild().getNodeValue();
429
        if(doctype.equals(sourcetype))
430
        { //found the right sourcetype now we need to get the target type
431
          NodeList nlChildren = nDoctype.getChildNodes();
432
          for(int j=0; j<nlChildren.getLength(); j++)
433
          {
434
            Node nChild = nlChildren.item(j);
435
            String childName = nChild.getNodeName();
436
            if(childName.equals("target"))
437
            {
438
              NamedNodeMap childAtts = nChild.getAttributes();
439
              Node nTargetPublicId = childAtts.getNamedItem("publicid");
440
              String target = nTargetPublicId.getFirstChild().getNodeValue();
441
              if(target.equals(targettype))
442
              { //we found the right target type
443
                NodeList nlTarget = nChild.getChildNodes();
444
                for(int k=0; k<nlTarget.getLength(); k++)
445
                {
446
                  Node nChildText = nlTarget.item(k);
447
                  if(nChildText.getNodeType() == Node.TEXT_NODE)
448
                  { //get the text from the target node
449
                    systemId = nChildText.getNodeValue();
450
                    breakflag = true;
451
                    break;
452
                  }
453
                }
454 832 jones
              }
455
            }
456 1716 berkley
457 906 berkley
            if(breakflag)
458
            {
459
              break;
460
            }
461 832 jones
          }
462
        }
463 1716 berkley
464 906 berkley
        if(breakflag)
465
        {
466
          break;
467
        }
468 832 jones
      }
469
    }
470 906 berkley
    catch(Exception e)
471
    {
472
      System.out.println("Error parsing style-set file: " + e.getMessage());
473
      e.printStackTrace();
474
    }
475 3725 tao
476
    //Check if the systemId is relative path, add a postfix - the contextULR to systemID.
477
    if (systemId != null && systemId.indexOf("http://" ) == -1)
478
    {
479
    	systemId = contextURL+systemId;
480
    }
481 832 jones
    // Return the system ID for this particular source document type
482 2663 sgarg
    logMetacat.info("style system id is: "+systemId);
483 832 jones
    return systemId;
484
  }
485
486 1896 tao
 /* Method to modified the system id of xml input -- make sure it
487
    points to system id in xml_catalog table
488
  */
489 2088 tao
  private void modifiedXmlStreamSource(StreamSource xml, String publicId)
490 1896 tao
                                       throws Exception
491
  {
492
    // make sure the xml is not null
493
    if (xml == null || publicId == null)
494
    {
495
      return;
496 87 jones
    }
497 2663 sgarg
    logMetacat.info("public id of input stream is " +publicId);
498 1896 tao
    // Get system id from xml_catalog table
499
    String systemId = DBEntityResolver.getDTDSystemID(publicId);
500 2663 sgarg
    logMetacat.info("system id of input stream from xml_catalog"
501
                               +"table is " +systemId);
502 1896 tao
    //set system id to input stream
503
    xml.setSystemId(systemId);
504 87 jones
  }
505 2088 tao
506 1903 tao
  /*
507
   * removes the DOCTYPE element and its contents from a Sting
508 2088 tao
   * used to avoid problems with incorrect SystemIDs
509 1903 tao
   */
510 2088 tao
  private String removeDOCTYPE(String in) {
511 1903 tao
    String ret = "";
512
    int startpos = in.indexOf("<!DOCTYPE");
513
    if (startpos>-1) {
514 2088 tao
      int stoppos = in.indexOf(">", startpos + 8);
515 1903 tao
      ret = in.substring(0,startpos) + in.substring(stoppos+1,in.length());
516
    } else {
517
      return in;
518
    }
519 2088 tao
    return ret;
520 1903 tao
  }
521 99 jones
522
  /**
523
   * the main routine used to test the transform utility.
524
   *
525 184 jones
   * Usage: java DBTransform
526 99 jones
   */
527
  static public void main(String[] args) {
528 1716 berkley
529 184 jones
     if (args.length > 0)
530 99 jones
     {
531
        System.err.println("Wrong number of arguments!!!");
532 184 jones
        System.err.println("USAGE: java DBTransform");
533 99 jones
        return;
534
     } else {
535
        try {
536 1716 berkley
537 99 jones
          // Create a test document
538
          StringBuffer testdoc = new StringBuffer();
539
          testdoc.append("<?xml version=\"1.0\"?>");
540
          testdoc.append("<eml-dataset><metafile_id>NCEAS-0001</metafile_id>");
541
          testdoc.append("<dataset_id>DS001</dataset_id>");
542
          testdoc.append("<title>My test doc</title></eml-dataset>");
543
544
          // Transform the document to the new doctype
545 1217 tao
          DBTransform dbt = new DBTransform();
546 1716 berkley
          dbt.transformXMLDocument(testdoc.toString(),
547
                                   "-//NCEAS//eml-dataset//EN",
548
                                   "-//W3C//HTML//EN",
549 832 jones
                                   "knb",
550 1664 tao
                                   new PrintWriter(System.out), null);
551 99 jones
552
        } catch (Exception e) {
553
          System.err.println("EXCEPTION HANDLING REQUIRED");
554
          System.err.println(e.getMessage());
555
          e.printStackTrace(System.err);
556
        }
557
     }
558
  }
559 1716 berkley
560 184 jones
  private void dbg(int position) {
561 99 jones
    System.err.println("Debug flag: " + position);
562
  }
563
564 87 jones
}