Project

General

Profile

1 87 jones
/**
2 203 jones
 *  '$RCSfile$'
3
 *    Purpose: A Class that transforms an XML text document
4
 *             into another type using XSL
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Matt Jones
8 87 jones
 *
9 1929 brooke
 * '$Author$'
10
 * '$Date$'
11 203 jones
 * '$Revision$'
12 669 jones
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26 87 jones
 */
27
28
package edu.ucsb.nceas.metacat;
29
30 5752 leinfelder
import java.io.File;
31
import java.io.FileInputStream;
32
import java.io.FileNotFoundException;
33
import java.io.IOException;
34
import java.io.OutputStreamWriter;
35
import java.io.StringReader;
36
import java.io.StringWriter;
37
import java.io.Writer;
38
import java.sql.SQLException;
39 1688 tao
import java.util.Enumeration;
40 1664 tao
import java.util.Hashtable;
41 3675 barteau
import java.util.Iterator;
42
import java.util.Map;
43 87 jones
44 906 berkley
import javax.xml.transform.TransformerFactory;
45
import javax.xml.transform.Transformer;
46
import javax.xml.transform.stream.StreamSource;
47
import javax.xml.transform.stream.StreamResult;
48
49 2663 sgarg
import org.apache.log4j.Logger;
50 906 berkley
import org.apache.xerces.parsers.DOMParser;
51
import org.w3c.dom.NamedNodeMap;
52
import org.w3c.dom.NodeList;
53
import org.w3c.dom.Document;
54
import org.w3c.dom.Node;
55
import org.xml.sax.InputSource;
56
import org.apache.xpath.XPathAPI;
57
58 5030 daigle
import edu.ucsb.nceas.metacat.properties.PropertyService;
59
import edu.ucsb.nceas.metacat.properties.SkinPropertyService;
60 4080 daigle
import edu.ucsb.nceas.metacat.util.SystemUtil;
61
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
62 4327 leinfelder
import edu.ucsb.nceas.utilities.SortedProperties;
63 4080 daigle
64 1716 berkley
/**
65 87 jones
 * A Class that transforms XML documents utilizing XSL style sheets
66
 */
67
public class DBTransform {
68
69 5025 daigle
  // private Connection	conn = null;
70
  // private MetacatUtil   util = null;
71 832 jones
  private String 	configDir = null;
72
  private String	defaultStyle = null;
73 2663 sgarg
  private Logger logMetacat = Logger.getLogger(DBTransform.class);
74 3780 daigle
  private String httpServer = null;
75 3725 tao
  private String contextURL = null;
76 3780 daigle
  private String servletURL = null;
77 2663 sgarg
78 87 jones
  /**
   * Constructs a DBTransform instance.
   *
   * The constructor captures the style skins directory, the default style
   * name (property "application.default-style") and the server, context and
   * servlet URLs; these values are reused by every later transformation.
   *
   * Generally, one calls transformXMLDocument() after constructing the instance.
   *
   * @throws IOException declared for callers; not raised by any statement visible here
   * @throws SQLException declared for callers; not raised by any statement visible here
   * @throws ClassNotFoundException declared for callers; not raised by any statement visible here
   * @throws PropertyNotFoundException presumably when a required property
   *         cannot be looked up (confirm against PropertyService/SystemUtil)
   */
  public DBTransform()
      throws IOException, SQLException, ClassNotFoundException, PropertyNotFoundException
  {
    this.configDir = SystemUtil.getStyleSkinsDir();
    this.defaultStyle = PropertyService.getProperty("application.default-style");
    this.httpServer = SystemUtil.getServerURL();
    this.contextURL = SystemUtil.getContextURL();
    this.servletURL = SystemUtil.getServletURL();
  }
97 1716 berkley
98 87 jones
  /**
99
   * Transform an XML document using the stylesheet reference from the db
100
   *
101
   * @param doc the document to be transformed
102
   * @param sourcetype the document type of the source
103
   * @param targettype the target document type
104 832 jones
   * @param qformat the name of the style set to use
105
   * @param pw the PrintWriter to which output is printed
106 1664 tao
   * @param params some parameters for eml2 transformation
107 87 jones
   */
108 1716 berkley
  public void transformXMLDocument(String doc, String sourceType,
109
                                   String targetType, String qformat,
110 5752 leinfelder
                                   Writer w, Hashtable<String, String[]> param,
111 1716 berkley
                                   String sessionid)
112 1664 tao
 {
113 1716 berkley
114 5752 leinfelder
	  String xslSystemId = getStyleSystemId(qformat, sourceType, targetType);
115
	  try {
116
		  // Look up the stylesheet for this type combination
117
		  if (xslSystemId != null) {
118
			// Create a stylesheet from the system id that was found
119
			doc = removeDOCTYPE(doc);
120
			StringReader xml = new StringReader(doc);
121
			StreamResult result = new StreamResult(w);
122
			doTransform(xml, result, xslSystemId, param, qformat, sessionid);
123
		  }
124
		  else {
125
			  // No stylesheet registered form this document type, so just return the
126
			  // XML stream we were passed
127
			  w.write(doc);
128
		  }
129 2088 tao
      }
130
      catch (Exception e)
131
      {
132 5752 leinfelder
    	  try {
133 7258 leinfelder
    		  String msg = xslSystemId + ": Error transforming document in " +
134
	           "DBTransform.transformXMLDocument: " +
135
	           e.getMessage();
136
    		  w.write(msg);
137
    		  w.flush();
138
    		  logMetacat.error(msg, e);
139 5752 leinfelder
		} catch (IOException e1) {
140
			logMetacat.error(e1.getMessage(), e1);
141
		}
142 1716 berkley
143 87 jones
      }
144 5752 leinfelder
145 87 jones
  }
146 5025 daigle
147 941 tao
  /**
148 1716 berkley
   * Transform an XML document to StringWriter using the stylesheet reference
149 941 tao
   * from the db
150
   * @param doc the document to be transformed
151
   * @param sourceType the document type of the source
152
   * @param targetType the target document type
153
   * @param qFormat the name of the style set to use
154
   * @param pw the StringWriter to which output will be stored
155
   */
156
  public void transformXMLDocument(String doc, String sourceType,
157 1716 berkley
                String targetType, String qFormat, StringWriter pw,
158 5025 daigle
                Hashtable<String, String[]> param, String sessionid)
159 2088 tao
  {
160 941 tao
161
    // Look up the stylesheet for this type combination
162
    String xslSystemId = getStyleSystemId(qFormat, sourceType, targetType);
163 2088 tao
    if (xslSystemId != null)
164
    {
165 941 tao
      // Create a stylesheet from the system id that was found
166 2088 tao
      try
167
      {
168 1903 tao
        doc = removeDOCTYPE(doc);
169 2088 tao
        StringReader xml = new StringReader(doc);
170
        StreamResult result = new StreamResult(pw);
171 5025 daigle
        doTransform(xml, result, xslSystemId, param, qFormat, sessionid);
172 2088 tao
      }
173
      catch (Exception e)
174
      {
175 5167 daigle
        logMetacat.error("DBTransform.transformXMLDocument - " + xslSystemId + ": Error transforming document in " +
176 941 tao
                   "DBTransform.transformXMLDocument: " +
177 2663 sgarg
                   e.getMessage());
178 1716 berkley
179 941 tao
      }
180 2088 tao
    }
181
    else
182
    {
183 1716 berkley
      // No stylesheet registered form this document type, so just return the
184 941 tao
      // XML stream we were passed
185 950 tao
      pw.write(doc);
186 941 tao
    }
187
  }
188 1716 berkley
189 2088 tao
190 3675 barteau
  /**
191
   * Reads skin's config file if it exists, and populates Transformer paramaters
192
   * with its contents.
193
   * It then adds the parameters passed to it via Hashtable param to the Transformer.
194
   * It then calls the Transformer.transform method.
195 2088 tao
   */
196 3675 barteau
  private void doTransform(StringReader docContent,
197
          StreamResult resultOutput,
198
          String xslSystemId,
199 5025 daigle
          Hashtable<String, String[]> param,
200 3675 barteau
          String qformat,
201
          String sessionid)
202
          throws Exception {
203
204 4327 leinfelder
      SortedProperties skinOptions;
205 3675 barteau
      TransformerFactory tFactory;
206
      Transformer transformer;
207
      String key, value;
208
      StreamSource xml;
209 5025 daigle
      Enumeration<String> en;
210
      Iterator<Map.Entry<String, String>> iterIt;
211
      Map.Entry<String, String> entry;
212 3675 barteau
213
      if (xslSystemId != null) {
214
        tFactory = TransformerFactory.newInstance();
215
        transformer = tFactory.newTransformer(new StreamSource(xslSystemId));
216
217 2088 tao
        transformer.setParameter("qformat", qformat);
218 5167 daigle
        logMetacat.info("DBTransform.doTransform - qformat: " + qformat);
219 2893 sgarg
220 4327 leinfelder
        skinOptions = SkinPropertyService.getProperties(qformat);
221
        if (skinOptions != null) {
222
            iterIt = skinOptions.getProperties().entrySet().iterator();
223 3675 barteau
            while (iterIt.hasNext()) {
224 5025 daigle
                entry = iterIt.next();
225
                key = entry.getKey();
226
                value = entry.getValue();
227 4327 leinfelder
                //only include the plain properties
228
                if (key.indexOf('.') == -1) {
229
                	transformer.setParameter(key, value);
230
                }
231 3675 barteau
            }
232
        }
233 2893 sgarg
234 3675 barteau
        if (sessionid != null && !sessionid.equals("null")) {
235 2088 tao
          transformer.setParameter("sessid", sessionid);
236
        }
237 3675 barteau
238 3733 leinfelder
        //set up the default params (can be overridden by the passed in params)
239 4080 daigle
        String cgiPrefix = SystemUtil.getCGI_URL();
240 5167 daigle
        logMetacat.debug("DBTransform.doTransform - cgi-prefix: " + cgiPrefix);
241
        logMetacat.debug("DBTransform.doTransform - httpServer: " + httpServer);
242
        logMetacat.debug("DBTransform.doTransform - contextURL: " + contextURL);
243
        logMetacat.debug("DBTransform.doTransform - serletURL: " + servletURL);
244 3733 leinfelder
        transformer.setParameter("cgi-prefix", cgiPrefix);
245 3780 daigle
        transformer.setParameter("httpServer", httpServer);
246 3733 leinfelder
        transformer.setParameter("contextURL", contextURL);
247 3780 daigle
        transformer.setParameter("servletURL", servletURL);
248 3733 leinfelder
249 2088 tao
        // Set up parameter for transformation
250 3675 barteau
        if ( param != null) {
251
          en = param.keys();
252
          while (en.hasMoreElements()) {
253 5025 daigle
            key = en.nextElement();
254
            value = (param.get(key))[0];
255 5167 daigle
            logMetacat.info("DBTransform.doTransform - param: " + key + " -- " + value);
256 2088 tao
            transformer.setParameter(key, value);
257
          }
258
        }
259 3675 barteau
        xml = new StreamSource(docContent);
260 2088 tao
        transformer.transform(xml,resultOutput);
261
    }
262
  }//doTransform
263
264
265 1716 berkley
  /**
266 906 berkley
   * gets the content of a tag in a given xml file with the given path
267
   * @param f the file to parse
268
   * @param path the path to get the content from
269
   */
270 1716 berkley
  public static NodeList getPathContent(File f, String path)
271 906 berkley
  {
272
    if(f == null)
273
    {
274
      return null;
275
    }
276 1716 berkley
277 906 berkley
    DOMParser parser = new DOMParser();
278
    InputSource in;
279
    FileInputStream fs;
280 1716 berkley
281 906 berkley
    try
282 1716 berkley
    {
283 906 berkley
      fs = new FileInputStream(f);
284
      in = new InputSource(fs);
285
    }
286
    catch(FileNotFoundException fnf)
287
    {
288
      fnf.printStackTrace();
289
      return null;
290
    }
291 1716 berkley
292 906 berkley
    try
293
    {
294
      parser.parse(in);
295
      fs.close();
296
    }
297
    catch(Exception e1)
298
    {
299 1716 berkley
      System.err.println("File: " + f.getPath() + " : parse threw: " +
300 906 berkley
                         e1.toString());
301
      return null;
302
    }
303 1716 berkley
304 906 berkley
    Document doc = parser.getDocument();
305 1716 berkley
306 906 berkley
    try
307
    {
308
      NodeList docNodeList = XPathAPI.selectNodeList(doc, path);
309
      return docNodeList;
310
    }
311
    catch(Exception se)
312
    {
313 1716 berkley
      System.err.println("file: " + f.getPath() + " : parse threw: " +
314 906 berkley
                         se.toString());
315
      return null;
316
    }
317
  }
318 87 jones
319
  /**
320
   * Lookup a stylesheet reference from the db catalog
321
   *
322 832 jones
   * @param qformat    the named style-set format
323
   * @param sourcetype the document type of the source
324
   * @param targettype the document type of the target
325
   */
326 1716 berkley
  public String getStyleSystemId(String qformat, String sourcetype,
327 832 jones
                String targettype) {
328
    String systemId = null;
329
330
    if ((qformat == null) || (qformat.equals("html"))) {
331
      qformat = defaultStyle;
332
    }
333
334
    // Load the style-set map for this qformat into a DOM
335
    try {
336 906 berkley
      boolean breakflag = false;
337 1929 brooke
      String filename = configDir + "/" + qformat + "/" + qformat + ".xml";
338 5167 daigle
      logMetacat.info("DBTransform.getStyleSystemId - Trying style-set file: " + filename);
339 906 berkley
      File f = new File(filename);
340
      NodeList nlDoctype = getPathContent(f, "/style-set/doctype");
341
      NodeList nlDefault = getPathContent(f, "/style-set/default-style");
342
      Node nDefault = nlDefault.item(0);
343
      systemId = nDefault.getFirstChild().getNodeValue(); //set the default
344 1716 berkley
345 906 berkley
      for(int i=0; i<nlDoctype.getLength(); i++)
346
      { //look for the right sourcetype
347
        Node nDoctype = nlDoctype.item(i);
348
        NamedNodeMap atts = nDoctype.getAttributes();
349
        Node nAtt = atts.getNamedItem("publicid");
350
        String doctype = nAtt.getFirstChild().getNodeValue();
351
        if(doctype.equals(sourcetype))
352
        { //found the right sourcetype now we need to get the target type
353
          NodeList nlChildren = nDoctype.getChildNodes();
354
          for(int j=0; j<nlChildren.getLength(); j++)
355
          {
356
            Node nChild = nlChildren.item(j);
357
            String childName = nChild.getNodeName();
358
            if(childName.equals("target"))
359
            {
360
              NamedNodeMap childAtts = nChild.getAttributes();
361
              Node nTargetPublicId = childAtts.getNamedItem("publicid");
362
              String target = nTargetPublicId.getFirstChild().getNodeValue();
363
              if(target.equals(targettype))
364
              { //we found the right target type
365
                NodeList nlTarget = nChild.getChildNodes();
366
                for(int k=0; k<nlTarget.getLength(); k++)
367
                {
368
                  Node nChildText = nlTarget.item(k);
369
                  if(nChildText.getNodeType() == Node.TEXT_NODE)
370
                  { //get the text from the target node
371
                    systemId = nChildText.getNodeValue();
372
                    breakflag = true;
373
                    break;
374
                  }
375
                }
376 832 jones
              }
377
            }
378 1716 berkley
379 906 berkley
            if(breakflag)
380
            {
381
              break;
382
            }
383 832 jones
          }
384
        }
385 1716 berkley
386 906 berkley
        if(breakflag)
387
        {
388
          break;
389
        }
390 832 jones
      }
391
    }
392 906 berkley
    catch(Exception e)
393
    {
394
      System.out.println("Error parsing style-set file: " + e.getMessage());
395
      e.printStackTrace();
396
    }
397 3725 tao
398
    //Check if the systemId is relative path, add a postfix - the contextULR to systemID.
399
    if (systemId != null && systemId.indexOf("http://" ) == -1)
400
    {
401
    	systemId = contextURL+systemId;
402
    }
403 832 jones
    // Return the system ID for this particular source document type
404 5167 daigle
    logMetacat.info("DBTransform.getStyleSystemId - style system id is: " + systemId);
405 832 jones
    return systemId;
406
  }
407
408 5025 daigle
// /* Method to modified the system id of xml input -- make sure it
409
//    points to system id in xml_catalog table
410
//  */
411
//  private void modifiedXmlStreamSource(StreamSource xml, String publicId)
412
//                                       throws Exception
413
//  {
414
//    // make sure the xml is not null
415
//    if (xml == null || publicId == null)
416
//    {
417
//      return;
418
//    }
419
//    logMetacat.info("public id of input stream is " +publicId);
420
//    // Get system id from xml_catalog table
421
//    String systemId = DBEntityResolver.getDTDSystemID(publicId);
422
//    logMetacat.info("system id of input stream from xml_catalog"
423
//                               +"table is " +systemId);
424
//    //set system id to input stream
425
//    xml.setSystemId(systemId);
426
//  }
427 2088 tao
428 1903 tao
  /*
429
   * removes the DOCTYPE element and its contents from a Sting
430 2088 tao
   * used to avoid problems with incorrect SystemIDs
431 1903 tao
   */
432 2088 tao
  private String removeDOCTYPE(String in) {
433 1903 tao
    String ret = "";
434
    int startpos = in.indexOf("<!DOCTYPE");
435
    if (startpos>-1) {
436 2088 tao
      int stoppos = in.indexOf(">", startpos + 8);
437 1903 tao
      ret = in.substring(0,startpos) + in.substring(stoppos+1,in.length());
438
    } else {
439
      return in;
440
    }
441 2088 tao
    return ret;
442 1903 tao
  }
443 99 jones
444
  /**
445
   * the main routine used to test the transform utility.
446
   *
447 184 jones
   * Usage: java DBTransform
448 99 jones
   */
449
  static public void main(String[] args) {
450 1716 berkley
451 184 jones
     if (args.length > 0)
452 99 jones
     {
453
        System.err.println("Wrong number of arguments!!!");
454 184 jones
        System.err.println("USAGE: java DBTransform");
455 99 jones
        return;
456
     } else {
457
        try {
458 1716 berkley
459 99 jones
          // Create a test document
460
          StringBuffer testdoc = new StringBuffer();
461 5752 leinfelder
          String encoding = "UTF-8";
462
          testdoc.append("<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>");
463 99 jones
          testdoc.append("<eml-dataset><metafile_id>NCEAS-0001</metafile_id>");
464
          testdoc.append("<dataset_id>DS001</dataset_id>");
465
          testdoc.append("<title>My test doc</title></eml-dataset>");
466
467
          // Transform the document to the new doctype
468 5752 leinfelder
          Writer w = new OutputStreamWriter(System.out, encoding);
469 1217 tao
          DBTransform dbt = new DBTransform();
470 1716 berkley
          dbt.transformXMLDocument(testdoc.toString(),
471
                                   "-//NCEAS//eml-dataset//EN",
472
                                   "-//W3C//HTML//EN",
473 832 jones
                                   "knb",
474 5752 leinfelder
                                   w, null, null);
475 99 jones
476
        } catch (Exception e) {
477
          System.err.println("EXCEPTION HANDLING REQUIRED");
478
          System.err.println(e.getMessage());
479
          e.printStackTrace(System.err);
480
        }
481
     }
482
  }
483 1716 berkley
484 5025 daigle
//  private void dbg(int position) {
485
//    System.err.println("Debug flag: " + position);
486
//  }
487 99 jones
488 87 jones
}