Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that transforms an XML text document
4
 *             into another type using XSL
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Matt Jones
8
 *
9
 * '$Author: leinfelder $'
10
 * '$Date: 2010-12-21 14:26:06 -0800 (Tue, 21 Dec 2010) $'
11
 * '$Revision: 5752 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27

    
28
package edu.ucsb.nceas.metacat;
29

    
30
import java.io.File;
31
import java.io.FileInputStream;
32
import java.io.FileNotFoundException;
33
import java.io.IOException;
34
import java.io.OutputStreamWriter;
35
import java.io.StringReader;
36
import java.io.StringWriter;
37
import java.io.Writer;
38
import java.sql.SQLException;
39
import java.util.Enumeration;
40
import java.util.Hashtable;
41
import java.util.Iterator;
42
import java.util.Map;
43

    
44
import javax.xml.transform.TransformerFactory;
45
import javax.xml.transform.Transformer;
46
import javax.xml.transform.stream.StreamSource;
47
import javax.xml.transform.stream.StreamResult;
48

    
49
import org.apache.log4j.Logger;
50
import org.apache.xerces.parsers.DOMParser;
51
import org.w3c.dom.NamedNodeMap;
52
import org.w3c.dom.NodeList;
53
import org.w3c.dom.Document;
54
import org.w3c.dom.Node;
55
import org.xml.sax.InputSource;
56
import org.apache.xpath.XPathAPI;
57

    
58
import edu.ucsb.nceas.metacat.properties.PropertyService;
59
import edu.ucsb.nceas.metacat.properties.SkinPropertyService;
60
import edu.ucsb.nceas.metacat.util.SystemUtil;
61
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
62
import edu.ucsb.nceas.utilities.SortedProperties;
63

    
64
/**
65
 * A class that transforms XML documents utilizing XSL style sheets
66
 */
67
public class DBTransform {
68

    
69
  // private Connection	conn = null;
70
  // private MetacatUtil   util = null;
71
  private String 	configDir = null;
72
  private String	defaultStyle = null;
73
  private Logger logMetacat = Logger.getLogger(DBTransform.class);
74
  private String httpServer = null;
75
  private String contextURL = null;
76
  private String servletURL = null;
77
  
78
  /**
   * Construct a DBTransform instance.
   *
   * The constructor caches the skins directory, the default style name
   * (property "application.default-style") and the server, context and
   * servlet URLs for later use by the transform methods.
   *
   * Generally, one calls transformXMLDocument() after constructing the
   * instance.
   *
   * The checked exceptions below are part of the declared signature; which
   * of the lookups raises each of them is not visible from this class.
   *
   * @throws IOException declared by the signature
   * @throws SQLException declared by the signature
   * @throws ClassNotFoundException declared by the signature
   * @throws PropertyNotFoundException declared by the signature
   */
  public DBTransform()
      throws IOException, SQLException, ClassNotFoundException,
             PropertyNotFoundException {
    // Keep the lookup order unchanged: each call may fail independently.
    this.configDir = SystemUtil.getStyleSkinsDir();
    this.defaultStyle = PropertyService.getProperty("application.default-style");
    this.httpServer = SystemUtil.getServerURL();
    this.contextURL = SystemUtil.getContextURL();
    this.servletURL = SystemUtil.getServletURL();
  }
97

    
98
  /**
99
   * Transform an XML document using the stylesheet reference from the db
100
   *
101
   * @param doc the document to be transformed
102
   * @param sourcetype the document type of the source
103
   * @param targettype the target document type
104
   * @param qformat the name of the style set to use
105
   * @param pw the PrintWriter to which output is printed
106
   * @param params some parameters for eml2 transformation
107
   */
108
  public void transformXMLDocument(String doc, String sourceType,
109
                                   String targetType, String qformat,
110
                                   Writer w, Hashtable<String, String[]> param,
111
                                   String sessionid)
112
 {
113

    
114
	  String xslSystemId = getStyleSystemId(qformat, sourceType, targetType);
115
	  try {
116
		  // Look up the stylesheet for this type combination
117
		  if (xslSystemId != null) {
118
			// Create a stylesheet from the system id that was found
119
			doc = removeDOCTYPE(doc);
120
			StringReader xml = new StringReader(doc);
121
			StreamResult result = new StreamResult(w);
122
			doTransform(xml, result, xslSystemId, param, qformat, sessionid);
123
		  }
124
		  else {
125
			  // No stylesheet registered form this document type, so just return the
126
			  // XML stream we were passed
127
			  w.write(doc);
128
		  }
129
      }
130
      catch (Exception e)
131
      {
132
    	  try {
133
			w.write(xslSystemId + ": Error transforming document in " +
134
			           "DBTransform.transformXMLDocument: " +
135
			           e.getMessage());
136
		} catch (IOException e1) {
137
			logMetacat.error(e1.getMessage(), e1);
138
		}
139

    
140
      }
141
    
142
  }
143
  
144
  /**
145
   * Transform an XML document to StringWriter using the stylesheet reference
146
   * from the db
147
   * @param doc the document to be transformed
148
   * @param sourceType the document type of the source
149
   * @param targetType the target document type
150
   * @param qFormat the name of the style set to use
151
   * @param pw the StringWriter to which output will be stored
152
   */
153
  public void transformXMLDocument(String doc, String sourceType,
154
                String targetType, String qFormat, StringWriter pw,
155
                Hashtable<String, String[]> param, String sessionid)
156
  {
157

    
158
    // Look up the stylesheet for this type combination
159
    String xslSystemId = getStyleSystemId(qFormat, sourceType, targetType);
160
    if (xslSystemId != null)
161
    {
162
      // Create a stylesheet from the system id that was found
163
      try
164
      {
165
        doc = removeDOCTYPE(doc);
166
        StringReader xml = new StringReader(doc);
167
        StreamResult result = new StreamResult(pw);
168
        doTransform(xml, result, xslSystemId, param, qFormat, sessionid);
169
      }
170
      catch (Exception e)
171
      {
172
        logMetacat.error("DBTransform.transformXMLDocument - " + xslSystemId + ": Error transforming document in " +
173
                   "DBTransform.transformXMLDocument: " +
174
                   e.getMessage());
175

    
176
      }
177
    }
178
    else
179
    {
180
      // No stylesheet registered form this document type, so just return the
181
      // XML stream we were passed
182
      pw.write(doc);
183
    }
184
  }
185

    
186

    
187
  /**
188
   * Reads skin's config file if it exists, and populates Transformer paramaters
189
   * with its contents.
190
   * It then adds the parameters passed to it via Hashtable param to the Transformer.
191
   * It then calls the Transformer.transform method.
192
   */
193
  private void doTransform(StringReader docContent, 
194
          StreamResult resultOutput,
195
          String xslSystemId, 
196
          Hashtable<String, String[]> param,
197
          String qformat, 
198
          String sessionid) 
199
          throws Exception {
200
      
201
      SortedProperties skinOptions;
202
      TransformerFactory tFactory;
203
      Transformer transformer;
204
      String key, value;
205
      StreamSource xml;
206
      Enumeration<String> en;
207
      Iterator<Map.Entry<String, String>> iterIt;
208
      Map.Entry<String, String> entry;
209
      
210
      if (xslSystemId != null) {
211
        tFactory = TransformerFactory.newInstance();
212
        transformer = tFactory.newTransformer(new StreamSource(xslSystemId));
213
        
214
        transformer.setParameter("qformat", qformat);
215
        logMetacat.info("DBTransform.doTransform - qformat: " + qformat);
216
        
217
        skinOptions = SkinPropertyService.getProperties(qformat);
218
        if (skinOptions != null) {            
219
            iterIt = skinOptions.getProperties().entrySet().iterator();
220
            while (iterIt.hasNext()) {
221
                entry = iterIt.next();
222
                key = entry.getKey();
223
                value = entry.getValue();
224
                //only include the plain properties
225
                if (key.indexOf('.') == -1) {
226
                	transformer.setParameter(key, value);
227
                }
228
            }
229
        }
230
        
231
        if (sessionid != null && !sessionid.equals("null")) {
232
          transformer.setParameter("sessid", sessionid);
233
        }
234
        
235
        //set up the default params (can be overridden by the passed in params)
236
        String cgiPrefix = SystemUtil.getCGI_URL();
237
        logMetacat.debug("DBTransform.doTransform - cgi-prefix: " + cgiPrefix);
238
        logMetacat.debug("DBTransform.doTransform - httpServer: " + httpServer);
239
        logMetacat.debug("DBTransform.doTransform - contextURL: " + contextURL);
240
        logMetacat.debug("DBTransform.doTransform - serletURL: " + servletURL);
241
        transformer.setParameter("cgi-prefix", cgiPrefix);
242
        transformer.setParameter("httpServer", httpServer);
243
        transformer.setParameter("contextURL", contextURL);
244
        transformer.setParameter("servletURL", servletURL);
245
        
246
        // Set up parameter for transformation
247
        if ( param != null) {
248
          en = param.keys();
249
          while (en.hasMoreElements()) {
250
            key = en.nextElement();
251
            value = (param.get(key))[0];
252
            logMetacat.info("DBTransform.doTransform - param: " + key + " -- " + value);
253
            transformer.setParameter(key, value);
254
          }
255
        }
256
        xml = new StreamSource(docContent);
257
        transformer.transform(xml,resultOutput);
258
    }
259
  }//doTransform
260

    
261

    
262
  /**
263
   * gets the content of a tag in a given xml file with the given path
264
   * @param f the file to parse
265
   * @param path the path to get the content from
266
   */
267
  public static NodeList getPathContent(File f, String path)
268
  {
269
    if(f == null)
270
    {
271
      return null;
272
    }
273

    
274
    DOMParser parser = new DOMParser();
275
    InputSource in;
276
    FileInputStream fs;
277

    
278
    try
279
    {
280
      fs = new FileInputStream(f);
281
      in = new InputSource(fs);
282
    }
283
    catch(FileNotFoundException fnf)
284
    {
285
      fnf.printStackTrace();
286
      return null;
287
    }
288

    
289
    try
290
    {
291
      parser.parse(in);
292
      fs.close();
293
    }
294
    catch(Exception e1)
295
    {
296
      System.err.println("File: " + f.getPath() + " : parse threw: " +
297
                         e1.toString());
298
      return null;
299
    }
300

    
301
    Document doc = parser.getDocument();
302

    
303
    try
304
    {
305
      NodeList docNodeList = XPathAPI.selectNodeList(doc, path);
306
      return docNodeList;
307
    }
308
    catch(Exception se)
309
    {
310
      System.err.println("file: " + f.getPath() + " : parse threw: " +
311
                         se.toString());
312
      return null;
313
    }
314
  }
315

    
316
  /**
317
   * Lookup a stylesheet reference from the db catalog
318
   *
319
   * @param qformat    the named style-set format
320
   * @param sourcetype the document type of the source
321
   * @param targettype the document type of the target
322
   */
323
  public String getStyleSystemId(String qformat, String sourcetype,
324
                String targettype) {
325
    String systemId = null;
326

    
327
    if ((qformat == null) || (qformat.equals("html"))) {
328
      qformat = defaultStyle;
329
    }
330

    
331
    // Load the style-set map for this qformat into a DOM
332
    try {
333
      boolean breakflag = false;
334
      String filename = configDir + "/" + qformat + "/" + qformat + ".xml";
335
      logMetacat.info("DBTransform.getStyleSystemId - Trying style-set file: " + filename);
336
      File f = new File(filename);
337
      NodeList nlDoctype = getPathContent(f, "/style-set/doctype");
338
      NodeList nlDefault = getPathContent(f, "/style-set/default-style");
339
      Node nDefault = nlDefault.item(0);
340
      systemId = nDefault.getFirstChild().getNodeValue(); //set the default
341

    
342
      for(int i=0; i<nlDoctype.getLength(); i++)
343
      { //look for the right sourcetype
344
        Node nDoctype = nlDoctype.item(i);
345
        NamedNodeMap atts = nDoctype.getAttributes();
346
        Node nAtt = atts.getNamedItem("publicid");
347
        String doctype = nAtt.getFirstChild().getNodeValue();
348
        if(doctype.equals(sourcetype))
349
        { //found the right sourcetype now we need to get the target type
350
          NodeList nlChildren = nDoctype.getChildNodes();
351
          for(int j=0; j<nlChildren.getLength(); j++)
352
          {
353
            Node nChild = nlChildren.item(j);
354
            String childName = nChild.getNodeName();
355
            if(childName.equals("target"))
356
            {
357
              NamedNodeMap childAtts = nChild.getAttributes();
358
              Node nTargetPublicId = childAtts.getNamedItem("publicid");
359
              String target = nTargetPublicId.getFirstChild().getNodeValue();
360
              if(target.equals(targettype))
361
              { //we found the right target type
362
                NodeList nlTarget = nChild.getChildNodes();
363
                for(int k=0; k<nlTarget.getLength(); k++)
364
                {
365
                  Node nChildText = nlTarget.item(k);
366
                  if(nChildText.getNodeType() == Node.TEXT_NODE)
367
                  { //get the text from the target node
368
                    systemId = nChildText.getNodeValue();
369
                    breakflag = true;
370
                    break;
371
                  }
372
                }
373
              }
374
            }
375

    
376
            if(breakflag)
377
            {
378
              break;
379
            }
380
          }
381
        }
382

    
383
        if(breakflag)
384
        {
385
          break;
386
        }
387
      }
388
    }
389
    catch(Exception e)
390
    {
391
      System.out.println("Error parsing style-set file: " + e.getMessage());
392
      e.printStackTrace();
393
    }
394
    
395
    //Check if the systemId is relative path, add a postfix - the contextULR to systemID. 
396
    if (systemId != null && systemId.indexOf("http://" ) == -1)
397
    {
398
    	systemId = contextURL+systemId;
399
    }
400
    // Return the system ID for this particular source document type
401
    logMetacat.info("DBTransform.getStyleSystemId - style system id is: " + systemId);
402
    return systemId;
403
  }
404

    
405
// /* Method to modified the system id of xml input -- make sure it
406
//    points to system id in xml_catalog table
407
//  */
408
//  private void modifiedXmlStreamSource(StreamSource xml, String publicId)
409
//                                       throws Exception
410
//  {
411
//    // make sure the xml is not null
412
//    if (xml == null || publicId == null)
413
//    {
414
//      return;
415
//    }
416
//    logMetacat.info("public id of input stream is " +publicId);
417
//    // Get system id from xml_catalog table
418
//    String systemId = DBEntityResolver.getDTDSystemID(publicId);
419
//    logMetacat.info("system id of input stream from xml_catalog"
420
//                               +"table is " +systemId);
421
//    //set system id to input stream
422
//    xml.setSystemId(systemId);
423
//  }
424

    
425
  /*
426
   * removes the DOCTYPE element and its contents from a Sting
427
   * used to avoid problems with incorrect SystemIDs
428
   */
429
  private String removeDOCTYPE(String in) {
430
    String ret = "";
431
    int startpos = in.indexOf("<!DOCTYPE");
432
    if (startpos>-1) {
433
      int stoppos = in.indexOf(">", startpos + 8);
434
      ret = in.substring(0,startpos) + in.substring(stoppos+1,in.length());
435
    } else {
436
      return in;
437
    }
438
    return ret;
439
  }
440

    
441
  /**
442
   * the main routine used to test the transform utility.
443
   *
444
   * Usage: java DBTransform
445
   */
446
  static public void main(String[] args) {
447

    
448
     if (args.length > 0)
449
     {
450
        System.err.println("Wrong number of arguments!!!");
451
        System.err.println("USAGE: java DBTransform");
452
        return;
453
     } else {
454
        try {
455

    
456
          // Create a test document
457
          StringBuffer testdoc = new StringBuffer();
458
          String encoding = "UTF-8";
459
          testdoc.append("<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>");
460
          testdoc.append("<eml-dataset><metafile_id>NCEAS-0001</metafile_id>");
461
          testdoc.append("<dataset_id>DS001</dataset_id>");
462
          testdoc.append("<title>My test doc</title></eml-dataset>");
463

    
464
          // Transform the document to the new doctype
465
          Writer w = new OutputStreamWriter(System.out, encoding);
466
          DBTransform dbt = new DBTransform();
467
          dbt.transformXMLDocument(testdoc.toString(),
468
                                   "-//NCEAS//eml-dataset//EN",
469
                                   "-//W3C//HTML//EN",
470
                                   "knb",
471
                                   w, null, null);
472

    
473
        } catch (Exception e) {
474
          System.err.println("EXCEPTION HANDLING REQUIRED");
475
          System.err.println(e.getMessage());
476
          e.printStackTrace(System.err);
477
        }
478
     }
479
  }
480

    
481
//  private void dbg(int position) {
482
//    System.err.println("Debug flag: " + position);
483
//  }
484

    
485
}
(21-21/65)