Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements XML schema service utility methods
4
 *  Copyright: 2008 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $'
10
 * '$Revision: 4297 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.service;
28

    
29
import java.io.File;
30
import java.io.FileWriter;
31
import java.io.IOException;
32
import java.io.OutputStream;
33
import java.io.StringReader;
34
import java.net.HttpURLConnection;
35
import java.net.MalformedURLException;
36
import java.net.URL;
37
import java.sql.PreparedStatement;
38
import java.sql.ResultSet;
39
import java.sql.SQLException;
40
import java.util.Vector;
41
import java.util.regex.Matcher;
42
import java.util.regex.Pattern;
43

    
44
import org.apache.commons.io.IOUtils;
45
import org.apache.log4j.Logger;
46

    
47
import edu.ucsb.nceas.metacat.DocumentImpl;
48
import edu.ucsb.nceas.metacat.MetaCatServlet;
49
import edu.ucsb.nceas.metacat.database.DBConnection;
50
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
51
import edu.ucsb.nceas.metacat.properties.PropertyService;
52
import edu.ucsb.nceas.metacat.shared.BaseService;
53
import edu.ucsb.nceas.metacat.shared.ServiceException;
54
import edu.ucsb.nceas.utilities.FileUtil;
55
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
56
import edu.ucsb.nceas.utilities.StringUtil;
57

    
58
public class XMLSchemaService extends BaseService {
59
	
60
	public static final String NAMESPACEKEYWORD = "xmlns";
61
	
62
	public static final String SCHEMA_DIR = "/schema/";
63
	
64
	private static XMLSchemaService xmlSchemaService = null;
65
	
66
	private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class);
67
	
68
	private static boolean useFullSchemaValidation = false;
69
	
70
//	private static String documentNamespace = null;
71
	
72
	// all schema objects that represent schemas registered in the db that 
73
	// actually have files on disk.
74
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
75
	
76
	// a convenience list that holds the names of registered namespaces.
77
    private static Vector<String> nameSpaceList = new Vector<String>();
78
    
79
    // a convenience string that holds all name spaces and locations in a space
80
    // delimited format
81
    private static String nameSpaceAndLocationString = ""; 
82
	
83
	/**
	 * Private constructor since this is a singleton. Sets the service name and
	 * performs the initial load of the schema caches via doRefresh().
	 */
	private XMLSchemaService() {
		_serviceName = "XMLSchemaService";
		try {
			doRefresh();
		} catch (ServiceException e) {
			// Construction must still succeed so the singleton exists, but a
			// failed initial load leaves the caches empty. Report it as an
			// error, with the stack trace, rather than hiding it at debug level.
			logMetacat.error("XMLSchemaService - could not load the registered schemas: "
					+ e.getMessage(), e);
		}
	}
94
	
95
	/**
96
	 * Get the single instance of XMLService.
97
	 * 
98
	 * @return the single instance of XMLService
99
	 */
100
	public static XMLSchemaService getInstance() {
101
		if (xmlSchemaService == null) {
102
			xmlSchemaService = new XMLSchemaService();
103
		}
104
		return xmlSchemaService;
105
	}
106
	
107
	public boolean refreshable() {
108
		return true;
109
	}
110
	
111
	/**
112
	 * refresh the persistant values in this service.
113
	 */
114
	public void doRefresh() throws ServiceException {
115
	    logMetacat.debug("XMLService.doRefresh - refreshing the schema service.");
116
		try {
117
			populateRegisteredSchemaList();
118
			setUseFullSchemaValidation();
119
			createRegisteredNameSpaceList();
120
			createRegisteredNameSpaceAndLocationString();
121
		} catch (PropertyNotFoundException pnfe) {
122
			logMetacat.error("XMLService.doRefresh - Could not find property: xml.useFullSchemaValidation. " + 
123
					"Setting to false.");
124
		}
125
	}
126
	
127
	public void stop() throws ServiceException {
128
		return;
129
	}
130
	
131
	/**
132
	 * Gets the registered schema list. This list holds schemas that exist in
133
	 * the xml_catalog table that also have associated files in the schema
134
	 * directory.
135
	 * 
136
	 * @return a list of XMLSchema objects holding registered schema information
137
	 */
138
	public Vector<XMLSchema> getRegisteredSchemaList() {
139
		return registeredSchemaList;
140
	}
141
	
142
	/**
143
	 * Gets the name space and location string. This is a convenience method.
144
	 * The string will have space delimited namespaces and locations that are
145
	 * held in the registered schema list.
146
	 * 
147
	 * @return a string that holds space delimited registered namespaces and
148
	 *         locations.
149
	 */
150
	public String getNameSpaceAndLocationString() {
151
		return nameSpaceAndLocationString;
152
	}
153
	
154
	/**
155
	 * Gets a list of name spaces. This is a convenience method. The list will 
156
	 * have all namespaces that are held in the registered schema list.
157
	 * 
158
	 * @return a list that holds registered namespaces.
159
	 */
160
	public Vector<String> getNameSpaceList() {
161
		return nameSpaceList;
162
	}
163
	
164
	/**
165
	 * Report whether xml parsing is set to use full schema parsing. If full
166
	 * schema parsing is true, new schemas will be validated before being
167
	 * entered into the database and file system.
168
	 * 
169
	 * @return true if the xml.useFullSchemaValidation property is set to true,
170
	 *         false otherwise.
171
	 */
172
	public boolean useFullSchemaValidation() {
173
		return useFullSchemaValidation;
174
	}
175
	
176
	/**
177
	 * sets the UseFullSchemaValidation variable.  The only way this should be
178
	 * set is in the constructor or the refresh methods.
179
	 */
180
	private void setUseFullSchemaValidation() throws PropertyNotFoundException {
181
		String strUseFullSchemaValidation = 
182
			PropertyService.getProperty("xml.useFullSchemaValidation");
183
		useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation);
184
	}
185

    
186
	/**
187
	 * Populate the list of registered schemas. This reads all schemas in the
188
	 * xml_catalog table and then makes sure the schema actually exists and is
189
	 * readable on disk.
190
	 */
191
	public void populateRegisteredSchemaList() {
192
		DBConnection conn = null;
193
		int serialNumber = -1;
194
		PreparedStatement pstmt = null;
195
		ResultSet resultSet = null;
196
		registeredSchemaList = new Vector<XMLSchema>();
197

    
198
		// get the system id from the xml_catalog table for all schemas.
199
		String sql = "SELECT public_id, system_id FROM xml_catalog where "
200
				+ "entry_type ='" + DocumentImpl.SCHEMA + "'";
201
		try {
202
			// check out DBConnection
203
			conn = DBConnectionPool
204
					.getDBConnection("XMLService.populateRegisteredSchemaList");
205
			serialNumber = conn.getCheckOutSerialNumber();
206
			pstmt = conn.prepareStatement(sql);
207
			logMetacat.debug("XMLService.populateRegisteredSchemaList - Selecting schemas: " + pstmt.toString());
208
			pstmt.execute();
209
			resultSet = pstmt.getResultSet();
210

    
211
			// make sure the schema actually exists on the file system. If so,
212
			// add it to the registered schema list.
213
			while (resultSet.next()) {
214
				String fileNamespace = resultSet.getString(1);
215
				String fileLocation = resultSet.getString(2);
216
				logMetacat.debug("XMLService.populateRegisteredSchemaList - Registering schema: " + fileNamespace + " " + fileLocation);
217
				XMLSchema xmlSchema = new XMLSchema(fileNamespace);
218
				if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://"))
219
				{
220
				    //System.out.println("processing an http schemal location");
221
				    logMetacat.debug("XMLService.populateRegisteredSchemaList - Processing http schema location: " + fileLocation);
222
				    xmlSchema.setExternalFileUri(fileLocation);
223
				    //cache the file
224
				    try
225
				    {
226
				        URL u = new URL(fileLocation);
227
				        //System.out.println("downloading " + fileLocation);
228
				        logMetacat.debug("XMLService.populateRegisteredSchemaList - Downloading http based schema...");
229
				        HttpURLConnection connection = (HttpURLConnection) u.openConnection();
230
				        connection.setDoOutput(true);
231
			            connection.setRequestMethod("GET");
232
			            connection.connect();
233
			            String schema = IOUtils.toString(connection.getInputStream());
234
			            
235
			            String deployDir = PropertyService.getProperty("application.deployDir");
236
			            String contextName = PropertyService.getProperty("application.context");
237
			            String filename = fileLocation.substring(fileLocation.lastIndexOf("/"), 
238
                                fileLocation.length());
239
			            File schemaFile = new File(deployDir + "/" + contextName + "/" +
240
			                    "schema/" + filename);
241
			            //System.out.println("writing schema to " + schemaFile.getAbsolutePath());
242
			            FileWriter fw = new FileWriter(schemaFile);
243
			            fw.write(schema);
244
			            fw.flush();
245
			            fw.close();
246
			            logMetacat.debug("XMLService.populateRegisteredSchemaList - Schema downloaded to " + schemaFile.getAbsolutePath());
247
			            fileLocation = "/schema/" + filename;
248
			            //System.out.println("fileLocation set to " + fileLocation);
249
			            logMetacat.debug("XMLService.populateRegisteredSchemaList - fileLocation set to " + fileLocation);
250
			            xmlSchema.setFileName(fileLocation);
251
				    }
252
				    catch(MalformedURLException me)
253
				    {
254
				        logMetacat.warn("Could not cache a registered schema at " + fileLocation +
255
				                " because a connection could not be made to the given url: " + 
256
				                me.getMessage());
257
				    }
258
                    catch (IOException ioe)
259
                    {
260
                        logMetacat.warn("Could not cache a registered schema at " + fileLocation +
261
                        " because an IOException occured: " + ioe.getMessage());
262
                    }
263
                    catch(PropertyNotFoundException pnfe)
264
                    {
265
                        logMetacat.warn("Could not cache a registered schema at " + fileLocation +
266
                                " because the property 'application.tempDir' could not be found.");
267
                    }
268
				    
269
				    xmlSchema.setFileName(fileLocation);
270
				}
271
				else
272
				{
273
				    xmlSchema.setFileName(fileLocation);
274
				}
275
								
276
				if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) 
277
				{
278
					registeredSchemaList.add(xmlSchema);
279
				}
280
				else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://"))
281
                {  //the schema resides on a different server, to validate, we need to go get it 
282
                    registeredSchemaList.add(xmlSchema);
283
                }
284
				else 
285
				{
286
					logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
287
							+ " in the database but does not exist on the file system.");
288
				}
289
			}
290
		} catch (SQLException e) {
291
			logMetacat.error("XMLService.populateRegisteredSchemaList - SQL Error: "
292
					+ e.getMessage());
293
		} finally {
294
			try {
295
				pstmt.close();
296
			}// try
297
			catch (SQLException sqlE) {
298
				logMetacat.error("XMLSchemaService.populateRegisteredSchemaList - Error in XMLService.populateRegisteredSchemaList(): "
299
						+ sqlE.getMessage());
300
			}
301
			DBConnectionPool.returnDBConnection(conn, serialNumber);
302
		}
303
	}	
304
	
305
	/**
306
	 * create a space delimited string of all namespaces and locations
307
	 * in the registered schema list.
308
	 */
309
	private static void createRegisteredNameSpaceAndLocationString() {
310
		boolean firstRow = true;
311
		nameSpaceAndLocationString = "";
312
		
313
		for (XMLSchema xmlSchema : registeredSchemaList) {
314
			if (!firstRow) {
315
				nameSpaceAndLocationString += " ";
316
			}
317
			nameSpaceAndLocationString += xmlSchema.getFileNamespace() + " "
318
					+ xmlSchema.getLocalFileUri();
319
			firstRow = false;
320
		}
321
	}
322

    
323
	/**
324
	 * create a lsit of all namespaces in the registered schema list.
325
	 */
326
	private static void createRegisteredNameSpaceList() {
327
		nameSpaceList = new Vector<String>();
328
		for (XMLSchema xmlSchema : registeredSchemaList) {
329
			nameSpaceList.add(xmlSchema.getFileNamespace());
330
		}
331
	}
332
	
333
	/**
334
	 * Checks to see that all schemas are registered. If a single one in the
335
	 * list is not, this will return false.
336
	 * 
337
	 * @param schemaList
338
	 *            a list of schemas as they appear in xml.
339
	 * @return true if all schemas are registered.
340
	 */
341
	public static boolean areAllSchemasRegistered(Vector<XMLSchema> schemaList) {			
342
		for (XMLSchema xmlSchema : schemaList) {
343
			if ( ! isSchemaRegistered(xmlSchema)) {
344
				return false;
345
			}
346
		}		
347
		return true;
348
	}
349
	
350
	/**
351
	 * Returns true if the schema is registered.
352
	 * 
353
	 * @param schema
354
	 *            a single schema as it appears in xml
355
	 * @return true if the schema is registered, false otherwise.
356
	 */
357
	public static boolean isSchemaRegistered(XMLSchema xmlSchema) {
358
		for (XMLSchema registeredXmlSchema : registeredSchemaList) {
359
			if (registeredXmlSchema.getLocalFileUri().equals(
360
						xmlSchema.getLocalFileUri())
361
					&& registeredXmlSchema.getFileNamespace().equals(
362
							xmlSchema.getFileNamespace())) {
363
				return true;
364
			}
365
		}
366

    
367
		return false;
368
	}
369
	
370
    /**
371
	 * See if schemas have been specified in the xml:schemalocation attribute.
372
	 * If so, return a vector of the system ids.
373
	 * 
374
	 * @param xml
375
	 *            the document we want to look in for schema location
376
	 * @return a vector of XMLSchema objects, or an empty vector if none are
377
	 *         found
378
	 */
379
	public static Vector<XMLSchema> findSchemasInXML(StringReader xml) throws IOException {
380
		Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
381
		Vector<XMLSchema> schemaList = new Vector<XMLSchema>();
382

    
383
		// no xml. return empty vector
384
		if (xml == null) {
385
			logMetacat.debug("XMLSchemaService.findSchemasInXML - Returning empty schemaList.");
386
			return schemaList;
387
		}
388

    
389
		// Get the "second line" from the xml
390
		String targetLine = getSchemaLine(xml);
391

    
392
		// see if there is a match for xsi.schemaLocation. If so, extract the
393
		// schemas.
394
		if (targetLine != null) {
395
			String regex = "(\\p{Graph}*):schemaLocation=\"([^\"]*)\"";
396
			Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE
397
					| Pattern.DOTALL);
398
			Matcher matcher = pattern.matcher(targetLine);
399
			int i = 0;
400
			while (i < targetLine.length()) {
401
				if (!matcher.find(i)) {
402
					break;
403
				}
404

    
405
				String uri = matcher.group(2);
406
				uri = StringUtil.replaceTabsNewLines(uri);
407
				uri = StringUtil.replaceDuplicateSpaces(uri);
408

    
409
				// each namespace could have several schema locations. parsedUri
410
				// will hold a list of uri and files.
411
				Vector<String> parsedUri = StringUtil.toVector(uri, ' ');
412
				for (int j = 0; j < parsedUri.size(); j = j + 2) {
413
					if (j + 1 >= parsedUri.size()) {
414
						throw new IOException(
415
								"Odd number of elements found when parsing schema location: "
416
										+ targetLine
417
										+ ". There should be an even number of uri/files in location.");
418
					}
419
					XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri
420
							.get(j + 1));
421
					schemaList.add(xmlSchema);
422
				}
423
				i = matcher.end();
424
			}
425
		}
426

    
427
		logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for eml are " + schemaList.toString());
428

    
429
		return schemaList;
430
	}    
431
    
432
    /**
433
	 * Returns all the namespace for an xml document.  This is done by getting
434
	 * the internal namespace declaration (prefix) and looking for xmlns:<prefix>
435
	 * 
436
	 * @param xml
437
	 *            the document to search
438
	 * @return a string holding the namespace
439
	 */
440
	public static String findDocumentNamespace(StringReader xml) throws IOException {
441
		String namespace = null;
442

    
443
		String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
444
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
445
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
446

    
447
		if (xml == null) {
448
			logMetacat.debug("XMLSchemaService.findDocumentNamespace - XML doc is null.  There is no namespace.");
449
			return namespace;
450
		}
451

    
452
		String targetLine = getSchemaLine(xml);
453

    
454
		// the prefix is at the beginning of the doc
455
		String prefix = null;
456
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}* ";
457
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
458
		Matcher matcher = pattern.matcher(targetLine);
459
		if (matcher.find()) {
460
			prefix = matcher.group(1).trim();
461
		}
462

    
463
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
464
		// if no prefix was found, we grab the first namespace.
465
		String regex2;
466
		if (prefix != null) {
467
			regex2 = "xmlns:" + prefix + "=\"(.*)\"";
468
		} else {
469
			regex2 = "xmlns:.*=\"(.*)\"";
470
		}
471
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
472
		Matcher matcher2 = pattern2.matcher(targetLine);
473
		if (matcher2.find()) {
474
			namespace = matcher2.group(1);
475

    
476
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
477
				namespace = eml2_0_0NameSpace;
478
			} else if (namespace.indexOf(eml2_0_1NameSpace) != -1) {
479
				namespace = eml2_0_1NameSpace;
480
			} else if (namespace.indexOf(eml2_1_0NameSpace) != -1) {
481
				namespace = eml2_1_0NameSpace;
482
			}
483
		}
484

    
485
		return namespace;
486
	}
487
    
488
    /**
489
	 * Return the line from xml that holds the metadata like namespace and
490
	 * schema location
491
	 * 
492
	 * @param xml
493
	 *            the document to parse
494
	 * @return the "second" line of the document
495
	 */
496
    private static String getSchemaLine(StringReader xml) throws IOException {
497
        Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
498
        // find the line
499
        String secondLine = null;
500
        int count = 0;
501
        final int TARGETNUM = 1;
502
        StringBuffer buffer = new StringBuffer();
503
        boolean comment = false;
504
        boolean processingInstruction = false;
505
        char thirdPreviousCharacter = '?';
506
        char secondPreviousCharacter = '?';
507
        char previousCharacter = '?';
508
        char currentCharacter = '?';
509
        int tmp = xml.read();
510
        while (tmp != -1) {
511
            currentCharacter = (char)tmp;
512
            //in a comment
513
            if (currentCharacter == '-' && previousCharacter == '-'
514
                    && secondPreviousCharacter == '!'
515
                    && thirdPreviousCharacter == '<') {
516
                comment = true;
517
            }
518
            //out of comment
519
            if (comment && currentCharacter == '>' && previousCharacter == '-'
520
                    && secondPreviousCharacter == '-') {
521
                comment = false;
522
            }
523
            
524
            //in a processingInstruction
525
            if (currentCharacter == '?' && previousCharacter == '<') {
526
                processingInstruction = true;
527
            }
528
            
529
            //out of processingInstruction
530
            if (processingInstruction && currentCharacter == '>'
531
                    && previousCharacter == '?') {
532
                processingInstruction = false;
533
            }
534
            
535
            //this is not comment or a processingInstruction
536
            if (currentCharacter != '!' && previousCharacter == '<'
537
                    && !comment && !processingInstruction) {
538
                count++;
539
            }
540
            
541
            // get target line
542
            if (count == TARGETNUM && currentCharacter != '>') {
543
                buffer.append(currentCharacter);
544
            }
545
            if (count == TARGETNUM && currentCharacter == '>') {
546
                break;
547
            }
548
            thirdPreviousCharacter = secondPreviousCharacter;
549
            secondPreviousCharacter = previousCharacter;
550
            previousCharacter = currentCharacter;
551
            tmp = xml.read();
552
        }
553
        secondLine = buffer.toString();
554
        logMetacat.debug("XMLSchemaService.getSchemaLine - the second line string is: " + secondLine);
555
        
556
        xml.reset();
557
        return secondLine;
558
    }
559
    
560
    /**
561
	 * Get a schema file name from the schema uri.
562
	 * 
563
	 * @param uri
564
	 *            the uri from which to extract the file name
565
	 * @return a string holding the file name
566
	 */
567
    public static String getSchemaFileNameFromUri(String uri) {
568
		// get filename from systemId
569
		String filename = uri;
570
		
571
		if (filename != null && !(filename.trim()).equals("")) {
572
			int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
573
			if (slash > -1) {
574
				filename = filename.substring(slash + 1);
575
			}
576
		}
577

    
578
		return filename;
579
	}
580
}
(4-4/4)