Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements session utility methods 
4
 *  Copyright: 2008 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $'
10
 * '$Revision: 4297 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.service;
28

    
29
import java.io.IOException;
30
import java.io.StringReader;
31
import java.sql.PreparedStatement;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.util.Vector;
35
import java.util.regex.Matcher;
36
import java.util.regex.Pattern;
37

    
38
import org.apache.log4j.Logger;
39

    
40
import edu.ucsb.nceas.metacat.DBConnection;
41
import edu.ucsb.nceas.metacat.DBConnectionPool;
42
import edu.ucsb.nceas.metacat.DocumentImpl;
43
import edu.ucsb.nceas.metacat.MetaCatServlet;
44
import edu.ucsb.nceas.utilities.FileUtil;
45
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
46
import edu.ucsb.nceas.utilities.StringUtil;
47

    
48
public class XMLSchemaService extends BaseService {
49
	
50
	public static final String NAMESPACEKEYWORD = "xmlns";
51
	
52
	public static final String SCHEMA_DIR = "/schema/";
53
	
54
	private static XMLSchemaService xmlSchemaService = null;
55
	
56
	private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class);
57
	
58
	private static boolean useFullSchemaValidation = false;
59
	
60
	private static String documentNamespace = null;
61
	
62
	// all schema objects that represent schemas registered in the db that 
63
	// actually have files on disk.
64
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
65
	
66
	// a convenience list that holds the names of registered namespaces.
67
    private static Vector<String> nameSpaceList = new Vector<String>();
68
    
69
    // a convenience string that holds all name spaces and locations in a space
70
    // delimited format
71
    private static String nameSpaceAndLocationString = ""; 
72
	
73
	/**
74
	 * private constructor since this is a singleton
75
	 */
76
	private XMLSchemaService() {
77
		try {
78
			setUseFullSchemaValidation();
79
			populateRegisteredSchemaList();
80
			createRegisteredNameSpaceList();
81
			createRegisteredNameSpaceAndLocationString();
82
			
83
		} catch (PropertyNotFoundException pnfe) {
84
			logMetacat.error("Could not find property: xml.useFullSchemaValidation. " + 
85
					"Setting to false.");
86
		}
87
	}
88
	
89
	/**
90
	 * Get the single instance of XMLService.
91
	 * 
92
	 * @return the single instance of XMLService
93
	 */
94
	public static XMLSchemaService getInstance() {
95
		if (xmlSchemaService == null) {
96
			xmlSchemaService = new XMLSchemaService();
97
		}
98
		return xmlSchemaService;
99
	}
100
	
101
	public boolean refreshable() {
102
		return true;
103
	}
104
	
105
	/**
106
	 * refresh the persistant values in this service.
107
	 */
108
	protected void doRefresh() {
109
		try {
110
			populateRegisteredSchemaList();
111
			setUseFullSchemaValidation();
112
			createRegisteredNameSpaceList();
113
			createRegisteredNameSpaceAndLocationString();
114
		} catch (PropertyNotFoundException pnfe) {
115
			logMetacat.error("Could not find property: xml.useFullSchemaValidation. " + 
116
					"Setting to false.");
117
		}
118
	}
119
	
120
	/**
121
	 * Gets the registered schema list. This list holds schemas that exist in
122
	 * the xml_catalog table that also have associated files in the schema
123
	 * directory.
124
	 * 
125
	 * @return a list of XMLSchema objects holding registered schema information
126
	 */
127
	public static Vector<XMLSchema> getRegisteredSchemaList() {
128
		return registeredSchemaList;
129
	}
130
	
131
	/**
132
	 * Gets the name space and location string. This is a convenience method.
133
	 * The string will have space delimited namespaces and locations that are
134
	 * held in the registered schema list.
135
	 * 
136
	 * @return a string that holds space delimited registered namespaces and
137
	 *         locations.
138
	 */
139
	public static String getNameSpaceAndLocationString() {
140
		return nameSpaceAndLocationString;
141
	}
142
	
143
	/**
144
	 * Gets a list of name spaces. This is a convenience method. The list will 
145
	 * have all namespaces that are held in the registered schema list.
146
	 * 
147
	 * @return a list that holds registered namespaces.
148
	 */
149
	public static Vector<String> getNameSpaceList() {
150
		return nameSpaceList;
151
	}
152
	
153
	/**
154
	 * Report whether xml parsing is set to use full schema parsing. If full
155
	 * schema parsing is true, new schemas will be validated before being
156
	 * entered into the database and file system.
157
	 * 
158
	 * @return true if the xml.useFullSchemaValidation property is set to true,
159
	 *         false otherwise.
160
	 */
161
	public static boolean useFullSchemaValidation() {
162
		return useFullSchemaValidation;
163
	}
164
	
165
	/**
166
	 * sets the UseFullSchemaValidation variable.  The only way this should be
167
	 * set is in the constructor or the refresh methods.
168
	 */
169
	private static void setUseFullSchemaValidation() throws PropertyNotFoundException {
170
		String strUseFullSchemaValidation = 
171
			PropertyService.getProperty("xml.useFullSchemaValidation");
172
		useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation);
173
	}
174

    
175
	/**
176
	 * Populate the list of registered schemas. This reads all schemas in the
177
	 * xml_catalog table and then makes sure the schema actually exists and is
178
	 * readable on disk.
179
	 */
180
	public static void populateRegisteredSchemaList() {
181
		DBConnection conn = null;
182
		int serialNumber = -1;
183
		PreparedStatement pstmt = null;
184
		ResultSet resultSet = null;
185
		registeredSchemaList = new Vector<XMLSchema>();
186

    
187
		// get the system id from the xml_catalog table for all schemas.
188
		String sql = "SELECT public_id, system_id FROM xml_catalog where "
189
				+ "entry_type ='" + DocumentImpl.SCHEMA + "'";
190
		try {
191
			// check out DBConnection
192
			conn = DBConnectionPool
193
					.getDBConnection("XMLService.populateRegisteredSchemaList");
194
			serialNumber = conn.getCheckOutSerialNumber();
195
			pstmt = conn.prepareStatement(sql);
196
			logMetacat.debug("Selecting schemas: " + pstmt.toString());
197
			pstmt.execute();
198
			resultSet = pstmt.getResultSet();
199

    
200
			// make sure the schema actually exists on the file system. If so,
201
			// add it to the registered schema list.
202
			while (resultSet.next()) {
203
				String fileNamespace = resultSet.getString(1);
204
				String fileLocation = resultSet.getString(2);
205
				logMetacat.debug("Registering schema: " + fileNamespace + " " + fileLocation);
206
				XMLSchema xmlSchema = new XMLSchema(fileNamespace);
207
				xmlSchema.setFileName(fileLocation);
208

    
209
				if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) {
210
					registeredSchemaList.add(xmlSchema);
211
				} else {
212
					logMetacat.warn("Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
213
							+ " in the database but does not exist on the file system.");
214
				}
215
			}
216
		} catch (SQLException e) {
217
			logMetacat.error("XMLService.populateRegisteredSchemaList(): "
218
					+ e.getMessage());
219
		} finally {
220
			try {
221
				pstmt.close();
222
			}// try
223
			catch (SQLException sqlE) {
224
				logMetacat.error("Error in XMLService.populateRegisteredSchemaList(): "
225
						+ sqlE.getMessage());
226
			}
227
			DBConnectionPool.returnDBConnection(conn, serialNumber);
228
		}
229
	}	
230
	
231
	/**
232
	 * create a space delimited string of all namespaces and locations
233
	 * in the registered schema list.
234
	 */
235
	private static void createRegisteredNameSpaceAndLocationString() {
236
		boolean firstRow = true;
237
		nameSpaceAndLocationString = "";
238
		
239
		for (XMLSchema xmlSchema : registeredSchemaList) {
240
			if (!firstRow) {
241
				nameSpaceAndLocationString += " ";
242
			}
243
			nameSpaceAndLocationString += xmlSchema.getFileNamespace() + " "
244
					+ xmlSchema.getLocalFileUri();
245
			firstRow = false;
246
		}
247
	}
248

    
249
	/**
250
	 * create a lsit of all namespaces in the registered schema list.
251
	 */
252
	private static void createRegisteredNameSpaceList() {
253
		nameSpaceList = new Vector<String>();
254
		for (XMLSchema xmlSchema : registeredSchemaList) {
255
			nameSpaceList.add(xmlSchema.getFileNamespace());
256
		}
257
	}
258
	
259
	/**
260
	 * Checks to see that all schemas are registered. If a single one in the
261
	 * list is not, this will return false.
262
	 * 
263
	 * @param schemaList
264
	 *            a list of schemas as they appear in xml.
265
	 * @return true if all schemas are registered.
266
	 */
267
	public static boolean areAllSchemasRegistered(Vector<XMLSchema> schemaList) {			
268
		for (XMLSchema xmlSchema : schemaList) {
269
			if ( ! isSchemaRegistered(xmlSchema)) {
270
				return false;
271
			}
272
		}		
273
		return true;
274
	}
275
	
276
	/**
277
	 * Returns true if the schema is registered.
278
	 * 
279
	 * @param schema
280
	 *            a single schema as it appears in xml
281
	 * @return true if the schema is registered, false otherwise.
282
	 */
283
	public static boolean isSchemaRegistered(XMLSchema xmlSchema) {
284
		for (XMLSchema registeredXmlSchema : registeredSchemaList) {
285
			if (registeredXmlSchema.getLocalFileUri().equals(
286
						xmlSchema.getLocalFileUri())
287
					&& registeredXmlSchema.getFileNamespace().equals(
288
							xmlSchema.getFileNamespace())) {
289
				return true;
290
			}
291
		}
292

    
293
		return false;
294
	}
295
	
296
    /**
297
	 * See if schemas have been specified in the xml:schemalocation attribute.
298
	 * If so, return a vector of the system ids.
299
	 * 
300
	 * @param xml
301
	 *            the document we want to look in for schema location
302
	 * @return a vector of XMLSchema objects, or an empty vector if none are
303
	 *         found
304
	 */
305
	public static Vector<XMLSchema> findSchemasInXML(StringReader xml) throws IOException {
306
		Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
307
		Vector<XMLSchema> schemaList = new Vector<XMLSchema>();
308

    
309
		// no xml. return empty vector
310
		if (xml == null) {
311
			logMetacat.debug("Returning empty schemaList.");
312
			return schemaList;
313
		}
314

    
315
		// Get the "second line" from the xml
316
		String targetLine = getSchemaLine(xml);
317

    
318
		// see if there is a match for xsi.schemaLocation. If so, extract the
319
		// schemas.
320
		if (targetLine != null) {
321
			String regex = "(\\p{Graph}*):schemaLocation=\"(.*)\"";
322
			Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE
323
					| Pattern.DOTALL);
324
			Matcher matcher = pattern.matcher(targetLine);
325
			int i = 0;
326
			while (i < targetLine.length()) {
327
				if (!matcher.find(i)) {
328
					break;
329
				}
330

    
331
				String uri = matcher.group(2);
332
				uri = StringUtil.replaceTabsNewLines(uri);
333
				uri = StringUtil.replaceDuplicateSpaces(uri);
334

    
335
				// each namespace could have several schema locations. parsedUri
336
				// will hold a list of uri and files.
337
				Vector<String> parsedUri = StringUtil.toVector(uri, ' ');
338
				for (int j = 0; j < parsedUri.size(); j = j + 2) {
339
					if (j + 1 >= parsedUri.size()) {
340
						throw new IOException(
341
								"Odd number of elements found when parsing schema location: "
342
										+ targetLine
343
										+ ". There should be an even number of uri/files in location.");
344
					}
345
					XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri
346
							.get(j + 1));
347
					schemaList.add(xmlSchema);
348
				}
349
				i = matcher.end();
350
			}
351
		}
352

    
353
		logMetacat.debug("Schemas for eml are " + schemaList.toString());
354

    
355
		return schemaList;
356
	}    
357
    
358
    /**
359
	 * Returns all the namespace for an xml document.  This is done by getting
360
	 * the internal namespace declaration (prefix) and looking for xmlns:<prefix>
361
	 * 
362
	 * @param xml
363
	 *            the document to search
364
	 * @return a string holding the namespace
365
	 */
366
	public static String findDocumentNamespace(StringReader xml) throws IOException {
367
		String namespace = null;
368

    
369
		String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
370
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
371
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
372

    
373
		if (xml == null) {
374
			logMetacat.debug("XML doc is null.  There is no namespace.");
375
			return namespace;
376
		}
377

    
378
		String targetLine = getSchemaLine(xml);
379

    
380
		// the prefix is at the beginning of the doc
381
		String prefix = null;
382
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}* ";
383
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
384
		Matcher matcher = pattern.matcher(targetLine);
385
		if (matcher.find()) {
386
			prefix = matcher.group(1).trim();
387
		}
388

    
389
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
390
		// if no prefix was found, we grab the first namespace.
391
		String regex2;
392
		if (prefix != null) {
393
			regex2 = "xmlns:" + prefix + "=\"(.*)\"";
394
		} else {
395
			regex2 = "xmlns:.*=\"(.*)\"";
396
		}
397
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
398
		Matcher matcher2 = pattern2.matcher(targetLine);
399
		if (matcher2.find()) {
400
			namespace = matcher2.group(1);
401

    
402
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
403
				namespace = eml2_0_0NameSpace;
404
			} else if (namespace.indexOf(eml2_0_1NameSpace) != -1) {
405
				namespace = eml2_0_1NameSpace;
406
			} else if (namespace.indexOf(eml2_1_0NameSpace) != -1) {
407
				namespace = eml2_1_0NameSpace;
408
			}
409
		}
410

    
411
		return namespace;
412
	}
413
    
414
    /**
415
	 * Return the line from xml that holds the metadata like namespace and
416
	 * schema location
417
	 * 
418
	 * @param xml
419
	 *            the document to parse
420
	 * @return the "second" line of the document
421
	 */
422
    private static String getSchemaLine(StringReader xml) throws IOException {
423
        Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
424
        // find the line
425
        String secondLine = null;
426
        int count = 0;
427
        final int TARGETNUM = 1;
428
        StringBuffer buffer = new StringBuffer();
429
        boolean comment = false;
430
        boolean processingInstruction = false;
431
        char thirdPreviousCharacter = '?';
432
        char secondPreviousCharacter = '?';
433
        char previousCharacter = '?';
434
        char currentCharacter = '?';
435
        int tmp = xml.read();
436
        while (tmp != -1) {
437
            currentCharacter = (char)tmp;
438
            //in a comment
439
            if (currentCharacter == '-' && previousCharacter == '-'
440
                    && secondPreviousCharacter == '!'
441
                    && thirdPreviousCharacter == '<') {
442
                comment = true;
443
            }
444
            //out of comment
445
            if (comment && currentCharacter == '>' && previousCharacter == '-'
446
                    && secondPreviousCharacter == '-') {
447
                comment = false;
448
            }
449
            
450
            //in a processingInstruction
451
            if (currentCharacter == '?' && previousCharacter == '<') {
452
                processingInstruction = true;
453
            }
454
            
455
            //out of processingInstruction
456
            if (processingInstruction && currentCharacter == '>'
457
                    && previousCharacter == '?') {
458
                processingInstruction = false;
459
            }
460
            
461
            //this is not comment or a processingInstruction
462
            if (currentCharacter != '!' && previousCharacter == '<'
463
                    && !comment && !processingInstruction) {
464
                count++;
465
            }
466
            
467
            // get target line
468
            if (count == TARGETNUM && currentCharacter != '>') {
469
                buffer.append(currentCharacter);
470
            }
471
            if (count == TARGETNUM && currentCharacter == '>') {
472
                break;
473
            }
474
            thirdPreviousCharacter = secondPreviousCharacter;
475
            secondPreviousCharacter = previousCharacter;
476
            previousCharacter = currentCharacter;
477
            tmp = xml.read();
478
        }
479
        secondLine = buffer.toString();
480
        logMetacat.debug("the second line string is: " + secondLine);
481
        
482
        xml.reset();
483
        return secondLine;
484
    }
485
    
486
    /**
487
	 * Get a schema file name from the schema uri.
488
	 * 
489
	 * @param uri
490
	 *            the uri from which to extract the file name
491
	 * @return a string holding the file name
492
	 */
493
    public static String getSchemaFileNameFromUri(String uri) {
494
		// get filename from systemId
495
		String filename = uri;
496
		
497
		if (filename != null && !(filename.trim()).equals("")) {
498
			int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
499
			if (slash > -1) {
500
				filename = filename.substring(slash + 1);
501
			}
502
		}
503

    
504
		return filename;
505
	}
506
}
(9-9/9)