Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements XML schema service utility methods
4
 *  Copyright: 2008 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $'
10
 * '$Revision: 4297 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.service;
28

    
29
import java.io.IOException;
30
import java.io.StringReader;
31
import java.sql.PreparedStatement;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.util.Vector;
35
import java.util.regex.Matcher;
36
import java.util.regex.Pattern;
37

    
38
import org.apache.log4j.Logger;
39

    
40
import edu.ucsb.nceas.metacat.DBConnection;
41
import edu.ucsb.nceas.metacat.DBConnectionPool;
42
import edu.ucsb.nceas.metacat.DocumentImpl;
43
import edu.ucsb.nceas.metacat.MetaCatServlet;
44
import edu.ucsb.nceas.utilities.FileUtil;
45
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
46
import edu.ucsb.nceas.utilities.StringUtil;
47

    
48
public class XMLSchemaService extends BaseService {
49
	
50
	public static final String NAMESPACEKEYWORD = "xmlns";
51
	
52
	public static final String SCHEMA_DIR = "/schema/";
53
	
54
	private static XMLSchemaService xmlSchemaService = null;
55
	
56
	private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class);
57
	
58
	private static boolean useFullSchemaValidation = false;
59
	
60
	private static String documentNamespace = null;
61
	
62
	// all schema objects that represent schemas registered in the db that 
63
	// actually have files on disk.
64
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
65
	
66
	// a convenience list that holds the names of registered namespaces.
67
    private static Vector<String> nameSpaceList = new Vector<String>();
68
    
69
    // a convenience string that holds all name spaces and locations in a space
70
    // delimited format
71
    private static String nameSpaceAndLocationString = ""; 
72
	
73
	/**
74
	 * private constructor since this is a singleton
75
	 */
76
	private XMLSchemaService() {
77
		try {
78
			setUseFullSchemaValidation();
79
			populateRegisteredSchemaList();
80
			createRegisteredNameSpaceList();
81
			createRegisteredNameSpaceAndLocationString();
82
			
83
		} catch (PropertyNotFoundException pnfe) {
84
			logMetacat.error("Could not find property: xml.useFullSchemaValidation. " + 
85
					"Setting to false.");
86
		}
87
	}
88
	
89
	/**
90
	 * Get the single instance of XMLService.
91
	 * 
92
	 * @return the single instance of XMLService
93
	 */
94
	public static XMLSchemaService getInstance() {
95
		if (xmlSchemaService == null) {
96
			xmlSchemaService = new XMLSchemaService();
97
		}
98
		return xmlSchemaService;
99
	}
100
	
101
	public boolean refreshable() {
102
		return true;
103
	}
104
	
105
	/**
106
	 * refresh the persistant values in this service.
107
	 */
108
	protected void doRefresh() {
109
		try {
110
			populateRegisteredSchemaList();
111
			setUseFullSchemaValidation();
112
			createRegisteredNameSpaceList();
113
			createRegisteredNameSpaceAndLocationString();
114
		} catch (PropertyNotFoundException pnfe) {
115
			logMetacat.error("Could not find property: xml.useFullSchemaValidation. " + 
116
					"Setting to false.");
117
		}
118
	}
119
	
120
	/**
121
	 * Gets the registered schema list. This list holds schemas that exist in
122
	 * the xml_catalog table that also have associated files in the schema
123
	 * directory.
124
	 * 
125
	 * @return a list of XMLSchema objects holding registered schema information
126
	 */
127
	public static Vector<XMLSchema> getRegisteredSchemaList() {
128
		return registeredSchemaList;
129
	}
130
	
131
	/**
132
	 * Gets the name space and location string. This is a convenience method.
133
	 * The string will have space delimited namespaces and locations that are
134
	 * held in the registered schema list.
135
	 * 
136
	 * @return a string that holds space delimited registered namespaces and
137
	 *         locations.
138
	 */
139
	public static String getNameSpaceAndLocationString() {
140
		return nameSpaceAndLocationString;
141
	}
142
	
143
	/**
144
	 * Gets a list of name spaces. This is a convenience method. The list will 
145
	 * have all namespaces that are held in the registered schema list.
146
	 * 
147
	 * @return a list that holds registered namespaces.
148
	 */
149
	public static Vector<String> getNameSpaceList() {
150
		return nameSpaceList;
151
	}
152
	
153
	/**
154
	 * Report whether xml parsing is set to use full schema parsing. If full
155
	 * schema parsing is true, new schemas will be validated before being
156
	 * entered into the database and file system.
157
	 * 
158
	 * @return true if the xml.useFullSchemaValidation property is set to true,
159
	 *         false otherwise.
160
	 */
161
	public static boolean useFullSchemaValidation() {
162
		return useFullSchemaValidation;
163
	}
164
	
165
	/**
166
	 * sets the UseFullSchemaValidation variable.  The only way this should be
167
	 * set is in the constructor or the refresh methods.
168
	 */
169
	private static void setUseFullSchemaValidation() throws PropertyNotFoundException {
170
		String strUseFullSchemaValidation = 
171
			PropertyService.getProperty("xml.useFullSchemaValidation");
172
		useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation);
173
	}
174

    
175
	/**
176
	 * Populate the list of registered schemas. This reads all schemas in the
177
	 * xml_catalog table and then makes sure the schema actually exists and is
178
	 * readable on disk.
179
	 */
180
	public static void populateRegisteredSchemaList() {
181
		DBConnection conn = null;
182
		int serialNumber = -1;
183
		PreparedStatement pstmt = null;
184
		ResultSet resultSet = null;
185
		registeredSchemaList = new Vector<XMLSchema>();
186

    
187
		// get the system id from the xml_catalog table for all schemas.
188
		String sql = "SELECT public_id, system_id FROM xml_catalog where "
189
				+ "entry_type ='" + DocumentImpl.SCHEMA + "'";
190
		try {
191
			// check out DBConnection
192
			conn = DBConnectionPool
193
					.getDBConnection("XMLService.populateRegisteredSchemaList");
194
			serialNumber = conn.getCheckOutSerialNumber();
195
			pstmt = conn.prepareStatement(sql);
196
			pstmt.execute();
197
			resultSet = pstmt.getResultSet();
198

    
199
			// make sure the schema actually exists on the file system. If so,
200
			// add it
201
			// to the registered schema list.
202
			while (resultSet.next()) {
203
				String fileNamespace = resultSet.getString(1);
204
				String fileLocation = resultSet.getString(2);
205
				XMLSchema xmlSchema = new XMLSchema(fileNamespace);
206
				xmlSchema.setFileName(fileLocation);
207

    
208
				if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) {
209
					registeredSchemaList.add(xmlSchema);
210
				} else {
211
					logMetacat.warn("Schema file: " + fileLocation + " is registered "
212
							+ " in the database but does not exist on the file system.");
213
				}
214
			}
215
		} catch (SQLException e) {
216
			logMetacat.error("XMLService.populateRegisteredSchemaList(): "
217
					+ e.getMessage());
218
		} finally {
219
			try {
220
				pstmt.close();
221
			}// try
222
			catch (SQLException sqlE) {
223
				logMetacat.error("Error in XMLService.populateRegisteredSchemaList(): "
224
						+ sqlE.getMessage());
225
			}
226
			DBConnectionPool.returnDBConnection(conn, serialNumber);
227
		}
228
	}	
229
	
230
	/**
231
	 * create a space delimited string of all namespaces and locations
232
	 * in the registered schema list.
233
	 */
234
	private static void createRegisteredNameSpaceAndLocationString() {
235
		boolean firstRow = true;
236
		nameSpaceAndLocationString = "";
237
		
238
		for (XMLSchema xmlSchema : registeredSchemaList) {
239
			if (!firstRow) {
240
				nameSpaceAndLocationString += " ";
241
			}
242
			nameSpaceAndLocationString += xmlSchema.getFileNamespace() + " "
243
					+ xmlSchema.getLocalFileUri();
244
			firstRow = false;
245
		}
246
	}
247

    
248
	/**
249
	 * create a lsit of all namespaces in the registered schema list.
250
	 */
251
	private static void createRegisteredNameSpaceList() {
252
		nameSpaceList = new Vector<String>();
253
		for (XMLSchema xmlSchema : registeredSchemaList) {
254
			nameSpaceList.add(xmlSchema.getFileNamespace());
255
		}
256
	}
257
	
258
	/**
259
	 * Checks to see that all schemas are registered. If a single one in the
260
	 * list is not, this will return false.
261
	 * 
262
	 * @param schemaList
263
	 *            a list of schemas as they appear in xml.
264
	 * @return true if all schemas are registered.
265
	 */
266
	public static boolean areAllSchemasRegistered(Vector<XMLSchema> schemaList) {			
267
		for (XMLSchema xmlSchema : schemaList) {
268
			if ( ! isSchemaRegistered(xmlSchema)) {
269
				return false;
270
			}
271
		}		
272
		return true;
273
	}
274
	
275
	/**
276
	 * Returns true if the schema is registered.
277
	 * 
278
	 * @param schema
279
	 *            a single schema as it appears in xml
280
	 * @return true if the schema is registered, false otherwise.
281
	 */
282
	public static boolean isSchemaRegistered(XMLSchema xmlSchema) {
283
		for (XMLSchema registeredXmlSchema : registeredSchemaList) {
284
			if (registeredXmlSchema.getLocalFileUri().equals(
285
						xmlSchema.getLocalFileUri())
286
					&& registeredXmlSchema.getFileNamespace().equals(
287
							xmlSchema.getFileNamespace())) {
288
				return true;
289
			}
290
		}
291

    
292
		return false;
293
	}
294
	
295
    /**
296
	 * See if schemas have been specified in the xml:schemalocation attribute.
297
	 * If so, return a vector of the system ids.
298
	 * 
299
	 * @param xml
300
	 *            the document we want to look in for schema location
301
	 * @return a vector of XMLSchema objects, or an empty vector if none are
302
	 *         found
303
	 */
304
	public static Vector<XMLSchema> findSchemasInXML(StringReader xml) throws IOException {
305
		Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
306
		Vector<XMLSchema> schemaList = new Vector<XMLSchema>();
307

    
308
		// no xml. return empty vector
309
		if (xml == null) {
310
			logMetacat.debug("Returning empty schemaList.");
311
			return schemaList;
312
		}
313

    
314
		// Get the "second line" from the xml
315
		String targetLine = getSchemaLine(xml);
316

    
317
		// see if there is a match for xsi.schemaLocation. If so, extract the
318
		// schemas.
319
		if (targetLine != null) {
320
			String regex = "(\\p{Graph}*):schemaLocation=\"(.*)\"";
321
			Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE
322
					| Pattern.DOTALL);
323
			Matcher matcher = pattern.matcher(targetLine);
324
			int i = 0;
325
			while (i < targetLine.length()) {
326
				if (!matcher.find(i)) {
327
					break;
328
				}
329

    
330
				String uri = matcher.group(2);
331
				uri = StringUtil.replaceTabsNewLines(uri);
332
				uri = StringUtil.replaceDuplicateSpaces(uri);
333

    
334
				// each namespace could have several schema locations. parsedUri
335
				// will hold a list of uri and files.
336
				Vector<String> parsedUri = StringUtil.toVector(uri, ' ');
337
				for (int j = 0; j < parsedUri.size(); j = j + 2) {
338
					if (j + 1 >= parsedUri.size()) {
339
						throw new IOException(
340
								"Odd number of elements found when parsing schema location: "
341
										+ targetLine
342
										+ ". There should be an even number of uri/files in location.");
343
					}
344
					XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri
345
							.get(j + 1));
346
					schemaList.add(xmlSchema);
347
				}
348
				i = matcher.end();
349
			}
350
		}
351

    
352
		logMetacat.debug("Schemas for eml are " + schemaList.toString());
353

    
354
		return schemaList;
355
	}    
356
    
357
    /**
358
	 * Returns all the namespace for an xml document.  This is done by getting
359
	 * the internal namespace declaration (prefix) and looking for xmlns:<prefix>
360
	 * 
361
	 * @param xml
362
	 *            the document to search
363
	 * @return a string holding the namespace
364
	 */
365
	public static String findDocumentNamespace(StringReader xml) throws IOException {
366
		String namespace = null;
367

    
368
		String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
369
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
370
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
371

    
372
		if (xml == null) {
373
			logMetacat.debug("XML doc is null.  There is no namespace.");
374
			return namespace;
375
		}
376

    
377
		String targetLine = getSchemaLine(xml);
378

    
379
		// the prefix is at the beginning of the doc
380
		String prefix = null;
381
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}* ";
382
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
383
		Matcher matcher = pattern.matcher(targetLine);
384
		if (matcher.find()) {
385
			prefix = matcher.group(1).trim();
386
		}
387

    
388
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
389
		// if no prefix was found, we grab the first namespace.
390
		String regex2;
391
		if (prefix != null) {
392
			regex2 = "xmlns:" + prefix + "=\"(.*)\"";
393
		} else {
394
			regex2 = "xmlns:.*=\"(.*)\"";
395
		}
396
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
397
		Matcher matcher2 = pattern2.matcher(targetLine);
398
		if (matcher2.find()) {
399
			namespace = matcher2.group(1);
400

    
401
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
402
				namespace = eml2_0_0NameSpace;
403
			} else if (namespace.indexOf(eml2_0_1NameSpace) != -1) {
404
				namespace = eml2_0_1NameSpace;
405
			} else if (namespace.indexOf(eml2_1_0NameSpace) != -1) {
406
				namespace = eml2_1_0NameSpace;
407
			}
408
		}
409

    
410
		return namespace;
411
	}
412
    
413
    /**
414
	 * Return the line from xml that holds the metadata like namespace and
415
	 * schema location
416
	 * 
417
	 * @param xml
418
	 *            the document to parse
419
	 * @return the "second" line of the document
420
	 */
421
    private static String getSchemaLine(StringReader xml) throws IOException {
422
        Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
423
        // find the line
424
        String secondLine = null;
425
        int count = 0;
426
        final int TARGETNUM = 1;
427
        StringBuffer buffer = new StringBuffer();
428
        boolean comment = false;
429
        boolean processingInstruction = false;
430
        char thirdPreviousCharacter = '?';
431
        char secondPreviousCharacter = '?';
432
        char previousCharacter = '?';
433
        char currentCharacter = '?';
434
        int tmp = xml.read();
435
        while (tmp != -1) {
436
            currentCharacter = (char)tmp;
437
            //in a comment
438
            if (currentCharacter == '-' && previousCharacter == '-'
439
                    && secondPreviousCharacter == '!'
440
                    && thirdPreviousCharacter == '<') {
441
                comment = true;
442
            }
443
            //out of comment
444
            if (comment && currentCharacter == '>' && previousCharacter == '-'
445
                    && secondPreviousCharacter == '-') {
446
                comment = false;
447
            }
448
            
449
            //in a processingInstruction
450
            if (currentCharacter == '?' && previousCharacter == '<') {
451
                processingInstruction = true;
452
            }
453
            
454
            //out of processingInstruction
455
            if (processingInstruction && currentCharacter == '>'
456
                    && previousCharacter == '?') {
457
                processingInstruction = false;
458
            }
459
            
460
            //this is not comment or a processingInstruction
461
            if (currentCharacter != '!' && previousCharacter == '<'
462
                    && !comment && !processingInstruction) {
463
                count++;
464
            }
465
            
466
            // get target line
467
            if (count == TARGETNUM && currentCharacter != '>') {
468
                buffer.append(currentCharacter);
469
            }
470
            if (count == TARGETNUM && currentCharacter == '>') {
471
                break;
472
            }
473
            thirdPreviousCharacter = secondPreviousCharacter;
474
            secondPreviousCharacter = previousCharacter;
475
            previousCharacter = currentCharacter;
476
            tmp = xml.read();
477
        }
478
        secondLine = buffer.toString();
479
        logMetacat.debug("the second line string is: " + secondLine);
480
        
481
        xml.reset();
482
        return secondLine;
483
    }
484
    
485
    /**
486
	 * Get a schema file name from the schema uri.
487
	 * 
488
	 * @param uri
489
	 *            the uri from which to extract the file name
490
	 * @return a string holding the file name
491
	 */
492
    public static String getSchemaFileNameFromUri(String uri) {
493
		// get filename from systemId
494
		String filename = uri;
495
		
496
		if (filename != null && !(filename.trim()).equals("")) {
497
			int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
498
			if (slash > -1) {
499
				filename = filename.substring(slash + 1);
500
			}
501
		}
502

    
503
		return filename;
504
	}
505
}
(9-9/9)