Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements session utility methods 
4
 *  Copyright: 2008 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $'
10
 * '$Revision: 4297 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.service;
28

    
29
import java.io.IOException;
30
import java.io.StringReader;
31
import java.sql.PreparedStatement;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.util.Vector;
35
import java.util.regex.Matcher;
36
import java.util.regex.Pattern;
37

    
38
import org.apache.log4j.Logger;
39

    
40
import edu.ucsb.nceas.metacat.DocumentImpl;
41
import edu.ucsb.nceas.metacat.MetaCatServlet;
42
import edu.ucsb.nceas.metacat.database.DBConnection;
43
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
44
import edu.ucsb.nceas.metacat.properties.PropertyService;
45
import edu.ucsb.nceas.metacat.shared.BaseService;
46
import edu.ucsb.nceas.metacat.shared.ServiceException;
47
import edu.ucsb.nceas.utilities.FileUtil;
48
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
49
import edu.ucsb.nceas.utilities.StringUtil;
50

    
51
public class XMLSchemaService extends BaseService {
52
	
53
	public static final String NAMESPACEKEYWORD = "xmlns";
54
	
55
	public static final String SCHEMA_DIR = "/schema/";
56
	
57
	private static XMLSchemaService xmlSchemaService = null;
58
	
59
	private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class);
60
	
61
	private static boolean useFullSchemaValidation = false;
62
	
63
//	private static String documentNamespace = null;
64
	
65
	// all schema objects that represent schemas registered in the db that 
66
	// actually have files on disk.
67
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
68
	
69
	// a convenience list that holds the names of registered namespaces.
70
    private static Vector<String> nameSpaceList = new Vector<String>();
71
    
72
    // a convenience string that holds all name spaces and locations in a space
73
    // delimited format
74
    private static String nameSpaceAndLocationString = ""; 
75
	
76
	/**
77
	 * private constructor since this is a singleton
78
	 */
79
	private XMLSchemaService() {
80
		try {
81
			_serviceName = "XMLSchemaService";
82
			
83
			setUseFullSchemaValidation();
84
			populateRegisteredSchemaList();
85
			createRegisteredNameSpaceList();
86
			createRegisteredNameSpaceAndLocationString();
87
			
88
		} catch (PropertyNotFoundException pnfe) {
89
			logMetacat.error("XMLService.XMLSchemaService() - Could not find property: xml.useFullSchemaValidation. " + 
90
					"Setting to false.");
91
		}
92
	}
93
	
94
	/**
95
	 * Get the single instance of XMLService.
96
	 * 
97
	 * @return the single instance of XMLService
98
	 */
99
	public static XMLSchemaService getInstance() {
100
		if (xmlSchemaService == null) {
101
			xmlSchemaService = new XMLSchemaService();
102
		}
103
		return xmlSchemaService;
104
	}
105
	
106
	public boolean refreshable() {
107
		return true;
108
	}
109
	
110
	/**
111
	 * refresh the persistant values in this service.
112
	 */
113
	public void doRefresh() throws ServiceException {
114
		try {
115
			populateRegisteredSchemaList();
116
			setUseFullSchemaValidation();
117
			createRegisteredNameSpaceList();
118
			createRegisteredNameSpaceAndLocationString();
119
		} catch (PropertyNotFoundException pnfe) {
120
			logMetacat.error("XMLService.doRefresh - Could not find property: xml.useFullSchemaValidation. " + 
121
					"Setting to false.");
122
		}
123
	}
124
	
125
	public void stop() throws ServiceException {
126
		return;
127
	}
128
	
129
	/**
130
	 * Gets the registered schema list. This list holds schemas that exist in
131
	 * the xml_catalog table that also have associated files in the schema
132
	 * directory.
133
	 * 
134
	 * @return a list of XMLSchema objects holding registered schema information
135
	 */
136
	public static Vector<XMLSchema> getRegisteredSchemaList() {
137
		return registeredSchemaList;
138
	}
139
	
140
	/**
141
	 * Gets the name space and location string. This is a convenience method.
142
	 * The string will have space delimited namespaces and locations that are
143
	 * held in the registered schema list.
144
	 * 
145
	 * @return a string that holds space delimited registered namespaces and
146
	 *         locations.
147
	 */
148
	public static String getNameSpaceAndLocationString() {
149
		return nameSpaceAndLocationString;
150
	}
151
	
152
	/**
153
	 * Gets a list of name spaces. This is a convenience method. The list will 
154
	 * have all namespaces that are held in the registered schema list.
155
	 * 
156
	 * @return a list that holds registered namespaces.
157
	 */
158
	public static Vector<String> getNameSpaceList() {
159
		return nameSpaceList;
160
	}
161
	
162
	/**
163
	 * Report whether xml parsing is set to use full schema parsing. If full
164
	 * schema parsing is true, new schemas will be validated before being
165
	 * entered into the database and file system.
166
	 * 
167
	 * @return true if the xml.useFullSchemaValidation property is set to true,
168
	 *         false otherwise.
169
	 */
170
	public static boolean useFullSchemaValidation() {
171
		return useFullSchemaValidation;
172
	}
173
	
174
	/**
175
	 * sets the UseFullSchemaValidation variable.  The only way this should be
176
	 * set is in the constructor or the refresh methods.
177
	 */
178
	private static void setUseFullSchemaValidation() throws PropertyNotFoundException {
179
		String strUseFullSchemaValidation = 
180
			PropertyService.getProperty("xml.useFullSchemaValidation");
181
		useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation);
182
	}
183

    
184
	/**
185
	 * Populate the list of registered schemas. This reads all schemas in the
186
	 * xml_catalog table and then makes sure the schema actually exists and is
187
	 * readable on disk.
188
	 */
189
	public static void populateRegisteredSchemaList() {
190
		DBConnection conn = null;
191
		int serialNumber = -1;
192
		PreparedStatement pstmt = null;
193
		ResultSet resultSet = null;
194
		registeredSchemaList = new Vector<XMLSchema>();
195

    
196
		// get the system id from the xml_catalog table for all schemas.
197
		String sql = "SELECT public_id, system_id FROM xml_catalog where "
198
				+ "entry_type ='" + DocumentImpl.SCHEMA + "'";
199
		try {
200
			// check out DBConnection
201
			conn = DBConnectionPool
202
					.getDBConnection("XMLService.populateRegisteredSchemaList");
203
			serialNumber = conn.getCheckOutSerialNumber();
204
			pstmt = conn.prepareStatement(sql);
205
			logMetacat.debug("XMLService.populateRegisteredSchemaList - Selecting schemas: " + pstmt.toString());
206
			pstmt.execute();
207
			resultSet = pstmt.getResultSet();
208

    
209
			// make sure the schema actually exists on the file system. If so,
210
			// add it to the registered schema list.
211
			while (resultSet.next()) {
212
				String fileNamespace = resultSet.getString(1);
213
				String fileLocation = resultSet.getString(2);
214
				logMetacat.debug("XMLService.populateRegisteredSchemaList - Registering schema: " + fileNamespace + " " + fileLocation);
215
				XMLSchema xmlSchema = new XMLSchema(fileNamespace);
216
				xmlSchema.setFileName(fileLocation);
217

    
218
				if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) {
219
					registeredSchemaList.add(xmlSchema);
220
				} else {
221
					logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
222
							+ " in the database but does not exist on the file system.");
223
				}
224
			}
225
		} catch (SQLException e) {
226
			logMetacat.error("XMLService.populateRegisteredSchemaList - SQL Error: "
227
					+ e.getMessage());
228
		} finally {
229
			try {
230
				pstmt.close();
231
			}// try
232
			catch (SQLException sqlE) {
233
				logMetacat.error("XMLSchemaService.populateRegisteredSchemaList - Error in XMLService.populateRegisteredSchemaList(): "
234
						+ sqlE.getMessage());
235
			}
236
			DBConnectionPool.returnDBConnection(conn, serialNumber);
237
		}
238
	}	
239
	
240
	/**
241
	 * create a space delimited string of all namespaces and locations
242
	 * in the registered schema list.
243
	 */
244
	private static void createRegisteredNameSpaceAndLocationString() {
245
		boolean firstRow = true;
246
		nameSpaceAndLocationString = "";
247
		
248
		for (XMLSchema xmlSchema : registeredSchemaList) {
249
			if (!firstRow) {
250
				nameSpaceAndLocationString += " ";
251
			}
252
			nameSpaceAndLocationString += xmlSchema.getFileNamespace() + " "
253
					+ xmlSchema.getLocalFileUri();
254
			firstRow = false;
255
		}
256
	}
257

    
258
	/**
259
	 * create a lsit of all namespaces in the registered schema list.
260
	 */
261
	private static void createRegisteredNameSpaceList() {
262
		nameSpaceList = new Vector<String>();
263
		for (XMLSchema xmlSchema : registeredSchemaList) {
264
			nameSpaceList.add(xmlSchema.getFileNamespace());
265
		}
266
	}
267
	
268
	/**
269
	 * Checks to see that all schemas are registered. If a single one in the
270
	 * list is not, this will return false.
271
	 * 
272
	 * @param schemaList
273
	 *            a list of schemas as they appear in xml.
274
	 * @return true if all schemas are registered.
275
	 */
276
	public static boolean areAllSchemasRegistered(Vector<XMLSchema> schemaList) {			
277
		for (XMLSchema xmlSchema : schemaList) {
278
			if ( ! isSchemaRegistered(xmlSchema)) {
279
				return false;
280
			}
281
		}		
282
		return true;
283
	}
284
	
285
	/**
286
	 * Returns true if the schema is registered.
287
	 * 
288
	 * @param schema
289
	 *            a single schema as it appears in xml
290
	 * @return true if the schema is registered, false otherwise.
291
	 */
292
	public static boolean isSchemaRegistered(XMLSchema xmlSchema) {
293
		for (XMLSchema registeredXmlSchema : registeredSchemaList) {
294
			if (registeredXmlSchema.getLocalFileUri().equals(
295
						xmlSchema.getLocalFileUri())
296
					&& registeredXmlSchema.getFileNamespace().equals(
297
							xmlSchema.getFileNamespace())) {
298
				return true;
299
			}
300
		}
301

    
302
		return false;
303
	}
304
	
305
    /**
306
	 * See if schemas have been specified in the xml:schemalocation attribute.
307
	 * If so, return a vector of the system ids.
308
	 * 
309
	 * @param xml
310
	 *            the document we want to look in for schema location
311
	 * @return a vector of XMLSchema objects, or an empty vector if none are
312
	 *         found
313
	 */
314
	public static Vector<XMLSchema> findSchemasInXML(StringReader xml) throws IOException {
315
		Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
316
		Vector<XMLSchema> schemaList = new Vector<XMLSchema>();
317

    
318
		// no xml. return empty vector
319
		if (xml == null) {
320
			logMetacat.debug("XMLSchemaService.findSchemasInXML - Returning empty schemaList.");
321
			return schemaList;
322
		}
323

    
324
		// Get the "second line" from the xml
325
		String targetLine = getSchemaLine(xml);
326

    
327
		// see if there is a match for xsi.schemaLocation. If so, extract the
328
		// schemas.
329
		if (targetLine != null) {
330
			String regex = "(\\p{Graph}*):schemaLocation=\"(.*)\"";
331
			Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE
332
					| Pattern.DOTALL);
333
			Matcher matcher = pattern.matcher(targetLine);
334
			int i = 0;
335
			while (i < targetLine.length()) {
336
				if (!matcher.find(i)) {
337
					break;
338
				}
339

    
340
				String uri = matcher.group(2);
341
				uri = StringUtil.replaceTabsNewLines(uri);
342
				uri = StringUtil.replaceDuplicateSpaces(uri);
343

    
344
				// each namespace could have several schema locations. parsedUri
345
				// will hold a list of uri and files.
346
				Vector<String> parsedUri = StringUtil.toVector(uri, ' ');
347
				for (int j = 0; j < parsedUri.size(); j = j + 2) {
348
					if (j + 1 >= parsedUri.size()) {
349
						throw new IOException(
350
								"Odd number of elements found when parsing schema location: "
351
										+ targetLine
352
										+ ". There should be an even number of uri/files in location.");
353
					}
354
					XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri
355
							.get(j + 1));
356
					schemaList.add(xmlSchema);
357
				}
358
				i = matcher.end();
359
			}
360
		}
361

    
362
		logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for eml are " + schemaList.toString());
363

    
364
		return schemaList;
365
	}    
366
    
367
    /**
368
	 * Returns all the namespace for an xml document.  This is done by getting
369
	 * the internal namespace declaration (prefix) and looking for xmlns:<prefix>
370
	 * 
371
	 * @param xml
372
	 *            the document to search
373
	 * @return a string holding the namespace
374
	 */
375
	public static String findDocumentNamespace(StringReader xml) throws IOException {
376
		String namespace = null;
377

    
378
		String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
379
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
380
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
381

    
382
		if (xml == null) {
383
			logMetacat.debug("XMLSchemaService.findDocumentNamespace - XML doc is null.  There is no namespace.");
384
			return namespace;
385
		}
386

    
387
		String targetLine = getSchemaLine(xml);
388

    
389
		// the prefix is at the beginning of the doc
390
		String prefix = null;
391
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}* ";
392
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
393
		Matcher matcher = pattern.matcher(targetLine);
394
		if (matcher.find()) {
395
			prefix = matcher.group(1).trim();
396
		}
397

    
398
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
399
		// if no prefix was found, we grab the first namespace.
400
		String regex2;
401
		if (prefix != null) {
402
			regex2 = "xmlns:" + prefix + "=\"(.*)\"";
403
		} else {
404
			regex2 = "xmlns:.*=\"(.*)\"";
405
		}
406
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
407
		Matcher matcher2 = pattern2.matcher(targetLine);
408
		if (matcher2.find()) {
409
			namespace = matcher2.group(1);
410

    
411
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
412
				namespace = eml2_0_0NameSpace;
413
			} else if (namespace.indexOf(eml2_0_1NameSpace) != -1) {
414
				namespace = eml2_0_1NameSpace;
415
			} else if (namespace.indexOf(eml2_1_0NameSpace) != -1) {
416
				namespace = eml2_1_0NameSpace;
417
			}
418
		}
419

    
420
		return namespace;
421
	}
422
    
423
    /**
424
	 * Return the line from xml that holds the metadata like namespace and
425
	 * schema location
426
	 * 
427
	 * @param xml
428
	 *            the document to parse
429
	 * @return the "second" line of the document
430
	 */
431
    private static String getSchemaLine(StringReader xml) throws IOException {
432
        Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
433
        // find the line
434
        String secondLine = null;
435
        int count = 0;
436
        final int TARGETNUM = 1;
437
        StringBuffer buffer = new StringBuffer();
438
        boolean comment = false;
439
        boolean processingInstruction = false;
440
        char thirdPreviousCharacter = '?';
441
        char secondPreviousCharacter = '?';
442
        char previousCharacter = '?';
443
        char currentCharacter = '?';
444
        int tmp = xml.read();
445
        while (tmp != -1) {
446
            currentCharacter = (char)tmp;
447
            //in a comment
448
            if (currentCharacter == '-' && previousCharacter == '-'
449
                    && secondPreviousCharacter == '!'
450
                    && thirdPreviousCharacter == '<') {
451
                comment = true;
452
            }
453
            //out of comment
454
            if (comment && currentCharacter == '>' && previousCharacter == '-'
455
                    && secondPreviousCharacter == '-') {
456
                comment = false;
457
            }
458
            
459
            //in a processingInstruction
460
            if (currentCharacter == '?' && previousCharacter == '<') {
461
                processingInstruction = true;
462
            }
463
            
464
            //out of processingInstruction
465
            if (processingInstruction && currentCharacter == '>'
466
                    && previousCharacter == '?') {
467
                processingInstruction = false;
468
            }
469
            
470
            //this is not comment or a processingInstruction
471
            if (currentCharacter != '!' && previousCharacter == '<'
472
                    && !comment && !processingInstruction) {
473
                count++;
474
            }
475
            
476
            // get target line
477
            if (count == TARGETNUM && currentCharacter != '>') {
478
                buffer.append(currentCharacter);
479
            }
480
            if (count == TARGETNUM && currentCharacter == '>') {
481
                break;
482
            }
483
            thirdPreviousCharacter = secondPreviousCharacter;
484
            secondPreviousCharacter = previousCharacter;
485
            previousCharacter = currentCharacter;
486
            tmp = xml.read();
487
        }
488
        secondLine = buffer.toString();
489
        logMetacat.debug("XMLSchemaService.getSchemaLine - the second line string is: " + secondLine);
490
        
491
        xml.reset();
492
        return secondLine;
493
    }
494
    
495
    /**
496
	 * Get a schema file name from the schema uri.
497
	 * 
498
	 * @param uri
499
	 *            the uri from which to extract the file name
500
	 * @return a string holding the file name
501
	 */
502
    public static String getSchemaFileNameFromUri(String uri) {
503
		// get filename from systemId
504
		String filename = uri;
505
		
506
		if (filename != null && !(filename.trim()).equals("")) {
507
			int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
508
			if (slash > -1) {
509
				filename = filename.substring(slash + 1);
510
			}
511
		}
512

    
513
		return filename;
514
	}
515
}
(4-4/4)