Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements session utility methods 
4
 *  Copyright: 2008 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $'
10
 * '$Revision: 4297 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.service;
28

    
29
import java.io.IOException;
30
import java.io.StringReader;
31
import java.sql.PreparedStatement;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.util.Vector;
35
import java.util.regex.Matcher;
36
import java.util.regex.Pattern;
37

    
38
import org.apache.log4j.Logger;
39

    
40
import edu.ucsb.nceas.metacat.DocumentImpl;
41
import edu.ucsb.nceas.metacat.MetaCatServlet;
42
import edu.ucsb.nceas.metacat.database.DBConnection;
43
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
44
import edu.ucsb.nceas.metacat.properties.PropertyService;
45
import edu.ucsb.nceas.metacat.shared.BaseService;
46
import edu.ucsb.nceas.metacat.shared.ServiceException;
47
import edu.ucsb.nceas.utilities.FileUtil;
48
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
49
import edu.ucsb.nceas.utilities.StringUtil;
50

    
51
public class XMLSchemaService extends BaseService {
52
	
53
	public static final String NAMESPACEKEYWORD = "xmlns";
54
	
55
	public static final String SCHEMA_DIR = "/schema/";
56
	
57
	private static XMLSchemaService xmlSchemaService = null;
58
	
59
	private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class);
60
	
61
	private static boolean useFullSchemaValidation = false;
62
	
63
//	private static String documentNamespace = null;
64
	
65
	// all schema objects that represent schemas registered in the db that 
66
	// actually have files on disk.
67
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
68
	
69
	// a convenience list that holds the names of registered namespaces.
70
    private static Vector<String> nameSpaceList = new Vector<String>();
71
    
72
    // a convenience string that holds all name spaces and locations in a space
73
    // delimited format
74
    private static String nameSpaceAndLocationString = ""; 
75
	
76
	/**
	 * Private constructor since this is a singleton. Sets the service name and
	 * runs the initial refresh of the schema information.
	 */
	private XMLSchemaService() {
		_serviceName = "XMLSchemaService";
		try {
			doRefresh();
		} catch (ServiceException e) {
			// The failure is not propagated to the caller from here, so
			// report it at error level together with the cause instead of
			// hiding it at debug level.
			logMetacat.error("XMLSchemaService - could not refresh the schema service: "
					+ e.getMessage(), e);
		}
	}
87
	
88
	/**
89
	 * Get the single instance of XMLService.
90
	 * 
91
	 * @return the single instance of XMLService
92
	 */
93
	public static XMLSchemaService getInstance() {
94
		if (xmlSchemaService == null) {
95
			xmlSchemaService = new XMLSchemaService();
96
		}
97
		return xmlSchemaService;
98
	}
99
	
100
	public boolean refreshable() {
101
		return true;
102
	}
103
	
104
	/**
105
	 * refresh the persistant values in this service.
106
	 */
107
	public void doRefresh() throws ServiceException {
108
	    logMetacat.debug("XMLService.doRefresh - refreshing the schema service.");
109
		try {
110
			populateRegisteredSchemaList();
111
			setUseFullSchemaValidation();
112
			createRegisteredNameSpaceList();
113
			createRegisteredNameSpaceAndLocationString();
114
		} catch (PropertyNotFoundException pnfe) {
115
			logMetacat.error("XMLService.doRefresh - Could not find property: xml.useFullSchemaValidation. " + 
116
					"Setting to false.");
117
		}
118
	}
119
	
120
	public void stop() throws ServiceException {
121
		return;
122
	}
123
	
124
	/**
125
	 * Gets the registered schema list. This list holds schemas that exist in
126
	 * the xml_catalog table that also have associated files in the schema
127
	 * directory.
128
	 * 
129
	 * @return a list of XMLSchema objects holding registered schema information
130
	 */
131
	public Vector<XMLSchema> getRegisteredSchemaList() {
132
		return registeredSchemaList;
133
	}
134
	
135
	/**
136
	 * Gets the name space and location string. This is a convenience method.
137
	 * The string will have space delimited namespaces and locations that are
138
	 * held in the registered schema list.
139
	 * 
140
	 * @return a string that holds space delimited registered namespaces and
141
	 *         locations.
142
	 */
143
	public String getNameSpaceAndLocationString() {
144
		return nameSpaceAndLocationString;
145
	}
146
	
147
	/**
148
	 * Gets a list of name spaces. This is a convenience method. The list will 
149
	 * have all namespaces that are held in the registered schema list.
150
	 * 
151
	 * @return a list that holds registered namespaces.
152
	 */
153
	public Vector<String> getNameSpaceList() {
154
		return nameSpaceList;
155
	}
156
	
157
	/**
158
	 * Report whether xml parsing is set to use full schema parsing. If full
159
	 * schema parsing is true, new schemas will be validated before being
160
	 * entered into the database and file system.
161
	 * 
162
	 * @return true if the xml.useFullSchemaValidation property is set to true,
163
	 *         false otherwise.
164
	 */
165
	public boolean useFullSchemaValidation() {
166
		return useFullSchemaValidation;
167
	}
168
	
169
	/**
170
	 * sets the UseFullSchemaValidation variable.  The only way this should be
171
	 * set is in the constructor or the refresh methods.
172
	 */
173
	private void setUseFullSchemaValidation() throws PropertyNotFoundException {
174
		String strUseFullSchemaValidation = 
175
			PropertyService.getProperty("xml.useFullSchemaValidation");
176
		useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation);
177
	}
178

    
179
	/**
180
	 * Populate the list of registered schemas. This reads all schemas in the
181
	 * xml_catalog table and then makes sure the schema actually exists and is
182
	 * readable on disk.
183
	 */
184
	public void populateRegisteredSchemaList() {
185
		DBConnection conn = null;
186
		int serialNumber = -1;
187
		PreparedStatement pstmt = null;
188
		ResultSet resultSet = null;
189
		registeredSchemaList = new Vector<XMLSchema>();
190

    
191
		// get the system id from the xml_catalog table for all schemas.
192
		String sql = "SELECT public_id, system_id FROM xml_catalog where "
193
				+ "entry_type ='" + DocumentImpl.SCHEMA + "'";
194
		try {
195
			// check out DBConnection
196
			conn = DBConnectionPool
197
					.getDBConnection("XMLService.populateRegisteredSchemaList");
198
			serialNumber = conn.getCheckOutSerialNumber();
199
			pstmt = conn.prepareStatement(sql);
200
			logMetacat.debug("XMLService.populateRegisteredSchemaList - Selecting schemas: " + pstmt.toString());
201
			pstmt.execute();
202
			resultSet = pstmt.getResultSet();
203

    
204
			// make sure the schema actually exists on the file system. If so,
205
			// add it to the registered schema list.
206
			while (resultSet.next()) {
207
				String fileNamespace = resultSet.getString(1);
208
				String fileLocation = resultSet.getString(2);
209
				logMetacat.debug("XMLService.populateRegisteredSchemaList - Registering schema: " + fileNamespace + " " + fileLocation);
210
				XMLSchema xmlSchema = new XMLSchema(fileNamespace);
211
				xmlSchema.setFileName(fileLocation);
212

    
213
				if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) {
214
					registeredSchemaList.add(xmlSchema);
215
				} else {
216
					logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
217
							+ " in the database but does not exist on the file system.");
218
				}
219
			}
220
		} catch (SQLException e) {
221
			logMetacat.error("XMLService.populateRegisteredSchemaList - SQL Error: "
222
					+ e.getMessage());
223
		} finally {
224
			try {
225
				pstmt.close();
226
			}// try
227
			catch (SQLException sqlE) {
228
				logMetacat.error("XMLSchemaService.populateRegisteredSchemaList - Error in XMLService.populateRegisteredSchemaList(): "
229
						+ sqlE.getMessage());
230
			}
231
			DBConnectionPool.returnDBConnection(conn, serialNumber);
232
		}
233
	}	
234
	
235
	/**
236
	 * create a space delimited string of all namespaces and locations
237
	 * in the registered schema list.
238
	 */
239
	private static void createRegisteredNameSpaceAndLocationString() {
240
		boolean firstRow = true;
241
		nameSpaceAndLocationString = "";
242
		
243
		for (XMLSchema xmlSchema : registeredSchemaList) {
244
			if (!firstRow) {
245
				nameSpaceAndLocationString += " ";
246
			}
247
			nameSpaceAndLocationString += xmlSchema.getFileNamespace() + " "
248
					+ xmlSchema.getLocalFileUri();
249
			firstRow = false;
250
		}
251
	}
252

    
253
	/**
254
	 * create a lsit of all namespaces in the registered schema list.
255
	 */
256
	private static void createRegisteredNameSpaceList() {
257
		nameSpaceList = new Vector<String>();
258
		for (XMLSchema xmlSchema : registeredSchemaList) {
259
			nameSpaceList.add(xmlSchema.getFileNamespace());
260
		}
261
	}
262
	
263
	/**
264
	 * Checks to see that all schemas are registered. If a single one in the
265
	 * list is not, this will return false.
266
	 * 
267
	 * @param schemaList
268
	 *            a list of schemas as they appear in xml.
269
	 * @return true if all schemas are registered.
270
	 */
271
	public static boolean areAllSchemasRegistered(Vector<XMLSchema> schemaList) {			
272
		for (XMLSchema xmlSchema : schemaList) {
273
			if ( ! isSchemaRegistered(xmlSchema)) {
274
				return false;
275
			}
276
		}		
277
		return true;
278
	}
279
	
280
	/**
281
	 * Returns true if the schema is registered.
282
	 * 
283
	 * @param schema
284
	 *            a single schema as it appears in xml
285
	 * @return true if the schema is registered, false otherwise.
286
	 */
287
	public static boolean isSchemaRegistered(XMLSchema xmlSchema) {
288
		for (XMLSchema registeredXmlSchema : registeredSchemaList) {
289
			if (registeredXmlSchema.getLocalFileUri().equals(
290
						xmlSchema.getLocalFileUri())
291
					&& registeredXmlSchema.getFileNamespace().equals(
292
							xmlSchema.getFileNamespace())) {
293
				return true;
294
			}
295
		}
296

    
297
		return false;
298
	}
299
	
300
    /**
301
	 * See if schemas have been specified in the xml:schemalocation attribute.
302
	 * If so, return a vector of the system ids.
303
	 * 
304
	 * @param xml
305
	 *            the document we want to look in for schema location
306
	 * @return a vector of XMLSchema objects, or an empty vector if none are
307
	 *         found
308
	 */
309
	public static Vector<XMLSchema> findSchemasInXML(StringReader xml) throws IOException {
310
		Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
311
		Vector<XMLSchema> schemaList = new Vector<XMLSchema>();
312

    
313
		// no xml. return empty vector
314
		if (xml == null) {
315
			logMetacat.debug("XMLSchemaService.findSchemasInXML - Returning empty schemaList.");
316
			return schemaList;
317
		}
318

    
319
		// Get the "second line" from the xml
320
		String targetLine = getSchemaLine(xml);
321

    
322
		// see if there is a match for xsi.schemaLocation. If so, extract the
323
		// schemas.
324
		if (targetLine != null) {
325
			String regex = "(\\p{Graph}*):schemaLocation=\"([^\"]*)\"";
326
			Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE
327
					| Pattern.DOTALL);
328
			Matcher matcher = pattern.matcher(targetLine);
329
			int i = 0;
330
			while (i < targetLine.length()) {
331
				if (!matcher.find(i)) {
332
					break;
333
				}
334

    
335
				String uri = matcher.group(2);
336
				uri = StringUtil.replaceTabsNewLines(uri);
337
				uri = StringUtil.replaceDuplicateSpaces(uri);
338

    
339
				// each namespace could have several schema locations. parsedUri
340
				// will hold a list of uri and files.
341
				Vector<String> parsedUri = StringUtil.toVector(uri, ' ');
342
				for (int j = 0; j < parsedUri.size(); j = j + 2) {
343
					if (j + 1 >= parsedUri.size()) {
344
						throw new IOException(
345
								"Odd number of elements found when parsing schema location: "
346
										+ targetLine
347
										+ ". There should be an even number of uri/files in location.");
348
					}
349
					XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri
350
							.get(j + 1));
351
					schemaList.add(xmlSchema);
352
				}
353
				i = matcher.end();
354
			}
355
		}
356

    
357
		logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for eml are " + schemaList.toString());
358

    
359
		return schemaList;
360
	}    
361
    
362
    /**
363
	 * Returns all the namespace for an xml document.  This is done by getting
364
	 * the internal namespace declaration (prefix) and looking for xmlns:<prefix>
365
	 * 
366
	 * @param xml
367
	 *            the document to search
368
	 * @return a string holding the namespace
369
	 */
370
	public static String findDocumentNamespace(StringReader xml) throws IOException {
371
		String namespace = null;
372

    
373
		String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
374
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
375
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
376

    
377
		if (xml == null) {
378
			logMetacat.debug("XMLSchemaService.findDocumentNamespace - XML doc is null.  There is no namespace.");
379
			return namespace;
380
		}
381

    
382
		String targetLine = getSchemaLine(xml);
383

    
384
		// the prefix is at the beginning of the doc
385
		String prefix = null;
386
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}* ";
387
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
388
		Matcher matcher = pattern.matcher(targetLine);
389
		if (matcher.find()) {
390
			prefix = matcher.group(1).trim();
391
		}
392

    
393
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
394
		// if no prefix was found, we grab the first namespace.
395
		String regex2;
396
		if (prefix != null) {
397
			regex2 = "xmlns:" + prefix + "=\"(.*)\"";
398
		} else {
399
			regex2 = "xmlns:.*=\"(.*)\"";
400
		}
401
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
402
		Matcher matcher2 = pattern2.matcher(targetLine);
403
		if (matcher2.find()) {
404
			namespace = matcher2.group(1);
405

    
406
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
407
				namespace = eml2_0_0NameSpace;
408
			} else if (namespace.indexOf(eml2_0_1NameSpace) != -1) {
409
				namespace = eml2_0_1NameSpace;
410
			} else if (namespace.indexOf(eml2_1_0NameSpace) != -1) {
411
				namespace = eml2_1_0NameSpace;
412
			}
413
		}
414

    
415
		return namespace;
416
	}
417
    
418
    /**
419
	 * Return the line from xml that holds the metadata like namespace and
420
	 * schema location
421
	 * 
422
	 * @param xml
423
	 *            the document to parse
424
	 * @return the "second" line of the document
425
	 */
426
    private static String getSchemaLine(StringReader xml) throws IOException {
427
        Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
428
        // find the line
429
        String secondLine = null;
430
        int count = 0;
431
        final int TARGETNUM = 1;
432
        StringBuffer buffer = new StringBuffer();
433
        boolean comment = false;
434
        boolean processingInstruction = false;
435
        char thirdPreviousCharacter = '?';
436
        char secondPreviousCharacter = '?';
437
        char previousCharacter = '?';
438
        char currentCharacter = '?';
439
        int tmp = xml.read();
440
        while (tmp != -1) {
441
            currentCharacter = (char)tmp;
442
            //in a comment
443
            if (currentCharacter == '-' && previousCharacter == '-'
444
                    && secondPreviousCharacter == '!'
445
                    && thirdPreviousCharacter == '<') {
446
                comment = true;
447
            }
448
            //out of comment
449
            if (comment && currentCharacter == '>' && previousCharacter == '-'
450
                    && secondPreviousCharacter == '-') {
451
                comment = false;
452
            }
453
            
454
            //in a processingInstruction
455
            if (currentCharacter == '?' && previousCharacter == '<') {
456
                processingInstruction = true;
457
            }
458
            
459
            //out of processingInstruction
460
            if (processingInstruction && currentCharacter == '>'
461
                    && previousCharacter == '?') {
462
                processingInstruction = false;
463
            }
464
            
465
            //this is not comment or a processingInstruction
466
            if (currentCharacter != '!' && previousCharacter == '<'
467
                    && !comment && !processingInstruction) {
468
                count++;
469
            }
470
            
471
            // get target line
472
            if (count == TARGETNUM && currentCharacter != '>') {
473
                buffer.append(currentCharacter);
474
            }
475
            if (count == TARGETNUM && currentCharacter == '>') {
476
                break;
477
            }
478
            thirdPreviousCharacter = secondPreviousCharacter;
479
            secondPreviousCharacter = previousCharacter;
480
            previousCharacter = currentCharacter;
481
            tmp = xml.read();
482
        }
483
        secondLine = buffer.toString();
484
        logMetacat.debug("XMLSchemaService.getSchemaLine - the second line string is: " + secondLine);
485
        
486
        xml.reset();
487
        return secondLine;
488
    }
489
    
490
    /**
491
	 * Get a schema file name from the schema uri.
492
	 * 
493
	 * @param uri
494
	 *            the uri from which to extract the file name
495
	 * @return a string holding the file name
496
	 */
497
    public static String getSchemaFileNameFromUri(String uri) {
498
		// get filename from systemId
499
		String filename = uri;
500
		
501
		if (filename != null && !(filename.trim()).equals("")) {
502
			int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
503
			if (slash > -1) {
504
				filename = filename.substring(slash + 1);
505
			}
506
		}
507

    
508
		return filename;
509
	}
510
}
(4-4/4)