Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements session utility methods 
4
 *  Copyright: 2008 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $'
10
 * '$Revision: 4297 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.service;
28

    
29
import java.io.IOException;
30
import java.io.StringReader;
31
import java.sql.PreparedStatement;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.util.Vector;
35
import java.util.regex.Matcher;
36
import java.util.regex.Pattern;
37

    
38
import org.apache.log4j.Logger;
39

    
40
import edu.ucsb.nceas.metacat.DocumentImpl;
41
import edu.ucsb.nceas.metacat.MetaCatServlet;
42
import edu.ucsb.nceas.metacat.database.DBConnection;
43
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
44
import edu.ucsb.nceas.metacat.shared.BaseService;
45
import edu.ucsb.nceas.metacat.shared.ServiceException;
46
import edu.ucsb.nceas.utilities.FileUtil;
47
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
48
import edu.ucsb.nceas.utilities.StringUtil;
49

    
50
public class XMLSchemaService extends BaseService {
51
	
52
	public static final String NAMESPACEKEYWORD = "xmlns";
53
	
54
	public static final String SCHEMA_DIR = "/schema/";
55
	
56
	private static XMLSchemaService xmlSchemaService = null;
57
	
58
	private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class);
59
	
60
	private static boolean useFullSchemaValidation = false;
61
	
62
//	private static String documentNamespace = null;
63
	
64
	// all schema objects that represent schemas registered in the db that 
65
	// actually have files on disk.
66
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
67
	
68
	// a convenience list that holds the names of registered namespaces.
69
    private static Vector<String> nameSpaceList = new Vector<String>();
70
    
71
    // a convenience string that holds all name spaces and locations in a space
72
    // delimited format
73
    private static String nameSpaceAndLocationString = ""; 
74
	
75
	/**
76
	 * private constructor since this is a singleton
77
	 */
78
	private XMLSchemaService() {
79
		try {
80
			_serviceName = "XMLSchemaService";
81
			
82
			setUseFullSchemaValidation();
83
			populateRegisteredSchemaList();
84
			createRegisteredNameSpaceList();
85
			createRegisteredNameSpaceAndLocationString();
86
			
87
		} catch (PropertyNotFoundException pnfe) {
88
			logMetacat.error("Could not find property: xml.useFullSchemaValidation. " + 
89
					"Setting to false.");
90
		}
91
	}
92
	
93
	/**
94
	 * Get the single instance of XMLService.
95
	 * 
96
	 * @return the single instance of XMLService
97
	 */
98
	public static XMLSchemaService getInstance() {
99
		if (xmlSchemaService == null) {
100
			xmlSchemaService = new XMLSchemaService();
101
		}
102
		return xmlSchemaService;
103
	}
104
	
105
	public boolean refreshable() {
106
		return true;
107
	}
108
	
109
	/**
110
	 * refresh the persistant values in this service.
111
	 */
112
	public void doRefresh() throws ServiceException {
113
		try {
114
			populateRegisteredSchemaList();
115
			setUseFullSchemaValidation();
116
			createRegisteredNameSpaceList();
117
			createRegisteredNameSpaceAndLocationString();
118
		} catch (PropertyNotFoundException pnfe) {
119
			logMetacat.error("Could not find property: xml.useFullSchemaValidation. " + 
120
					"Setting to false.");
121
		}
122
	}
123
	
124
	public void stop() throws ServiceException {
125
		return;
126
	}
127
	
128
	/**
129
	 * Gets the registered schema list. This list holds schemas that exist in
130
	 * the xml_catalog table that also have associated files in the schema
131
	 * directory.
132
	 * 
133
	 * @return a list of XMLSchema objects holding registered schema information
134
	 */
135
	public static Vector<XMLSchema> getRegisteredSchemaList() {
136
		return registeredSchemaList;
137
	}
138
	
139
	/**
140
	 * Gets the name space and location string. This is a convenience method.
141
	 * The string will have space delimited namespaces and locations that are
142
	 * held in the registered schema list.
143
	 * 
144
	 * @return a string that holds space delimited registered namespaces and
145
	 *         locations.
146
	 */
147
	public static String getNameSpaceAndLocationString() {
148
		return nameSpaceAndLocationString;
149
	}
150
	
151
	/**
152
	 * Gets a list of name spaces. This is a convenience method. The list will 
153
	 * have all namespaces that are held in the registered schema list.
154
	 * 
155
	 * @return a list that holds registered namespaces.
156
	 */
157
	public static Vector<String> getNameSpaceList() {
158
		return nameSpaceList;
159
	}
160
	
161
	/**
162
	 * Report whether xml parsing is set to use full schema parsing. If full
163
	 * schema parsing is true, new schemas will be validated before being
164
	 * entered into the database and file system.
165
	 * 
166
	 * @return true if the xml.useFullSchemaValidation property is set to true,
167
	 *         false otherwise.
168
	 */
169
	public static boolean useFullSchemaValidation() {
170
		return useFullSchemaValidation;
171
	}
172
	
173
	/**
174
	 * sets the UseFullSchemaValidation variable.  The only way this should be
175
	 * set is in the constructor or the refresh methods.
176
	 */
177
	private static void setUseFullSchemaValidation() throws PropertyNotFoundException {
178
		String strUseFullSchemaValidation = 
179
			PropertyService.getProperty("xml.useFullSchemaValidation");
180
		useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation);
181
	}
182

    
183
	/**
184
	 * Populate the list of registered schemas. This reads all schemas in the
185
	 * xml_catalog table and then makes sure the schema actually exists and is
186
	 * readable on disk.
187
	 */
188
	public static void populateRegisteredSchemaList() {
189
		DBConnection conn = null;
190
		int serialNumber = -1;
191
		PreparedStatement pstmt = null;
192
		ResultSet resultSet = null;
193
		registeredSchemaList = new Vector<XMLSchema>();
194

    
195
		// get the system id from the xml_catalog table for all schemas.
196
		String sql = "SELECT public_id, system_id FROM xml_catalog where "
197
				+ "entry_type ='" + DocumentImpl.SCHEMA + "'";
198
		try {
199
			// check out DBConnection
200
			conn = DBConnectionPool
201
					.getDBConnection("XMLService.populateRegisteredSchemaList");
202
			serialNumber = conn.getCheckOutSerialNumber();
203
			pstmt = conn.prepareStatement(sql);
204
			logMetacat.debug("Selecting schemas: " + pstmt.toString());
205
			pstmt.execute();
206
			resultSet = pstmt.getResultSet();
207

    
208
			// make sure the schema actually exists on the file system. If so,
209
			// add it to the registered schema list.
210
			while (resultSet.next()) {
211
				String fileNamespace = resultSet.getString(1);
212
				String fileLocation = resultSet.getString(2);
213
				logMetacat.debug("Registering schema: " + fileNamespace + " " + fileLocation);
214
				XMLSchema xmlSchema = new XMLSchema(fileNamespace);
215
				xmlSchema.setFileName(fileLocation);
216

    
217
				if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) {
218
					registeredSchemaList.add(xmlSchema);
219
				} else {
220
					logMetacat.warn("Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
221
							+ " in the database but does not exist on the file system.");
222
				}
223
			}
224
		} catch (SQLException e) {
225
			logMetacat.error("XMLService.populateRegisteredSchemaList(): "
226
					+ e.getMessage());
227
		} finally {
228
			try {
229
				pstmt.close();
230
			}// try
231
			catch (SQLException sqlE) {
232
				logMetacat.error("Error in XMLService.populateRegisteredSchemaList(): "
233
						+ sqlE.getMessage());
234
			}
235
			DBConnectionPool.returnDBConnection(conn, serialNumber);
236
		}
237
	}	
238
	
239
	/**
240
	 * create a space delimited string of all namespaces and locations
241
	 * in the registered schema list.
242
	 */
243
	private static void createRegisteredNameSpaceAndLocationString() {
244
		boolean firstRow = true;
245
		nameSpaceAndLocationString = "";
246
		
247
		for (XMLSchema xmlSchema : registeredSchemaList) {
248
			if (!firstRow) {
249
				nameSpaceAndLocationString += " ";
250
			}
251
			nameSpaceAndLocationString += xmlSchema.getFileNamespace() + " "
252
					+ xmlSchema.getLocalFileUri();
253
			firstRow = false;
254
		}
255
	}
256

    
257
	/**
258
	 * create a lsit of all namespaces in the registered schema list.
259
	 */
260
	private static void createRegisteredNameSpaceList() {
261
		nameSpaceList = new Vector<String>();
262
		for (XMLSchema xmlSchema : registeredSchemaList) {
263
			nameSpaceList.add(xmlSchema.getFileNamespace());
264
		}
265
	}
266
	
267
	/**
268
	 * Checks to see that all schemas are registered. If a single one in the
269
	 * list is not, this will return false.
270
	 * 
271
	 * @param schemaList
272
	 *            a list of schemas as they appear in xml.
273
	 * @return true if all schemas are registered.
274
	 */
275
	public static boolean areAllSchemasRegistered(Vector<XMLSchema> schemaList) {			
276
		for (XMLSchema xmlSchema : schemaList) {
277
			if ( ! isSchemaRegistered(xmlSchema)) {
278
				return false;
279
			}
280
		}		
281
		return true;
282
	}
283
	
284
	/**
285
	 * Returns true if the schema is registered.
286
	 * 
287
	 * @param schema
288
	 *            a single schema as it appears in xml
289
	 * @return true if the schema is registered, false otherwise.
290
	 */
291
	public static boolean isSchemaRegistered(XMLSchema xmlSchema) {
292
		for (XMLSchema registeredXmlSchema : registeredSchemaList) {
293
			if (registeredXmlSchema.getLocalFileUri().equals(
294
						xmlSchema.getLocalFileUri())
295
					&& registeredXmlSchema.getFileNamespace().equals(
296
							xmlSchema.getFileNamespace())) {
297
				return true;
298
			}
299
		}
300

    
301
		return false;
302
	}
303
	
304
    /**
305
	 * See if schemas have been specified in the xml:schemalocation attribute.
306
	 * If so, return a vector of the system ids.
307
	 * 
308
	 * @param xml
309
	 *            the document we want to look in for schema location
310
	 * @return a vector of XMLSchema objects, or an empty vector if none are
311
	 *         found
312
	 */
313
	public static Vector<XMLSchema> findSchemasInXML(StringReader xml) throws IOException {
314
		Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
315
		Vector<XMLSchema> schemaList = new Vector<XMLSchema>();
316

    
317
		// no xml. return empty vector
318
		if (xml == null) {
319
			logMetacat.debug("Returning empty schemaList.");
320
			return schemaList;
321
		}
322

    
323
		// Get the "second line" from the xml
324
		String targetLine = getSchemaLine(xml);
325

    
326
		// see if there is a match for xsi.schemaLocation. If so, extract the
327
		// schemas.
328
		if (targetLine != null) {
329
			String regex = "(\\p{Graph}*):schemaLocation=\"(.*)\"";
330
			Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE
331
					| Pattern.DOTALL);
332
			Matcher matcher = pattern.matcher(targetLine);
333
			int i = 0;
334
			while (i < targetLine.length()) {
335
				if (!matcher.find(i)) {
336
					break;
337
				}
338

    
339
				String uri = matcher.group(2);
340
				uri = StringUtil.replaceTabsNewLines(uri);
341
				uri = StringUtil.replaceDuplicateSpaces(uri);
342

    
343
				// each namespace could have several schema locations. parsedUri
344
				// will hold a list of uri and files.
345
				Vector<String> parsedUri = StringUtil.toVector(uri, ' ');
346
				for (int j = 0; j < parsedUri.size(); j = j + 2) {
347
					if (j + 1 >= parsedUri.size()) {
348
						throw new IOException(
349
								"Odd number of elements found when parsing schema location: "
350
										+ targetLine
351
										+ ". There should be an even number of uri/files in location.");
352
					}
353
					XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri
354
							.get(j + 1));
355
					schemaList.add(xmlSchema);
356
				}
357
				i = matcher.end();
358
			}
359
		}
360

    
361
		logMetacat.debug("Schemas for eml are " + schemaList.toString());
362

    
363
		return schemaList;
364
	}    
365
    
366
    /**
367
	 * Returns all the namespace for an xml document.  This is done by getting
368
	 * the internal namespace declaration (prefix) and looking for xmlns:<prefix>
369
	 * 
370
	 * @param xml
371
	 *            the document to search
372
	 * @return a string holding the namespace
373
	 */
374
	public static String findDocumentNamespace(StringReader xml) throws IOException {
375
		String namespace = null;
376

    
377
		String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
378
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
379
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
380

    
381
		if (xml == null) {
382
			logMetacat.debug("XML doc is null.  There is no namespace.");
383
			return namespace;
384
		}
385

    
386
		String targetLine = getSchemaLine(xml);
387

    
388
		// the prefix is at the beginning of the doc
389
		String prefix = null;
390
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}* ";
391
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
392
		Matcher matcher = pattern.matcher(targetLine);
393
		if (matcher.find()) {
394
			prefix = matcher.group(1).trim();
395
		}
396

    
397
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
398
		// if no prefix was found, we grab the first namespace.
399
		String regex2;
400
		if (prefix != null) {
401
			regex2 = "xmlns:" + prefix + "=\"(.*)\"";
402
		} else {
403
			regex2 = "xmlns:.*=\"(.*)\"";
404
		}
405
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
406
		Matcher matcher2 = pattern2.matcher(targetLine);
407
		if (matcher2.find()) {
408
			namespace = matcher2.group(1);
409

    
410
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
411
				namespace = eml2_0_0NameSpace;
412
			} else if (namespace.indexOf(eml2_0_1NameSpace) != -1) {
413
				namespace = eml2_0_1NameSpace;
414
			} else if (namespace.indexOf(eml2_1_0NameSpace) != -1) {
415
				namespace = eml2_1_0NameSpace;
416
			}
417
		}
418

    
419
		return namespace;
420
	}
421
    
422
    /**
423
	 * Return the line from xml that holds the metadata like namespace and
424
	 * schema location
425
	 * 
426
	 * @param xml
427
	 *            the document to parse
428
	 * @return the "second" line of the document
429
	 */
430
    private static String getSchemaLine(StringReader xml) throws IOException {
431
        Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
432
        // find the line
433
        String secondLine = null;
434
        int count = 0;
435
        final int TARGETNUM = 1;
436
        StringBuffer buffer = new StringBuffer();
437
        boolean comment = false;
438
        boolean processingInstruction = false;
439
        char thirdPreviousCharacter = '?';
440
        char secondPreviousCharacter = '?';
441
        char previousCharacter = '?';
442
        char currentCharacter = '?';
443
        int tmp = xml.read();
444
        while (tmp != -1) {
445
            currentCharacter = (char)tmp;
446
            //in a comment
447
            if (currentCharacter == '-' && previousCharacter == '-'
448
                    && secondPreviousCharacter == '!'
449
                    && thirdPreviousCharacter == '<') {
450
                comment = true;
451
            }
452
            //out of comment
453
            if (comment && currentCharacter == '>' && previousCharacter == '-'
454
                    && secondPreviousCharacter == '-') {
455
                comment = false;
456
            }
457
            
458
            //in a processingInstruction
459
            if (currentCharacter == '?' && previousCharacter == '<') {
460
                processingInstruction = true;
461
            }
462
            
463
            //out of processingInstruction
464
            if (processingInstruction && currentCharacter == '>'
465
                    && previousCharacter == '?') {
466
                processingInstruction = false;
467
            }
468
            
469
            //this is not comment or a processingInstruction
470
            if (currentCharacter != '!' && previousCharacter == '<'
471
                    && !comment && !processingInstruction) {
472
                count++;
473
            }
474
            
475
            // get target line
476
            if (count == TARGETNUM && currentCharacter != '>') {
477
                buffer.append(currentCharacter);
478
            }
479
            if (count == TARGETNUM && currentCharacter == '>') {
480
                break;
481
            }
482
            thirdPreviousCharacter = secondPreviousCharacter;
483
            secondPreviousCharacter = previousCharacter;
484
            previousCharacter = currentCharacter;
485
            tmp = xml.read();
486
        }
487
        secondLine = buffer.toString();
488
        logMetacat.debug("the second line string is: " + secondLine);
489
        
490
        xml.reset();
491
        return secondLine;
492
    }
493
    
494
    /**
495
	 * Get a schema file name from the schema uri.
496
	 * 
497
	 * @param uri
498
	 *            the uri from which to extract the file name
499
	 * @return a string holding the file name
500
	 */
501
    public static String getSchemaFileNameFromUri(String uri) {
502
		// get filename from systemId
503
		String filename = uri;
504
		
505
		if (filename != null && !(filename.trim()).equals("")) {
506
			int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
507
			if (slash > -1) {
508
				filename = filename.substring(slash + 1);
509
			}
510
		}
511

    
512
		return filename;
513
	}
514
}
(6-6/6)