Project

General

Profile

« Previous | Next » 

Revision 4430

Added by daigle about 16 years ago

Add a service that keeps persistent information about XML schemas. It maintains a list of all registered schemas.

View differences:

src/edu/ucsb/nceas/metacat/service/XMLSchemaService.java
1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that keeps persistent information about registered XML schemas
4
 *  Copyright: 2008 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $'
10
 * '$Revision: 4297 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

  
27
package edu.ucsb.nceas.metacat.service;
28

  
29
import java.io.IOException;
30
import java.io.StringReader;
31
import java.sql.PreparedStatement;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.util.Vector;
35
import java.util.regex.Matcher;
36
import java.util.regex.Pattern;
37

  
38
import org.apache.log4j.Logger;
39

  
40
import edu.ucsb.nceas.metacat.DBConnection;
41
import edu.ucsb.nceas.metacat.DBConnectionPool;
42
import edu.ucsb.nceas.metacat.DocumentImpl;
43
import edu.ucsb.nceas.metacat.MetaCatServlet;
44
import edu.ucsb.nceas.utilities.FileUtil;
45
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
46
import edu.ucsb.nceas.utilities.StringUtil;
47

  
48
public class XMLSchemaService implements ServiceInterface {
49
	
50
	public static final String NAMESPACEKEYWORD = "xmlns";
51
	
52
	public static final String SCHEMA_DIR = "/schema/";
53
	
54
	private static XMLSchemaService xmlSchemaService = null;
55
	
56
	private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class);
57
	
58
	private static boolean useFullSchemaValidation = false;
59
	
60
	private static String documentNamespace = null;
61
	
62
	// all schema objects that represent schemas registered in the db that 
63
	// actually have files on disk.
64
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
65
	
66
	// a convenience list that holds the names of registered namespaces.
67
    private static Vector<String> nameSpaceList = new Vector<String>();
68
    
69
    // a convenience string that holds all name spaces and locations in a space
70
    // delimited format
71
    private static String nameSpaceAndLocationString = ""; 
72
	
73
	/**
74
	 * private constructor since this is a singleton
75
	 */
76
	private XMLSchemaService() {
77
		try {
78
			setUseFullSchemaValidation();
79
			populateRegisteredSchemaList();
80
			createRegisteredNameSpaceList();
81
			createRegisteredNameSpaceAndLocationString();
82
			
83
		} catch (PropertyNotFoundException pnfe) {
84
			logMetacat.error("Could not find property: xml.useFullSchemaValidation. " + 
85
					"Setting to false.");
86
		}
87
	}
88
	
89
	/**
90
	 * Get the single instance of XMLService.
91
	 * 
92
	 * @return the single instance of XMLService
93
	 */
94
	public static XMLSchemaService getInstance() {
95
		if (xmlSchemaService == null) {
96
			xmlSchemaService = new XMLSchemaService();
97
		}
98
		return xmlSchemaService;
99
	}
100
	
101
	/**
102
	 * refresh the persistant values in this service.
103
	 */
104
	public static void refresh() {
105
		try {
106
			populateRegisteredSchemaList();
107
			setUseFullSchemaValidation();
108
			createRegisteredNameSpaceList();
109
			createRegisteredNameSpaceAndLocationString();
110
		} catch (PropertyNotFoundException pnfe) {
111
			logMetacat.error("Could not find property: xml.useFullSchemaValidation. " + 
112
					"Setting to false.");
113
		}
114
	}
115
	
116
	/**
117
	 * Gets the registered schema list. This list holds schemas that exist in
118
	 * the xml_catalog table that also have associated files in the schema
119
	 * directory.
120
	 * 
121
	 * @return a list of XMLSchema objects holding registered schema information
122
	 */
123
	public static Vector<XMLSchema> getRegisteredSchemaList() {
124
		return registeredSchemaList;
125
	}
126
	
127
	/**
128
	 * Gets the name space and location string. This is a convenience method.
129
	 * The string will have space delimited namespaces and locations that are
130
	 * held in the registered schema list.
131
	 * 
132
	 * @return a string that holds space delimited registered namespaces and
133
	 *         locations.
134
	 */
135
	public static String getNameSpaceAndLocationString() {
136
		return nameSpaceAndLocationString;
137
	}
138
	
139
	/**
140
	 * Gets a list of name spaces. This is a convenience method. The list will 
141
	 * have all namespaces that are held in the registered schema list.
142
	 * 
143
	 * @return a list that holds registered namespaces.
144
	 */
145
	public static Vector<String> getNameSpaceList() {
146
		return nameSpaceList;
147
	}
148
	
149
	/**
150
	 * Report whether xml parsing is set to use full schema parsing. If full
151
	 * schema parsing is true, new schemas will be validated before being
152
	 * entered into the database and file system.
153
	 * 
154
	 * @return true if the xml.useFullSchemaValidation property is set to true,
155
	 *         false otherwise.
156
	 */
157
	public static boolean useFullSchemaValidation() {
158
		return useFullSchemaValidation;
159
	}
160
	
161
	/**
162
	 * sets the UseFullSchemaValidation variable.  The only way this should be
163
	 * set is in the constructor or the refresh methods.
164
	 */
165
	private static void setUseFullSchemaValidation() throws PropertyNotFoundException {
166
		String strUseFullSchemaValidation = 
167
			PropertyService.getProperty("xml.useFullSchemaValidation");
168
		useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation);
169
	}
170

  
171
	/**
172
	 * Populate the list of registered schemas. This reads all schemas in the
173
	 * xml_catalog table and then makes sure the schema actually exists and is
174
	 * readable on disk.
175
	 */
176
	public static void populateRegisteredSchemaList() {
177
		DBConnection conn = null;
178
		int serialNumber = -1;
179
		PreparedStatement pstmt = null;
180
		ResultSet resultSet = null;
181
		registeredSchemaList = new Vector<XMLSchema>();
182

  
183
		// get the system id from the xml_catalog table for all schemas.
184
		String sql = "SELECT public_id, system_id FROM xml_catalog where "
185
				+ "entry_type ='" + DocumentImpl.SCHEMA + "'";
186
		try {
187
			// check out DBConnection
188
			conn = DBConnectionPool
189
					.getDBConnection("XMLService.populateRegisteredSchemaList");
190
			serialNumber = conn.getCheckOutSerialNumber();
191
			pstmt = conn.prepareStatement(sql);
192
			pstmt.execute();
193
			resultSet = pstmt.getResultSet();
194

  
195
			// make sure the schema actually exists on the file system. If so,
196
			// add it
197
			// to the registered schema list.
198
			while (resultSet.next()) {
199
				String fileNamespace = resultSet.getString(1);
200
				String fileLocation = resultSet.getString(2);
201
				XMLSchema xmlSchema = new XMLSchema(fileNamespace);
202
				xmlSchema.setFileName(fileLocation);
203

  
204
				if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) {
205
					registeredSchemaList.add(xmlSchema);
206
				} else {
207
					logMetacat.warn("Schema file: " + fileLocation + " is registered "
208
							+ " in the database but does not exist on the file system.");
209
				}
210
			}
211
		} catch (SQLException e) {
212
			logMetacat.error("XMLService.populateRegisteredSchemaList(): "
213
					+ e.getMessage());
214
		} finally {
215
			try {
216
				pstmt.close();
217
			}// try
218
			catch (SQLException sqlE) {
219
				logMetacat.error("Error in XMLService.populateRegisteredSchemaList(): "
220
						+ sqlE.getMessage());
221
			}
222
			DBConnectionPool.returnDBConnection(conn, serialNumber);
223
		}
224
	}	
225
	
226
	/**
227
	 * create a space delimited string of all namespaces and locations
228
	 * in the registered schema list.
229
	 */
230
	private static void createRegisteredNameSpaceAndLocationString() {
231
		boolean firstRow = true;
232
		for (XMLSchema xmlSchema : registeredSchemaList) {
233
			if (!firstRow) {
234
				nameSpaceAndLocationString += " ";
235
			}
236
			nameSpaceAndLocationString += xmlSchema.getFileNamespace() + " "
237
					+ xmlSchema.getLocalFileUri();
238
			firstRow = false;
239
		}
240
	}
241

  
242
	/**
243
	 * create a lsit of all namespaces in the registered schema list.
244
	 */
245
	private static void createRegisteredNameSpaceList() {
246
		for (XMLSchema xmlSchema : registeredSchemaList) {
247
			nameSpaceList.add(xmlSchema.getFileNamespace());
248
		}
249
	}
250
	
251
	/**
252
	 * Checks to see that all schemas are registered. If a single one in the
253
	 * list is not, this will return false.
254
	 * 
255
	 * @param schemaList
256
	 *            a list of schemas as they appear in xml.
257
	 * @return true if all schemas are registered.
258
	 */
259
	public static boolean areAllSchemasRegistered(Vector<XMLSchema> schemaList) {			
260
		for (XMLSchema xmlSchema : schemaList) {
261
			if ( ! isSchemaRegistered(xmlSchema)) {
262
				return false;
263
			}
264
		}		
265
		return true;
266
	}
267
	
268
	/**
269
	 * Returns true if the schema is registered.
270
	 * 
271
	 * @param schema
272
	 *            a single schema as it appears in xml
273
	 * @return true if the schema is registered, false otherwise.
274
	 */
275
	public static boolean isSchemaRegistered(XMLSchema xmlSchema) {
276
		for (XMLSchema registeredXmlSchema : registeredSchemaList) {
277
			if (registeredXmlSchema.getLocalFileUri().equals(
278
						xmlSchema.getLocalFileUri())
279
					&& registeredXmlSchema.getFileNamespace().equals(
280
							xmlSchema.getFileNamespace())) {
281
				return true;
282
			}
283
		}
284

  
285
		return false;
286
	}
287
	
288
    /**
289
	 * See if schemas have been specified in the xml:schemalocation attribute.
290
	 * If so, return a vector of the system ids.
291
	 * 
292
	 * @param xml
293
	 *            the document we want to look in for schema location
294
	 * @return a vector of XMLSchema objects, or an empty vector if none are
295
	 *         found
296
	 */
297
	public static Vector<XMLSchema> findSchemasInXML(StringReader xml) throws IOException {
298
		Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
299
		Vector<XMLSchema> schemaList = new Vector<XMLSchema>();
300

  
301
		// no xml. return empty vector
302
		if (xml == null) {
303
			logMetacat.debug("Returning empty schemaList.");
304
			return schemaList;
305
		}
306

  
307
		// Get the "second line" from the xml
308
		String targetLine = getSchemaLine(xml);
309

  
310
		// see if there is a match for xsi.schemaLocation. If so, extract the
311
		// schemas.
312
		if (targetLine != null) {
313
			String regex = "(\\p{Graph}*):schemaLocation=\"(.*)\"";
314
			Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE
315
					| Pattern.DOTALL);
316
			Matcher matcher = pattern.matcher(targetLine);
317
			int i = 0;
318
			while (i < targetLine.length()) {
319
				if (!matcher.find(i)) {
320
					break;
321
				}
322

  
323
				String uri = matcher.group(2);
324
				uri = StringUtil.replaceTabsNewLines(uri);
325
				uri = StringUtil.replaceDuplicateSpaces(uri);
326

  
327
				// each namespace could have several schema locations. parsedUri
328
				// will hold a list of uri and files.
329
				Vector<String> parsedUri = StringUtil.toVector(uri, ' ');
330
				for (int j = 0; j < parsedUri.size(); j = j + 2) {
331
					if (j + 1 >= parsedUri.size()) {
332
						throw new IOException(
333
								"Odd number of elements found when parsing schema location: "
334
										+ targetLine
335
										+ ". There should be an even number of uri/files in location.");
336
					}
337
					XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri
338
							.get(j + 1));
339
					schemaList.add(xmlSchema);
340
				}
341
				i = matcher.end();
342
			}
343
		}
344

  
345
		logMetacat.debug("Schemas for eml are " + schemaList.toString());
346

  
347
		return schemaList;
348
	}    
349
    
350
    /**
351
	 * Returns all the namespace for an xml document.  This is done by getting
352
	 * the internal namespace declaration (prefix) and looking for xmlns:<prefix>
353
	 * 
354
	 * @param xml
355
	 *            the document to search
356
	 * @return a string holding the namespace
357
	 */
358
	public static String findDocumentNamespace(StringReader xml) throws IOException {
359
		String namespace = null;
360

  
361
		String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
362
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
363
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
364

  
365
		if (xml == null) {
366
			logMetacat.debug("XML doc is null.  There is no namespace.");
367
			return namespace;
368
		}
369

  
370
		String targetLine = getSchemaLine(xml);
371

  
372
		// the prefix is at the beginning of the doc
373
		String prefix = null;
374
		String regex1 = "^\\s*(.+):.* ";
375
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
376
		Matcher matcher = pattern.matcher(targetLine);
377
		if (matcher.find()) {
378
			prefix = matcher.group(1).trim();
379
		}
380

  
381
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
382
		// if no prefix was found, we grab the first namespace.
383
		String regex2;
384
		if (prefix != null) {
385
			regex2 = "xmlns:" + prefix + "=\"(.*)\"";
386
		} else {
387
			regex2 = "xmlns:.*=\"(.*)\"";
388
		}
389
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
390
		Matcher matcher2 = pattern2.matcher(targetLine);
391
		if (matcher2.find()) {
392
			namespace = matcher2.group(1);
393

  
394
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
395
				namespace = eml2_0_0NameSpace;
396
			} else if (namespace.indexOf(eml2_0_1NameSpace) != -1) {
397
				namespace = eml2_0_1NameSpace;
398
			} else if (namespace.indexOf(eml2_1_0NameSpace) != -1) {
399
				namespace = eml2_1_0NameSpace;
400
			}
401
		}
402

  
403
		return namespace;
404
	}
405
    
406
    /**
407
	 * Return the line from xml that holds the metadata like namespace and
408
	 * schema location
409
	 * 
410
	 * @param xml
411
	 *            the document to parse
412
	 * @return the "second" line of the document
413
	 */
414
    private static String getSchemaLine(StringReader xml) throws IOException {
415
        Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
416
        // find the line
417
        String secondLine = null;
418
        int count = 0;
419
        final int TARGETNUM = 1;
420
        StringBuffer buffer = new StringBuffer();
421
        boolean comment = false;
422
        boolean processingInstruction = false;
423
        char thirdPreviousCharacter = '?';
424
        char secondPreviousCharacter = '?';
425
        char previousCharacter = '?';
426
        char currentCharacter = '?';
427
        int tmp = xml.read();
428
        while (tmp != -1) {
429
            currentCharacter = (char)tmp;
430
            //in a comment
431
            if (currentCharacter == '-' && previousCharacter == '-'
432
                    && secondPreviousCharacter == '!'
433
                    && thirdPreviousCharacter == '<') {
434
                comment = true;
435
            }
436
            //out of comment
437
            if (comment && currentCharacter == '>' && previousCharacter == '-'
438
                    && secondPreviousCharacter == '-') {
439
                comment = false;
440
            }
441
            
442
            //in a processingInstruction
443
            if (currentCharacter == '?' && previousCharacter == '<') {
444
                processingInstruction = true;
445
            }
446
            
447
            //out of processingInstruction
448
            if (processingInstruction && currentCharacter == '>'
449
                    && previousCharacter == '?') {
450
                processingInstruction = false;
451
            }
452
            
453
            //this is not comment or a processingInstruction
454
            if (currentCharacter != '!' && previousCharacter == '<'
455
                    && !comment && !processingInstruction) {
456
                count++;
457
            }
458
            
459
            // get target line
460
            if (count == TARGETNUM && currentCharacter != '>') {
461
                buffer.append(currentCharacter);
462
            }
463
            if (count == TARGETNUM && currentCharacter == '>') {
464
                break;
465
            }
466
            thirdPreviousCharacter = secondPreviousCharacter;
467
            secondPreviousCharacter = previousCharacter;
468
            previousCharacter = currentCharacter;
469
            tmp = xml.read();
470
        }
471
        secondLine = buffer.toString();
472
        logMetacat.debug("the second line string is: " + secondLine);
473
        
474
        xml.reset();
475
        return secondLine;
476
    }
477
    
478
    /**
479
	 * Get a schema file name from the schema uri.
480
	 * 
481
	 * @param uri
482
	 *            the uri from which to extract the file name
483
	 * @return a string holding the file name
484
	 */
485
    public static String getSchemaFileNameFromUri(String uri) {
486
		// get filename from systemId
487
		String filename = uri;
488
		
489
		if (filename != null && !(filename.trim()).equals("")) {
490
			int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
491
			if (slash > -1) {
492
				filename = filename.substring(slash + 1);
493
			}
494
		}
495

  
496
		return filename;
497
	}
498
}
0 499

  

Also available in: Unified diff