Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements session utility methods 
4
 *  Copyright: 2008 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $'
10
 * '$Revision: 4297 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.service;
28

    
29
import java.io.File;
30
import java.io.FileWriter;
31
import java.io.IOException;
32
import java.io.OutputStream;
33
import java.io.StringReader;
34
import java.net.HttpURLConnection;
35
import java.net.MalformedURLException;
36
import java.net.URL;
37
import java.sql.PreparedStatement;
38
import java.sql.ResultSet;
39
import java.sql.SQLException;
40
import java.util.Hashtable;
41
import java.util.Vector;
42
import java.util.regex.Matcher;
43
import java.util.regex.Pattern;
44

    
45
import org.apache.commons.io.IOUtils;
46
import org.apache.log4j.Logger;
47
import org.xml.sax.SAXException;
48

    
49
import edu.ucsb.nceas.metacat.DocumentImpl;
50
import edu.ucsb.nceas.metacat.MetaCatServlet;
51
import edu.ucsb.nceas.metacat.client.MetacatException;
52
import edu.ucsb.nceas.metacat.database.DBConnection;
53
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
54
import edu.ucsb.nceas.metacat.properties.PropertyService;
55
import edu.ucsb.nceas.metacat.shared.BaseService;
56
import edu.ucsb.nceas.metacat.shared.ServiceException;
57
import edu.ucsb.nceas.utilities.FileUtil;
58
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
59
import edu.ucsb.nceas.utilities.StringUtil;
60

    
61
public class XMLSchemaService extends BaseService {
62
	
63
	public static final String NAMESPACEKEYWORD = "xmlns";
64
	
65
	public static final String SCHEMA_DIR = "/schema/";
66
	
67
	private static XMLSchemaService xmlSchemaService = null;
68
	
69
	private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class);
70
	
71
	private static boolean useFullSchemaValidation = false;
72
	
73
//	private static String documentNamespace = null;
74
	
75
	// all schema objects that represent schemas registered in the db that 
76
	// actually have files on disk. It doesn't include the schemas without namespace
77
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
78
	
79
	// all non-namespace schema objects that represent schemas registered in the db that 
80
    // actually have files on disk. It doesn't include the schemas with namespaces
81
	private static Vector<XMLNoNamespaceSchema> registeredNoNamespaceSchemaList = new Vector<XMLNoNamespaceSchema>();
82
	
83
	// a convenience list that holds the names of registered namespaces.
84
    private static Vector<String> nameSpaceList = new Vector<String>();
85
    
86
    // a convenience string that holds all name spaces and locations in a space
87
    // delimited format. Those items don't have a format id. This is the old way we handle the schema location
88
    private static String nameSpaceAndLocationStringWithoutFormatId = ""; 
89
	
90
    //this hash table is design for schema variants. Two schemas have the same name space,
91
    //but they have different content (location). So we use different format ids to
92
    //distinguish them. The key of the hash table is the format id, the values is all the namespace schema location
93
    //delimited string for this format id.
94
    private static Hashtable<String, String> formatId_NamespaceLocationHash = new Hashtable<String, String>();
95
	/**
	 * Private constructor since this class is a singleton. Sets the service
	 * name and performs the initial load of the schema information by calling
	 * {@link #doRefresh()}.
	 * 
	 * A failure of the initial refresh does not abort construction; it is
	 * logged so the service instance is still available to callers.
	 */
	private XMLSchemaService() {
		_serviceName = "XMLSchemaService";
		try {
			doRefresh();
		} catch (ServiceException e) {
			// A failed initial load leaves the service without schema data, so
			// report it at error level (with the stack trace) instead of
			// hiding it in the debug log.
			logMetacat.error("XMLSchemaService.XMLSchemaService - Could not refresh the schema service: "
					+ e.getMessage(), e);
		}
	}
106
	
107
	/**
108
	 * Get the single instance of XMLService.
109
	 * 
110
	 * @return the single instance of XMLService
111
	 */
112
	public static XMLSchemaService getInstance() {
113
		if (xmlSchemaService == null) {
114
			xmlSchemaService = new XMLSchemaService();
115
		}
116
		return xmlSchemaService;
117
	}
118
	
119
	public boolean refreshable() {
120
		return true;
121
	}
122
	
123
	/**
124
	 * refresh the persistant values in this service.
125
	 */
126
	public void doRefresh() throws ServiceException {
127
	    logMetacat.debug("XMLService.doRefresh - refreshing the schema service.");
128
		try {
129
			populateRegisteredSchemaList();
130
			populateRegisteredNoNamespaceSchemaList();
131
			setUseFullSchemaValidation();
132
			createRegisteredNameSpaceList();
133
			createRegisteredNameSpaceAndLocationString();
134
		} catch (PropertyNotFoundException pnfe) {
135
			logMetacat.error("XMLService.doRefresh - Could not find property: xml.useFullSchemaValidation. " + 
136
					"Setting to false.");
137
		}
138
	}
139
	
140
	public void stop() throws ServiceException {
141
		return;
142
	}
143
	
144
	/**
145
	 * Gets the registered schema list. This list holds schemas that exist in
146
	 * the xml_catalog table that also have associated files in the schema
147
	 * directory.
148
	 * 
149
	 * @return a list of XMLSchema objects holding registered schema information
150
	 */
151
	public Vector<XMLSchema> getRegisteredSchemaList() {
152
		return registeredSchemaList;
153
	}
154
	
155
	/**
156
	 * Gets the name space and location string. This is a convenience method.
157
	 * The string will have space delimited namespaces and locations that are
158
	 * held in the registered schema list. This is the old way Metacat worked.
159
	 * Usually, we will call the method getNameSapceAndLocation(String formatId) first.
160
	 * If the method return null, we will call this method.
161
	 * 
162
	 * @return a string that holds space delimited registered namespaces and
163
	 *         locations.
164
	 */
165
	public String getNameSpaceAndLocationStringWithoutFormatId() {
166
		return nameSpaceAndLocationStringWithoutFormatId;
167
	}
168
	
169
	
170
	/**
171
	 * Get the all schema-location pairs registered for the formatId.
172
	 * The null will be returned, if we can find it.
173
	 * @param formatId
174
	 * @return
175
	 */
176
	public String getNameSpaceAndLocation(String formatId) {
177
	    if(formatId == null) {
178
	        return null;
179
	    } else {
180
	        return formatId_NamespaceLocationHash.get(formatId);
181
	    }
182
	}
183
	
184
	/**
185
	 * Gets a list of name spaces. This is a convenience method. The list will 
186
	 * have all namespaces that are held in the registered schema list.
187
	 * 
188
	 * @return a list that holds registered namespaces.
189
	 */
190
	public Vector<String> getNameSpaceList() {
191
		return nameSpaceList;
192
	}
193
	
194
	/**
195
	 * Report whether xml parsing is set to use full schema parsing. If full
196
	 * schema parsing is true, new schemas will be validated before being
197
	 * entered into the database and file system.
198
	 * 
199
	 * @return true if the xml.useFullSchemaValidation property is set to true,
200
	 *         false otherwise.
201
	 */
202
	public boolean useFullSchemaValidation() {
203
		return useFullSchemaValidation;
204
	}
205
	
206
	/**
207
	 * sets the UseFullSchemaValidation variable.  The only way this should be
208
	 * set is in the constructor or the refresh methods.
209
	 */
210
	private void setUseFullSchemaValidation() throws PropertyNotFoundException {
211
		String strUseFullSchemaValidation = 
212
			PropertyService.getProperty("xml.useFullSchemaValidation");
213
		useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation);
214
	}
215

    
216
	/**
217
	 * Populate the list of registered schemas. This reads all schemas in the
218
	 * xml_catalog table and then makes sure the schema actually exists and is
219
	 * readable on disk.
220
	 */
221
	public void populateRegisteredSchemaList() {
222
		DBConnection conn = null;
223
		int serialNumber = -1;
224
		PreparedStatement pstmt = null;
225
		ResultSet resultSet = null;
226
		registeredSchemaList = new Vector<XMLSchema>();
227

    
228
		// get the system id from the xml_catalog table for all schemas.
229
		String sql = "SELECT public_id, system_id, format_id FROM xml_catalog where "
230
				+ "entry_type ='" + XMLSchema.getType() + "'";
231
		try {
232
			// check out DBConnection
233
			conn = DBConnectionPool
234
					.getDBConnection("XMLService.populateRegisteredSchemaList");
235
			serialNumber = conn.getCheckOutSerialNumber();
236
			pstmt = conn.prepareStatement(sql);
237
			logMetacat.debug("XMLService.populateRegisteredSchemaList - Selecting schemas: " + pstmt.toString());
238
			pstmt.execute();
239
			resultSet = pstmt.getResultSet();
240

    
241
			// make sure the schema actually exists on the file system. If so,
242
			// add it to the registered schema list.
243
			while (resultSet.next()) {
244
				String fileNamespace = resultSet.getString(1);
245
				String fileLocation = resultSet.getString(2);
246
				String formatId = resultSet.getString(3);
247
				logMetacat.debug("XMLService.populateRegisteredSchemaList - Registering schema: " + fileNamespace + " " + fileLocation+ " and format id "+formatId);
248
				XMLSchema xmlSchema = new XMLSchema(fileNamespace, fileLocation, formatId);
249
				if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://"))
250
				{
251
				    continue;//skip the external schemas.
252
				    /*//System.out.println("processing an http schemal location");
253
				    logMetacat.debug("XMLService.populateRegisteredSchemaList - Processing http schema location: " + fileLocation);
254
				    xmlSchema.setExternalFileUri(fileLocation);
255
				    //cache the file
256
				    try
257
				    {
258
				        URL u = new URL(fileLocation);
259
				        //System.out.println("downloading " + fileLocation);
260
				        logMetacat.debug("XMLService.populateRegisteredSchemaList - Downloading http based schema...");
261
				        HttpURLConnection connection = (HttpURLConnection) u.openConnection();
262
				        connection.setDoOutput(true);
263
			            connection.setRequestMethod("GET");
264
                                    connection.setReadTimeout(5000);
265
                                    //System.out.println("the ============== the read timeout is ================"+connection.getReadTimeout());
266
			            //System.out.println("the ============== the connection timeout is ================"+connection.getConnectTimeout());
267
			            connection.connect();
268
			            String schema = IOUtils.toString(connection.getInputStream());
269
			            
270
			            String deployDir = PropertyService.getProperty("application.deployDir");
271
			            String contextName = PropertyService.getProperty("application.context");
272
			            String filename = fileLocation.substring(fileLocation.lastIndexOf("/"), 
273
                                fileLocation.length());
274
			            File schemaFile = new File(deployDir + "/" + contextName + "/" +
275
			                    "schema/" + filename);
276
			            //System.out.println("writing schema to " + schemaFile.getAbsolutePath());
277
			            FileWriter fw = new FileWriter(schemaFile);
278
			            fw.write(schema);
279
			            fw.flush();
280
			            fw.close();
281
			            logMetacat.debug("XMLService.populateRegisteredSchemaList - Schema downloaded to " + schemaFile.getAbsolutePath());
282
			            fileLocation = "/schema/" + filename;
283
			            //System.out.println("fileLocation set to " + fileLocation);
284
			            logMetacat.debug("XMLService.populateRegisteredSchemaList - fileLocation set to " + fileLocation);
285
			            xmlSchema.setFileName(fileLocation);
286
				    }
287
				    catch(MalformedURLException me)
288
				    {
289
				        logMetacat.warn("Could not cache a registered schema at " + fileLocation +
290
				                " because a connection could not be made to the given url: " + 
291
				                me.getMessage());
292
				    }
293
                    catch (IOException ioe)
294
                    {
295
                        logMetacat.warn("Could not cache a registered schema at " + fileLocation +
296
                        " because an IOException occured: " + ioe.getMessage());
297
                    }
298
                    catch(PropertyNotFoundException pnfe)
299
                    {
300
                        logMetacat.warn("Could not cache a registered schema at " + fileLocation +
301
                                " because the property 'application.tempDir' could not be found.");
302
                    }
303
				    
304
				    xmlSchema.setFileName(fileLocation);*/
305
				}
306
				else
307
				{
308
				    xmlSchema.setFileName(fileLocation);
309
				}
310
								
311
				if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) 
312
				{
313
					registeredSchemaList.add(xmlSchema);
314
				}
315
				else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://"))
316
                {  //the schema resides on a different server, to validate, we need to go get it 
317
                    //registeredSchemaList.add(xmlSchema);
318
				    logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered schema list.");
319
                }
320
				else 
321
				{
322
					logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
323
							+ " in the database but does not exist on the file system. So we don't add it to the registered schema list.");
324
				}
325
			}
326
		} catch (SQLException e) {
327
			logMetacat.error("XMLService.populateRegisteredSchemaList - SQL Error: "
328
					+ e.getMessage());
329
		} finally {
330
			try {
331
				pstmt.close();
332
			}// try
333
			catch (SQLException sqlE) {
334
				logMetacat.error("XMLSchemaService.populateRegisteredSchemaList - Error in XMLService.populateRegisteredSchemaList(): "
335
						+ sqlE.getMessage());
336
			}
337
			DBConnectionPool.returnDBConnection(conn, serialNumber);
338
		}
339
	}	
340
	
341
	/*
342
	 * Populate the list of registered no-namespace schemas. This reads all no-namespace schemas in the
343
     * xml_catalog table and then makes sure the schema actually exists and is
344
     * readable on disk.
345
	 */
346
	private void populateRegisteredNoNamespaceSchemaList() {
347
	    DBConnection conn = null;
348
        int serialNumber = -1;
349
        PreparedStatement pstmt = null;
350
        ResultSet resultSet = null;
351
        registeredNoNamespaceSchemaList = new Vector<XMLNoNamespaceSchema>();
352
        // get the system id from the xml_catalog table for all schemas.
353
        String sql = "SELECT no_namespace_schema_location, system_id, format_id FROM xml_catalog where "
354
                + "entry_type ='" + XMLNoNamespaceSchema.getType()+ "'";
355
        try {
356
            // check out DBConnection
357
            conn = DBConnectionPool
358
                    .getDBConnection("XMLService.populateRegisteredNoNamespaceSchemaList");
359
            serialNumber = conn.getCheckOutSerialNumber();
360
            pstmt = conn.prepareStatement(sql);
361
            logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - Selecting schemas: " + pstmt.toString());
362
            pstmt.execute();
363
            resultSet = pstmt.getResultSet();
364

    
365
            // make sure the schema actually exists on the file system. If so,
366
            // add it to the registered schema list.
367
            while (resultSet.next()) {
368
                String noNamespaceSchemaLocationURI = resultSet.getString(1);
369
                String fileLocation = resultSet.getString(2);
370
                String formatId = resultSet.getString(3);
371
                logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - try to register schema: " + noNamespaceSchemaLocationURI + "(no namespace-schema-location-uri) " + fileLocation+ " and format id "+formatId);
372
                XMLNoNamespaceSchema xmlSchema = new XMLNoNamespaceSchema(noNamespaceSchemaLocationURI, fileLocation, formatId);
373
                if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) {
374
                    continue;//skip the external schemas.
375
                }
376
                else {
377
                    xmlSchema.setFileName(fileLocation);
378
                }
379
                                
380
                if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) {
381
                    registeredNoNamespaceSchemaList.add(xmlSchema);
382
                }
383
                else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) {  //the schema resides on a different server, to validate, we need to go get it 
384
                    //registeredSchemaList.add(xmlSchema);
385
                    logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered no-namespace schema list.");
386
                }
387
                else {
388
                    logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
389
                            + " in the database but does not exist on the file system. So we don't add it to the registered no-namespace schema list.");
390
                }
391
            }
392
        } catch (SQLException e) {
393
            e.printStackTrace();
394
            logMetacat.error("XMLService.populateRegisteredNoNamespaceSchemaList - SQL Error: "
395
                    + e.getMessage());
396
        } finally {
397
            try {
398
                pstmt.close();
399
            }// try
400
            catch (SQLException sqlE) {
401
                logMetacat.error("XMLSchemaService.populateRegisteredNoNamespaceSchemaList - Error in close the pstmt: "
402
                        + sqlE.getMessage());
403
            }
404
            DBConnectionPool.returnDBConnection(conn, serialNumber);
405
        }
406
	}
407
	
408
	/**
409
	 * create a space delimited string of all namespaces and locations
410
	 * in the registered schema list.
411
	 */
412
	private static void createRegisteredNameSpaceAndLocationString() {
413
		boolean firstRowWithoutFormatid = true;
414
		boolean firstRowWithFormatid = true;
415
		nameSpaceAndLocationStringWithoutFormatId = "";
416
		
417
		for (XMLSchema xmlSchema : registeredSchemaList) {
418
		    String formatId = xmlSchema.getFormatId();
419
		    if( formatId == null ||formatId.trim().equals("")) {
420
		        //this is to handle the old way - no schema variants 
421
		        if (!firstRowWithoutFormatid) {
422
	                nameSpaceAndLocationStringWithoutFormatId += " ";
423
	            }
424
	            nameSpaceAndLocationStringWithoutFormatId += xmlSchema.getFileNamespace() + " "
425
	                    + xmlSchema.getLocalFileUri();
426
	            firstRowWithoutFormatid = false;
427
		    } else {
428
		        //it has a format id on the xml_catalog table. It is a variant.
429
		        if(!formatId_NamespaceLocationHash.containsKey(xmlSchema.getFormatId())) {
430
		            //the hash table hasn't stored the value. So put it on the hash.
431
		            formatId_NamespaceLocationHash.put(formatId, xmlSchema.getFileNamespace() + " "
432
	                        + xmlSchema.getLocalFileUri());
433
		        } else {
434
		          //the hash table already has it. We will attache the new pair to the exist value
435
		            String value = formatId_NamespaceLocationHash.get(formatId);
436
		            value += " "+ xmlSchema.getFileNamespace() + " "
437
	                        + xmlSchema.getLocalFileUri();
438
		            formatId_NamespaceLocationHash.put(formatId, value);
439
		        }
440
		    }
441
			
442
		}
443
	}
444

    
445
	/**
446
	 * create a lsit of all namespaces in the registered schema list.
447
	 */
448
	private static void createRegisteredNameSpaceList() {
449
		nameSpaceList = new Vector<String>();
450
		for (XMLSchema xmlSchema : registeredSchemaList) {
451
			nameSpaceList.add(xmlSchema.getFileNamespace());
452
		}
453
	}
454
	
455
	/**
456
	 * Checks to see that all schemas are registered. If a single one in the
457
	 * list is not, this will return false.
458
	 * 
459
	 * @param schemaList
460
	 *            a list of schemas as they appear in xml.
461
	 * @return true if all schemas are registered.
462
	 */
463
	public static boolean areAllSchemasRegistered(Vector<XMLSchema> schemaList) {			
464
		for (XMLSchema xmlSchema : schemaList) {
465
			if ( ! isSchemaRegistered(xmlSchema)) {
466
				return false;
467
			}
468
		}		
469
		return true;
470
	}
471
	
472
	/**
473
	 * Returns true if the schema is registered.
474
	 * 
475
	 * @param schema
476
	 *            a single schema as it appears in xml
477
	 * @return true if the schema is registered, false otherwise.
478
	 */
479
	public static boolean isSchemaRegistered(XMLSchema xmlSchema) {
480
		for (XMLSchema registeredXmlSchema : registeredSchemaList) {
481
			if (registeredXmlSchema.getLocalFileUri().equals(
482
						xmlSchema.getLocalFileUri())
483
					&& registeredXmlSchema.getFileNamespace().equals(
484
							xmlSchema.getFileNamespace())) {
485
				return true;
486
			}
487
		}
488

    
489
		return false;
490
	}
491
	
492
	/**
493
	 * Test if the given namespace registered in Metacat
494
	 * @param namespace the namespace will be tested
495
	 * @return true if the namespace is registered; otherwise false.
496
	 */
497
	public static boolean isNamespaceRegistered(String namespace) {
498
	    boolean registered = false;
499
	    if(namespace != null && !namespace.trim().equals("")) {
500
	        if(nameSpaceList != null && !nameSpaceList.isEmpty()) {
501
	            for (String registeredNamespace : nameSpaceList) {
502
	                logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Loop the registered namespaces in Metacat: "+
503
	                                                    registeredNamespace+" to compare the given namespace "+namespace);
504
	                if (registeredNamespace != null && registeredNamespace.equals(namespace)) {
505
	                    registered = true;
506
	                    break;
507
	                }
508
	            }
509
	        } else {
510
	            logMetacat.error("XMLSchemaService.isNamespaceRegistered - The registered namespace list is null or empty! So we will reject any document which needs validataion");
511
	        }
512
	        
513
	    } else {
514
	        logMetacat.debug("XMLSchemaService.isNamespaceRegistered - The given namespace is null or blank. So it is not registered.");
515
	    }
516
	    logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Is the namespace "+namespace+" registered in Metacat? "+registered);
517
	    return registered;
518
	}
519
	
520
	/**
521
	 * Get the namespace-schemaLocation pairs string based on given formatId and namespace.
522
	 * The algorithm is:
523
	 * 1. Look up all pairs of namespace--schemalocation for the given formatId in the xml_catalog table. If we find it, return all of the pairs.
524
	 * 2. If we can't find anything on the step 1, look up the record for the given namespace. If we find it, return all of pairs namespace-location without formatid.
525
	 * 3. Return null if we can't find anything. 
526
	 * @param formatId  the given format id
527
	 * @param namespace  the given namespace
528
	 * @return the string of the namespace-schemaLocation pairs (separated by white spaces). The null will be returned, if we can't find one.
529
	 */
530
	public String findNamespaceAndSchemaLocalLocation(String formatId, String namespace) throws MetacatException{
531
	    String location = null;
532
	    location = getNameSpaceAndLocation(formatId);
533
	    logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based the format id "+formatId+" is "+location);
534
	    if(location == null) {
535
	        //can't find it for given formId. Now we look up namespace
536
	        logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based on the format id "+formatId+" is null and we will lookup the given namespace "+namespace);
537
            if(isNamespaceRegistered(namespace)) {
538
                location = getNameSpaceAndLocationStringWithoutFormatId();
539
                logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is registered in Metacat");
540
            } else {
541
                logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is NOT registered in Metacat");
542
            }
543
	    }
544
	    if(location == null) {
545
	        logMetacat.error("XMLSchemaService.findNamespaceAndSchemaLocation - We can't find the schema location for the namespace "+namespace+" and format id "+formatId+". This means they are not registered in Metacat.");
546
	        throw new MetacatException("The namespace "+namespace+" and the format id "+formatId+
547
	                " are not registered in the Metacat. So the object using the namespace was rejected since Metacat can't validate the xml instance. Please contact the Metacat operator to register them.");
548
	    }
549
	    logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - The final location string for the namespace "+namespace+" and format id "+formatId+" is "+location);
550
	    return location;
551
	}
552
	
553
	/**
554
	 * Get the local (official) location for a no-namespace schema based on the given format id or no-name-space schema location uri.
555
	 * The format id has the higher priority
556
	 * 1. Compare the given format id with all registered no-namespace schema. If a match is found, return it.
557
	 * 2. If the step 1 return null, compare the given noNamespaceSchemaLocationuri.
558
	 * @param formatId
559
	 * @param noNamespaceSchemaLocation
560
	 * @return
561
	 */
562
	public String findNoNamespaceSchemaLocalLocation(String formatId, String noNamespaceSchemaLocation) throws MetacatException {
563
	    String location = null;
564
        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id for determining the schema local location is "+formatId);
565
        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocationURI for determining the schema local location is "+noNamespaceSchemaLocation);
566
	    if(registeredNoNamespaceSchemaList != null && !registeredNoNamespaceSchemaList.isEmpty()) {
567
	        if((formatId != null && !formatId.trim().equals(""))) {
568
                logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id "+formatId+ "is not null and let's compare format id first.");
569
    	        for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) {
570
    	            if(schema != null) {
571
    	                String registeredFormatId = schema.getFormatId();
572
    	                logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has the format id "+registeredFormatId);
573
    	                    if(registeredFormatId != null && !registeredFormatId.trim().equals("")) {
574
    	                        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered format id "+registeredFormatId+ "is not null as well. Compare it");
575
    	                        if(formatId.equals(registeredFormatId)) {
576
    	                            logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered format id is the same: "+formatId+". Match sucessfully!");
577
    	                            location = schema.getLocalFileUri();
578
    	                            break;
579
    	                        }
580
    	                    }
581
    	             } 
582
    	         }
583
	        }
584
	        if(location == null) {
585
	           logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - we can't find any regisered no-namespace schema has the foramtid "+formatId+ 
586
	                   " (if it is null, this means there is no given format id.) Let's compare the noNamespaceSchemaLocaionURL which the given value is "+noNamespaceSchemaLocation);
587
	           if(noNamespaceSchemaLocation != null && !noNamespaceSchemaLocation.trim().equals("")) {
588
	               logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation+ "is not null and let's compare it.");
589
	                for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) {
590
	                    if(schema != null) {
591
	                        String registeredSchemaLocationURI = schema.getNoNamespaceSchemaLocation();
592
	                        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has noNamespaceSchemaLocation uri "+registeredSchemaLocationURI);
593
	                            if(registeredSchemaLocationURI != null && !registeredSchemaLocationURI.trim().equals("")) {
594
	                                logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered registeredSchemaLocation URI "+registeredSchemaLocationURI+ "is not null as well. Compare it");
595
	                                if(noNamespaceSchemaLocation.equals(registeredSchemaLocationURI)) {
596
	                                    logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered noNamespaceSchemaLocation is the same: "+noNamespaceSchemaLocation+". Match sucessfully!");
597
	                                    location = schema.getLocalFileUri();
598
	                                    break;
599
	                                }
600
	                            }
601
	                        } 
602
	                 }
603
	           }
604
	        }
605
	        
606
	    } else {
607
	        logMetacat.warn("XMLSchemaService.findNoNamespaceSchemaLocalLocation - there is no registered no-namespace schema in the Metacat");
608
	    }
609
	    
610
	    if(location == null) {
611
            logMetacat.error("XMLSchemaService.findNoNamespaceSchemaLocalLocation - We can't find Metacat local schema location for the noNamespaceLocation "+noNamespaceSchemaLocation+
612
                    " and format id "+formatId+". This means they are not registered in Metacat.");
613
            throw new MetacatException("The noNamespaceSchemaLocation "+noNamespaceSchemaLocation+" or the format id "+formatId+
614
                    " is not registered in the Metacat. So the object using them was rejected since Metacat can't validate the xml instance. Please contact the Metacat operator to register them.");
615
        }
616
	    logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the schema location is "+location+" (if it is null, this means it is not registered) for the format id "+formatId+
617
	            " or noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation);
618
	    return location;
619
	}
620
	
621
    /**
622
	 * See if schemas have been specified in the xml:schemalocation attribute.
623
	 * If so, return a vector of the system ids.
624
	 * 
625
	 * @param xml
626
	 *            the document we want to look in for schema location
627
	 * @return a vector of XMLSchema objects, or an empty vector if none are
628
	 *         found
629
	 */
630
	public static Vector<XMLSchema> findSchemasInXML(StringReader xml) throws IOException {
631
		Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
632
		Vector<XMLSchema> schemaList = new Vector<XMLSchema>();
633

    
634
		// no xml. return empty vector
635
		if (xml == null) {
636
			logMetacat.debug("XMLSchemaService.findSchemasInXML - Returning empty schemaList.");
637
			return schemaList;
638
		}
639

    
640
		// Get the "second line" from the xml
641
		String targetLine = getSchemaLine(xml);
642

    
643
		// see if there is a match for xsi.schemaLocation. If so, extract the
644
		// schemas.
645
		if (targetLine != null) {
646
			String regex = "(\\p{Graph}*):schemaLocation=\"([^\"]*)\"";
647
			Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE
648
					| Pattern.DOTALL);
649
			Matcher matcher = pattern.matcher(targetLine);
650
			int i = 0;
651
			while (i < targetLine.length()) {
652
				if (!matcher.find(i)) {
653
					break;
654
				}
655

    
656
				String uri = matcher.group(2);
657
				uri = StringUtil.replaceTabsNewLines(uri);
658
				uri = StringUtil.replaceDuplicateSpaces(uri);
659

    
660
				// each namespace could have several schema locations. parsedUri
661
				// will hold a list of uri and files.
662
				Vector<String> parsedUri = StringUtil.toVector(uri, ' ');
663
				for (int j = 0; j < parsedUri.size(); j = j + 2) {
664
					if (j + 1 >= parsedUri.size()) {
665
						throw new IOException(
666
								"Odd number of elements found when parsing schema location: "
667
										+ targetLine
668
										+ ". There should be an even number of uri/files in location.");
669
					}
670
					String formatId = null;
671
					XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri
672
							.get(j + 1), formatId);
673
					schemaList.add(xmlSchema);
674
				}
675
				i = matcher.end();
676
			}
677
		}
678

    
679
		logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for xml are " + schemaList.toString());
680

    
681
		return schemaList;
682
	}    
683
    
684
    /**
685
	 * Returns the namespace for an xml document. 
686
	 * @param xml
687
	 *            the document to search
688
	 * @return a string holding the namespace. Null will be returned if there is no namespace.
689
     * @throws SAXException 
690
     * @throws PropertyNotFoundException 
691
	 */
692
	public static String findDocumentNamespace(StringReader xml) throws IOException, PropertyNotFoundException, SAXException {
693
		String namespace = null;
694

    
695
		/*String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
696
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
697
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
698
		String eml2_1_1NameSpace = DocumentImpl.EML2_1_1NAMESPACE;*/
699

    
700

    
701
		if (xml == null) {
702
			logMetacat.debug("XMLSchemaService.findDocumentNamespace - XML doc is null.  There is no namespace.");
703
			return namespace;
704
		}
705
		XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml);
706
		namespaceParser.parse();
707
		namespace = namespaceParser.getNamespace();
708
		/*String targetLine = getSchemaLine(xml);
709

    
710
		// the prefix is at the beginning of the doc
711
		String prefix = null;
712
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}*\\s+";
713
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
714
		Matcher matcher = pattern.matcher(targetLine);
715
		if (matcher.find()) {
716
			prefix = matcher.group(1).trim();
717
		}
718

    
719
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
720
		// if no prefix was found, we will look for the default namespace.
721
		String regex2;
722
		if (prefix != null) {
723
		    logMetacat.debug("XMLSchemaService.findDocumentNamespace - found the prefix for the document "+prefix);
724
			regex2 = "xmlns:" + prefix + "=['\"]([^\"])*['\"]";
725
		} else {
726
			//regex2 = "xmlns:.*=['\"](.*)['\"]";
727
		    logMetacat.debug("XMLSchemaService.findDocumentNamespace - can't found the prefix for the document, so we look for the default namespace");
728
		    regex2 = "xmlns=['\"](.*)['\"]";
729
		}
730
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
731
		Matcher matcher2 = pattern2.matcher(targetLine);
732
		if (matcher2.find()) {
733
		    logMetacat.debug("XMLSchemaService.findDocumentNamespace - it has either a prefix or a default namespace");
734
		    System.out.println("the match group 0"+" is "+matcher2.group());
735
			namespace = matcher2.group(1);
736
			
737
			System.out.println("the match group "+" is "+namespace);
738

    
739
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
740
				namespace = eml2_0_0NameSpace;
741
			} else if (namespace.indexOf(eml2_0_1NameSpace) != -1) {
742
				namespace = eml2_0_1NameSpace;
743
			} else if (namespace.indexOf(eml2_1_0NameSpace) != -1) {
744
				namespace = eml2_1_0NameSpace;
745
			} else if (namespace.indexOf(eml2_1_1NameSpace) != -1) {
746
				namespace = eml2_1_1NameSpace;
747
			}
748
		}*/
749
		logMetacat.debug("XMLSchemaService.findDocumentNamespace - the namespace (null means no namespace) in the document is "+namespace);
750
		return namespace;
751
	}
752
	
753
	/**
754
	 * Get the attribute value of the noNamespaceSchemaLcation of the given xml
755
	 * @param xml the xml obect needs to be searched
756
	 * @return the attribute value of the noNamespaceSchemaLcation. The null will return if it can't be found.
757
	 * @throws SAXException 
758
	 * @throws PropertyNotFoundException 
759
	 * @throws IOException 
760
	 */
761
	public static String findNoNamespaceSchemaLocationAttr(StringReader xml) throws PropertyNotFoundException, SAXException, IOException {
762
	    String noNamespaceSchemaLocation = null;
763
	    XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml);
764
        namespaceParser.parse();
765
        noNamespaceSchemaLocation = namespaceParser.getNoNamespaceSchemaLocation();
766
        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocation - the noNamespaceSchemaLocation (null means no namespace) in the document is "+noNamespaceSchemaLocation);
767
	    return noNamespaceSchemaLocation;
768
	}
769
    
770
    /**
771
	 * Return the line from xml that holds the metadata like namespace and
772
	 * schema location
773
	 * 
774
	 * @param xml
775
	 *            the document to parse
776
	 * @return the "second" line of the document
777
	 */
778
    private static String getSchemaLine(StringReader xml) throws IOException {
779
        Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
780
        // find the line
781
        String secondLine = null;
782
        int count = 0;
783
        final int TARGETNUM = 1;
784
        StringBuffer buffer = new StringBuffer();
785
        boolean comment = false;
786
        boolean processingInstruction = false;
787
        char thirdPreviousCharacter = '?';
788
        char secondPreviousCharacter = '?';
789
        char previousCharacter = '?';
790
        char currentCharacter = '?';
791
        int tmp = xml.read();
792
        while (tmp != -1) {
793
            currentCharacter = (char)tmp;
794
            //in a comment
795
            if (currentCharacter == '-' && previousCharacter == '-'
796
                    && secondPreviousCharacter == '!'
797
                    && thirdPreviousCharacter == '<') {
798
                comment = true;
799
            }
800
            //out of comment
801
            if (comment && currentCharacter == '>' && previousCharacter == '-'
802
                    && secondPreviousCharacter == '-') {
803
                comment = false;
804
            }
805
            
806
            //in a processingInstruction
807
            if (currentCharacter == '?' && previousCharacter == '<') {
808
                processingInstruction = true;
809
            }
810
            
811
            //out of processingInstruction
812
            if (processingInstruction && currentCharacter == '>'
813
                    && previousCharacter == '?') {
814
                processingInstruction = false;
815
            }
816
            
817
            //this is not comment or a processingInstruction
818
            if (currentCharacter != '!' && previousCharacter == '<'
819
                    && !comment && !processingInstruction) {
820
                count++;
821
            }
822
            
823
            // get target line
824
            if (count == TARGETNUM && currentCharacter != '>') {
825
                buffer.append(currentCharacter);
826
            }
827
            if (count == TARGETNUM && currentCharacter == '>') {
828
                break;
829
            }
830
            thirdPreviousCharacter = secondPreviousCharacter;
831
            secondPreviousCharacter = previousCharacter;
832
            previousCharacter = currentCharacter;
833
            tmp = xml.read();
834
        }
835
        secondLine = buffer.toString();
836
        logMetacat.debug("XMLSchemaService.getSchemaLine - the second line string is: " + secondLine);
837
        
838
        xml.reset();
839
        return secondLine;
840
    }
841
    
842
    /**
843
	 * Get a schema file name from the schema uri.
844
	 * 
845
	 * @param uri
846
	 *            the uri from which to extract the file name
847
	 * @return a string holding the file name
848
	 */
849
    public static String getSchemaFileNameFromUri(String uri) {
850
		// get filename from systemId
851
		String filename = uri;
852
		
853
		if (filename != null && !(filename.trim()).equals("")) {
854
			int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
855
			if (slash > -1) {
856
				filename = filename.substring(slash + 1);
857
			}
858
		}
859

    
860
		return filename;
861
	}
862
    
863
    /**
864
     * Get a base url from the schema url. If url=http://www.example.com/example.xsd,
865
     * http://www.example.com/ will be returned.
866
     * 
867
     * @param uri
868
     *            the uri from which to extract the base url
869
     * @return a string holding the base url. null will be return if it is not url.
870
     */
871
      public static String getBaseUrlFromSchemaURL(String url) 
872
      {
873
        String baseURL = null;        
874
        if (url != null && (url.indexOf("http://") != -1 || url.indexOf("https://") !=-1)) 
875
        {
876
          int slash = url.lastIndexOf('/');
877
          if (slash > -1) 
878
          {
879
            baseURL = url.substring(0,slash+1);
880
          }
881
        } 
882
        return baseURL;
883
      }
884
}
(7-7/7)