Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements session utility methods 
4
 *  Copyright: 2008 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $'
10
 * '$Revision: 4297 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.service;
28

    
29
import java.io.File;
30
import java.io.FileWriter;
31
import java.io.IOException;
32
import java.io.OutputStream;
33
import java.io.StringReader;
34
import java.net.HttpURLConnection;
35
import java.net.MalformedURLException;
36
import java.net.URL;
37
import java.sql.PreparedStatement;
38
import java.sql.ResultSet;
39
import java.sql.SQLException;
40
import java.util.Hashtable;
41
import java.util.Vector;
42
import java.util.regex.Matcher;
43
import java.util.regex.Pattern;
44

    
45
import org.apache.commons.io.IOUtils;
46
import org.apache.log4j.Logger;
47
import org.xml.sax.SAXException;
48

    
49
import edu.ucsb.nceas.metacat.DocumentImpl;
50
import edu.ucsb.nceas.metacat.MetaCatServlet;
51
import edu.ucsb.nceas.metacat.database.DBConnection;
52
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
53
import edu.ucsb.nceas.metacat.properties.PropertyService;
54
import edu.ucsb.nceas.metacat.shared.BaseService;
55
import edu.ucsb.nceas.metacat.shared.ServiceException;
56
import edu.ucsb.nceas.utilities.FileUtil;
57
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
58
import edu.ucsb.nceas.utilities.StringUtil;
59

    
60
public class XMLSchemaService extends BaseService {
61
	
62
	public static final String NAMESPACEKEYWORD = "xmlns";
63
	
64
	public static final String SCHEMA_DIR = "/schema/";
65
	
66
	private static XMLSchemaService xmlSchemaService = null;
67
	
68
	private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class);
69
	
70
	private static boolean useFullSchemaValidation = false;
71
	
72
//	private static String documentNamespace = null;
73
	
74
	// all schema objects that represent schemas registered in the db that 
75
	// actually have files on disk. It doesn't include the schemas without namespace
76
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
77
	
78
	// all non-amespace schema objects that represent schemas registered in the db that 
79
    // actually have files on disk. It doesn't include the schemas with namespaces
80
	private static Vector<XMLNoNamespaceSchema> registeredNoNamespaceSchemaList = new Vector<XMLNoNamespaceSchema>();
81
	
82
	// a convenience list that holds the names of registered namespaces.
83
    private static Vector<String> nameSpaceList = new Vector<String>();
84
    
85
    // a convenience string that holds all name spaces and locations in a space
86
    // delimited format. Those items don't have a format id. This is the old way we handle the schema location
87
    private static String nameSpaceAndLocationStringWithoutFormatId = ""; 
88
	
89
    //this hash table is design for schema variants. Two schemas have the same name space,
90
    //but they have different content (location). So we different format id to
91
    //distinguish them. The key of the hash table is the format id, the values is all the namespace schema location
92
    //delimited string for this format id.
93
    private static Hashtable<String, String> formatId_NamespaceLocationHash = new Hashtable<String, String>();
94
	/**
	 * Private constructor since this class is a singleton. Sets the service
	 * name and performs an initial refresh of the cached schema information.
	 */
	private XMLSchemaService() {
		_serviceName = "XMLSchemaService";
		try {
			doRefresh();
		} catch (ServiceException refreshFailure) {
			// A failed initial refresh is only reported; construction still completes.
			logMetacat.debug(refreshFailure.getMessage());
		}
	}
105
	
106
	/**
107
	 * Get the single instance of XMLService.
108
	 * 
109
	 * @return the single instance of XMLService
110
	 */
111
	public static XMLSchemaService getInstance() {
112
		if (xmlSchemaService == null) {
113
			xmlSchemaService = new XMLSchemaService();
114
		}
115
		return xmlSchemaService;
116
	}
117
	
118
	public boolean refreshable() {
119
		return true;
120
	}
121
	
122
	/**
123
	 * refresh the persistant values in this service.
124
	 */
125
	public void doRefresh() throws ServiceException {
126
	    logMetacat.debug("XMLService.doRefresh - refreshing the schema service.");
127
		try {
128
			populateRegisteredSchemaList();
129
			populateRegisteredNoNamespaceSchemaList();
130
			setUseFullSchemaValidation();
131
			createRegisteredNameSpaceList();
132
			createRegisteredNameSpaceAndLocationString();
133
		} catch (PropertyNotFoundException pnfe) {
134
			logMetacat.error("XMLService.doRefresh - Could not find property: xml.useFullSchemaValidation. " + 
135
					"Setting to false.");
136
		}
137
	}
138
	
139
	public void stop() throws ServiceException {
140
		return;
141
	}
142
	
143
	/**
144
	 * Gets the registered schema list. This list holds schemas that exist in
145
	 * the xml_catalog table that also have associated files in the schema
146
	 * directory.
147
	 * 
148
	 * @return a list of XMLSchema objects holding registered schema information
149
	 */
150
	public Vector<XMLSchema> getRegisteredSchemaList() {
151
		return registeredSchemaList;
152
	}
153
	
154
	/**
155
	 * Gets the name space and location string. This is a convenience method.
156
	 * The string will have space delimited namespaces and locations that are
157
	 * held in the registered schema list. This is the old way Metacat worked.
158
	 * Usually, we will call the method getNameSapceAndLocation(String formatId) first.
159
	 * If the method return null, we will call this method.
160
	 * 
161
	 * @return a string that holds space delimited registered namespaces and
162
	 *         locations.
163
	 */
164
	public String getNameSpaceAndLocationStringWithoutFormatId() {
165
		return nameSpaceAndLocationStringWithoutFormatId;
166
	}
167
	
168
	
169
	/**
170
	 * Get the all schema-location pairs registered for the formatId.
171
	 * The null will be returned, if we can find it.
172
	 * @param formatId
173
	 * @return
174
	 */
175
	public String getNameSpaceAndLocation(String formatId) {
176
	    if(formatId == null) {
177
	        return null;
178
	    } else {
179
	        return formatId_NamespaceLocationHash.get(formatId);
180
	    }
181
	}
182
	
183
	/**
184
	 * Gets a list of name spaces. This is a convenience method. The list will 
185
	 * have all namespaces that are held in the registered schema list.
186
	 * 
187
	 * @return a list that holds registered namespaces.
188
	 */
189
	public Vector<String> getNameSpaceList() {
190
		return nameSpaceList;
191
	}
192
	
193
	/**
194
	 * Report whether xml parsing is set to use full schema parsing. If full
195
	 * schema parsing is true, new schemas will be validated before being
196
	 * entered into the database and file system.
197
	 * 
198
	 * @return true if the xml.useFullSchemaValidation property is set to true,
199
	 *         false otherwise.
200
	 */
201
	public boolean useFullSchemaValidation() {
202
		return useFullSchemaValidation;
203
	}
204
	
205
	/**
206
	 * sets the UseFullSchemaValidation variable.  The only way this should be
207
	 * set is in the constructor or the refresh methods.
208
	 */
209
	private void setUseFullSchemaValidation() throws PropertyNotFoundException {
210
		String strUseFullSchemaValidation = 
211
			PropertyService.getProperty("xml.useFullSchemaValidation");
212
		useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation);
213
	}
214

    
215
	/**
216
	 * Populate the list of registered schemas. This reads all schemas in the
217
	 * xml_catalog table and then makes sure the schema actually exists and is
218
	 * readable on disk.
219
	 */
220
	public void populateRegisteredSchemaList() {
221
		DBConnection conn = null;
222
		int serialNumber = -1;
223
		PreparedStatement pstmt = null;
224
		ResultSet resultSet = null;
225
		registeredSchemaList = new Vector<XMLSchema>();
226

    
227
		// get the system id from the xml_catalog table for all schemas.
228
		String sql = "SELECT public_id, system_id, format_id FROM xml_catalog where "
229
				+ "entry_type ='" + XMLSchema.getType() + "'";
230
		try {
231
			// check out DBConnection
232
			conn = DBConnectionPool
233
					.getDBConnection("XMLService.populateRegisteredSchemaList");
234
			serialNumber = conn.getCheckOutSerialNumber();
235
			pstmt = conn.prepareStatement(sql);
236
			logMetacat.debug("XMLService.populateRegisteredSchemaList - Selecting schemas: " + pstmt.toString());
237
			pstmt.execute();
238
			resultSet = pstmt.getResultSet();
239

    
240
			// make sure the schema actually exists on the file system. If so,
241
			// add it to the registered schema list.
242
			while (resultSet.next()) {
243
				String fileNamespace = resultSet.getString(1);
244
				String fileLocation = resultSet.getString(2);
245
				String formatId = resultSet.getString(3);
246
				logMetacat.debug("XMLService.populateRegisteredSchemaList - Registering schema: " + fileNamespace + " " + fileLocation+ " and format id "+formatId);
247
				XMLSchema xmlSchema = new XMLSchema(fileNamespace, fileLocation, formatId);
248
				if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://"))
249
				{
250
				    continue;//skip the external schemas.
251
				    /*//System.out.println("processing an http schemal location");
252
				    logMetacat.debug("XMLService.populateRegisteredSchemaList - Processing http schema location: " + fileLocation);
253
				    xmlSchema.setExternalFileUri(fileLocation);
254
				    //cache the file
255
				    try
256
				    {
257
				        URL u = new URL(fileLocation);
258
				        //System.out.println("downloading " + fileLocation);
259
				        logMetacat.debug("XMLService.populateRegisteredSchemaList - Downloading http based schema...");
260
				        HttpURLConnection connection = (HttpURLConnection) u.openConnection();
261
				        connection.setDoOutput(true);
262
			            connection.setRequestMethod("GET");
263
                                    connection.setReadTimeout(5000);
264
                                    //System.out.println("the ============== the read timeout is ================"+connection.getReadTimeout());
265
			            //System.out.println("the ============== the connection timeout is ================"+connection.getConnectTimeout());
266
			            connection.connect();
267
			            String schema = IOUtils.toString(connection.getInputStream());
268
			            
269
			            String deployDir = PropertyService.getProperty("application.deployDir");
270
			            String contextName = PropertyService.getProperty("application.context");
271
			            String filename = fileLocation.substring(fileLocation.lastIndexOf("/"), 
272
                                fileLocation.length());
273
			            File schemaFile = new File(deployDir + "/" + contextName + "/" +
274
			                    "schema/" + filename);
275
			            //System.out.println("writing schema to " + schemaFile.getAbsolutePath());
276
			            FileWriter fw = new FileWriter(schemaFile);
277
			            fw.write(schema);
278
			            fw.flush();
279
			            fw.close();
280
			            logMetacat.debug("XMLService.populateRegisteredSchemaList - Schema downloaded to " + schemaFile.getAbsolutePath());
281
			            fileLocation = "/schema/" + filename;
282
			            //System.out.println("fileLocation set to " + fileLocation);
283
			            logMetacat.debug("XMLService.populateRegisteredSchemaList - fileLocation set to " + fileLocation);
284
			            xmlSchema.setFileName(fileLocation);
285
				    }
286
				    catch(MalformedURLException me)
287
				    {
288
				        logMetacat.warn("Could not cache a registered schema at " + fileLocation +
289
				                " because a connection could not be made to the given url: " + 
290
				                me.getMessage());
291
				    }
292
                    catch (IOException ioe)
293
                    {
294
                        logMetacat.warn("Could not cache a registered schema at " + fileLocation +
295
                        " because an IOException occured: " + ioe.getMessage());
296
                    }
297
                    catch(PropertyNotFoundException pnfe)
298
                    {
299
                        logMetacat.warn("Could not cache a registered schema at " + fileLocation +
300
                                " because the property 'application.tempDir' could not be found.");
301
                    }
302
				    
303
				    xmlSchema.setFileName(fileLocation);*/
304
				}
305
				else
306
				{
307
				    xmlSchema.setFileName(fileLocation);
308
				}
309
								
310
				if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) 
311
				{
312
					registeredSchemaList.add(xmlSchema);
313
				}
314
				else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://"))
315
                {  //the schema resides on a different server, to validate, we need to go get it 
316
                    //registeredSchemaList.add(xmlSchema);
317
				    logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered schema list.");
318
                }
319
				else 
320
				{
321
					logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
322
							+ " in the database but does not exist on the file system. So we don't add it to the registered schema list.");
323
				}
324
			}
325
		} catch (SQLException e) {
326
			logMetacat.error("XMLService.populateRegisteredSchemaList - SQL Error: "
327
					+ e.getMessage());
328
		} finally {
329
			try {
330
				pstmt.close();
331
			}// try
332
			catch (SQLException sqlE) {
333
				logMetacat.error("XMLSchemaService.populateRegisteredSchemaList - Error in XMLService.populateRegisteredSchemaList(): "
334
						+ sqlE.getMessage());
335
			}
336
			DBConnectionPool.returnDBConnection(conn, serialNumber);
337
		}
338
	}	
339
	
340
	/*
341
	 * Populate the list of registered no-namespace schemas. This reads all no-namespace schemas in the
342
     * xml_catalog table and then makes sure the schema actually exists and is
343
     * readable on disk.
344
	 */
345
	private void populateRegisteredNoNamespaceSchemaList() {
346
	    DBConnection conn = null;
347
        int serialNumber = -1;
348
        PreparedStatement pstmt = null;
349
        ResultSet resultSet = null;
350
        registeredNoNamespaceSchemaList = new Vector<XMLNoNamespaceSchema>();
351
        // get the system id from the xml_catalog table for all schemas.
352
        String sql = "SELECT no_namespace_schema_location, system_id, format_id FROM xml_catalog where "
353
                + "entry_type ='" + XMLNoNamespaceSchema.getType()+ "'";
354
        try {
355
            // check out DBConnection
356
            conn = DBConnectionPool
357
                    .getDBConnection("XMLService.populateRegisteredNoNamespaceSchemaList");
358
            serialNumber = conn.getCheckOutSerialNumber();
359
            pstmt = conn.prepareStatement(sql);
360
            logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - Selecting schemas: " + pstmt.toString());
361
            pstmt.execute();
362
            resultSet = pstmt.getResultSet();
363

    
364
            // make sure the schema actually exists on the file system. If so,
365
            // add it to the registered schema list.
366
            while (resultSet.next()) {
367
                String noNamespaceSchemaLocationURI = resultSet.getString(1);
368
                String fileLocation = resultSet.getString(2);
369
                String formatId = resultSet.getString(3);
370
                logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - try to register schema: " + noNamespaceSchemaLocationURI + "(no namespace-schema-location-uri) " + fileLocation+ " and format id "+formatId);
371
                XMLNoNamespaceSchema xmlSchema = new XMLNoNamespaceSchema(noNamespaceSchemaLocationURI, fileLocation, formatId);
372
                if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) {
373
                    continue;//skip the external schemas.
374
                }
375
                else {
376
                    xmlSchema.setFileName(fileLocation);
377
                }
378
                                
379
                if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) {
380
                    registeredNoNamespaceSchemaList.add(xmlSchema);
381
                }
382
                else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) {  //the schema resides on a different server, to validate, we need to go get it 
383
                    //registeredSchemaList.add(xmlSchema);
384
                    logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered no-namespace schema list.");
385
                }
386
                else {
387
                    logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
388
                            + " in the database but does not exist on the file system. So we don't add it to the registered no-namespace schema list.");
389
                }
390
            }
391
        } catch (SQLException e) {
392
            e.printStackTrace();
393
            logMetacat.error("XMLService.populateRegisteredNoNamespaceSchemaList - SQL Error: "
394
                    + e.getMessage());
395
        } finally {
396
            try {
397
                pstmt.close();
398
            }// try
399
            catch (SQLException sqlE) {
400
                logMetacat.error("XMLSchemaService.populateRegisteredNoNamespaceSchemaList - Error in close the pstmt: "
401
                        + sqlE.getMessage());
402
            }
403
            DBConnectionPool.returnDBConnection(conn, serialNumber);
404
        }
405
	}
406
	
407
	/**
408
	 * create a space delimited string of all namespaces and locations
409
	 * in the registered schema list.
410
	 */
411
	private static void createRegisteredNameSpaceAndLocationString() {
412
		boolean firstRowWithoutFormatid = true;
413
		boolean firstRowWithFormatid = true;
414
		nameSpaceAndLocationStringWithoutFormatId = "";
415
		
416
		for (XMLSchema xmlSchema : registeredSchemaList) {
417
		    String formatId = xmlSchema.getFormatId();
418
		    if( formatId == null ||formatId.trim().equals("")) {
419
		        //this is to handle the old way - no schema variants 
420
		        if (!firstRowWithoutFormatid) {
421
	                nameSpaceAndLocationStringWithoutFormatId += " ";
422
	            }
423
	            nameSpaceAndLocationStringWithoutFormatId += xmlSchema.getFileNamespace() + " "
424
	                    + xmlSchema.getLocalFileUri();
425
	            firstRowWithoutFormatid = false;
426
		    } else {
427
		        //it has a format id on the xml_catalog table. It is a variant.
428
		        if(!formatId_NamespaceLocationHash.containsKey(xmlSchema.getFormatId())) {
429
		            //the hash table hasn't stored the value. So put it on the hash.
430
		            formatId_NamespaceLocationHash.put(formatId, xmlSchema.getFileNamespace() + " "
431
	                        + xmlSchema.getLocalFileUri());
432
		        } else {
433
		          //the hash table already has it. We will attache the new pair to the exist value
434
		            String value = formatId_NamespaceLocationHash.get(formatId);
435
		            value += " "+ xmlSchema.getFileNamespace() + " "
436
	                        + xmlSchema.getLocalFileUri();
437
		            formatId_NamespaceLocationHash.put(formatId, value);
438
		        }
439
		    }
440
			
441
		}
442
	}
443

    
444
	/**
445
	 * create a lsit of all namespaces in the registered schema list.
446
	 */
447
	private static void createRegisteredNameSpaceList() {
448
		nameSpaceList = new Vector<String>();
449
		for (XMLSchema xmlSchema : registeredSchemaList) {
450
			nameSpaceList.add(xmlSchema.getFileNamespace());
451
		}
452
	}
453
	
454
	/**
455
	 * Checks to see that all schemas are registered. If a single one in the
456
	 * list is not, this will return false.
457
	 * 
458
	 * @param schemaList
459
	 *            a list of schemas as they appear in xml.
460
	 * @return true if all schemas are registered.
461
	 */
462
	public static boolean areAllSchemasRegistered(Vector<XMLSchema> schemaList) {			
463
		for (XMLSchema xmlSchema : schemaList) {
464
			if ( ! isSchemaRegistered(xmlSchema)) {
465
				return false;
466
			}
467
		}		
468
		return true;
469
	}
470
	
471
	/**
472
	 * Returns true if the schema is registered.
473
	 * 
474
	 * @param schema
475
	 *            a single schema as it appears in xml
476
	 * @return true if the schema is registered, false otherwise.
477
	 */
478
	public static boolean isSchemaRegistered(XMLSchema xmlSchema) {
479
		for (XMLSchema registeredXmlSchema : registeredSchemaList) {
480
			if (registeredXmlSchema.getLocalFileUri().equals(
481
						xmlSchema.getLocalFileUri())
482
					&& registeredXmlSchema.getFileNamespace().equals(
483
							xmlSchema.getFileNamespace())) {
484
				return true;
485
			}
486
		}
487

    
488
		return false;
489
	}
490
	
491
	/**
492
	 * Test if the given namespace registered in Metacat
493
	 * @param namespace the namespace will be tested
494
	 * @return true if the namespace is registered; otherwise false.
495
	 */
496
	public static boolean isNamespaceRegistered(String namespace) {
497
	    boolean registered = false;
498
	    if(namespace != null && !namespace.trim().equals("")) {
499
	        if(nameSpaceList != null && !nameSpaceList.isEmpty()) {
500
	            for (String registeredNamespace : nameSpaceList) {
501
	                logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Loop the registered namespaces in Metacat: "+
502
	                                                    registeredNamespace+" to compare the given namespace "+namespace);
503
	                if (registeredNamespace != null && registeredNamespace.equals(namespace)) {
504
	                    registered = true;
505
	                    break;
506
	                }
507
	            }
508
	        } else {
509
	            logMetacat.error("XMLSchemaService.isNamespaceRegistered - The registered namespace list is null or empty! So we will reject any document which needs validataion");
510
	        }
511
	        
512
	    } else {
513
	        logMetacat.debug("XMLSchemaService.isNamespaceRegistered - The given namespace is null or blank. So it is not registered.");
514
	    }
515
	    logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Is the namespace "+namespace+" registered in Metacat? "+registered);
516
	    return registered;
517
	}
518
	
519
	/**
520
	 * Get the namespace-schemaLocation pairs string based on given formatId and namespace.
521
	 * The algorithm is:
522
	 * 1. Look up all pairs of namespace--schemalocation for the given formatId in the xml_catalog table. If we find it, return all of the pairs.
523
	 * 2. If we can't find anything on the step 1, look up the record for the given namespace. If we find it, return all of pairs namespace-location without formatid.
524
	 * 3. Return null if we can't find anything. 
525
	 * @param formatId  the given format id
526
	 * @param namespace  the given namespace
527
	 * @return the string of the namespace-schemaLocation pairs (separated by white spaces). The null will be returned, if we can't find one.
528
	 */
529
	public String findNamespaceAndSchemaLocalLocation(String formatId, String namespace) {
530
	    String location = null;
531
	    location = getNameSpaceAndLocation(formatId);
532
	    logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based the format id "+formatId+" is "+location);
533
	    if(location == null) {
534
	        //can't find it for given formId. Now we look up namespace
535
	        logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based on the format id "+formatId+" is null and we will lookup the given namespace "+namespace);
536
            if(isNamespaceRegistered(namespace)) {
537
                location = getNameSpaceAndLocationStringWithoutFormatId();
538
                logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is registered in Metacat");
539
            } else {
540
                logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is NOT registered in Metacat");
541
            }
542
	    }
543
	    logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - The final location string for the namespace "+namespace+" and format id "+formatId+" is "+location);
544
	    return location;
545
	}
546
	
547
	/**
548
	 * Get the local (official) location for a no-namespace schema based on the given format id or no-name-space schema location uri.
549
	 * The format id has the higher priority
550
	 * 1. Compare the given format id with all registered no-namespace schema. If a match is found, return it.
551
	 * 2. If the step 1 return null, compare the given noNamespaceSchemaLocationuri.
552
	 * @param formatId
553
	 * @param noNamespaceSchemaLocation
554
	 * @return
555
	 */
556
	public String findNoNamespaceSchemaLocalLocation(String formatId, String noNamespaceSchemaLocation) {
557
	    String location = null;
558
        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id for determining the schema local location is "+formatId);
559
        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocationURI for determining the schema local location is "+noNamespaceSchemaLocation);
560
	    if(registeredNoNamespaceSchemaList != null && !registeredNoNamespaceSchemaList.isEmpty()) {
561
	        if((formatId != null && !formatId.trim().equals(""))) {
562
                logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id "+formatId+ "is not null and let's compare format id first.");
563
    	        for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) {
564
    	            if(schema != null) {
565
    	                String registeredFormatId = schema.getFormatId();
566
    	                logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has the format id "+registeredFormatId);
567
    	                    if(registeredFormatId != null && !registeredFormatId.trim().equals("")) {
568
    	                        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered format id "+registeredFormatId+ "is not null as well. Compare it");
569
    	                        if(formatId.equals(registeredFormatId)) {
570
    	                            logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered format id is the same: "+formatId+". Match sucessfully!");
571
    	                            location = schema.getLocalFileUri();
572
    	                            break;
573
    	                        }
574
    	                    }
575
    	             } 
576
    	         }
577
	        }
578
	        if(location == null) {
579
	           logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - we can't find any regisered no-namespace schema has the foramtid "+formatId+ 
580
	                   " (if it is null, this means there is no given format id.) Let's compare the noNamespaceSchemaLocaionURL which the given value is "+noNamespaceSchemaLocation);
581
	           if(noNamespaceSchemaLocation != null && !noNamespaceSchemaLocation.trim().equals("")) {
582
	               logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation+ "is not null and let's compare it.");
583
	                for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) {
584
	                    if(schema != null) {
585
	                        String registeredSchemaLocationURI = schema.getNoNamespaceSchemaLocation();
586
	                        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has noNamespaceSchemaLocation uri "+registeredSchemaLocationURI);
587
	                            if(registeredSchemaLocationURI != null && !registeredSchemaLocationURI.trim().equals("")) {
588
	                                logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered registeredSchemaLocation URI "+registeredSchemaLocationURI+ "is not null as well. Compare it");
589
	                                if(noNamespaceSchemaLocation.equals(registeredSchemaLocationURI)) {
590
	                                    logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered noNamespaceSchemaLocation is the same: "+noNamespaceSchemaLocation+". Match sucessfully!");
591
	                                    location = schema.getLocalFileUri();
592
	                                    break;
593
	                                }
594
	                            }
595
	                        } 
596
	                 }
597
	           }
598
	        }
599
	        
600
	    } else {
601
	        logMetacat.warn("XMLSchemaService.findNoNamespaceSchemaLocalLocation - there is no registered no-namespace schema in the Metacat");
602
	    }
603
	    logMetacat.warn("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the schema location is "+location+" (if it is null, this means it is not registered) for the format id "+formatId+
604
	            " or noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation);
605
	    return location;
606
	}
607
	
608
    /**
609
	 * See if schemas have been specified in the xml:schemalocation attribute.
610
	 * If so, return a vector of the system ids.
611
	 * 
612
	 * @param xml
613
	 *            the document we want to look in for schema location
614
	 * @return a vector of XMLSchema objects, or an empty vector if none are
615
	 *         found
616
	 */
617
	public static Vector<XMLSchema> findSchemasInXML(StringReader xml) throws IOException {
618
		Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
619
		Vector<XMLSchema> schemaList = new Vector<XMLSchema>();
620

    
621
		// no xml. return empty vector
622
		if (xml == null) {
623
			logMetacat.debug("XMLSchemaService.findSchemasInXML - Returning empty schemaList.");
624
			return schemaList;
625
		}
626

    
627
		// Get the "second line" from the xml
628
		String targetLine = getSchemaLine(xml);
629

    
630
		// see if there is a match for xsi.schemaLocation. If so, extract the
631
		// schemas.
632
		if (targetLine != null) {
633
			String regex = "(\\p{Graph}*):schemaLocation=\"([^\"]*)\"";
634
			Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE
635
					| Pattern.DOTALL);
636
			Matcher matcher = pattern.matcher(targetLine);
637
			int i = 0;
638
			while (i < targetLine.length()) {
639
				if (!matcher.find(i)) {
640
					break;
641
				}
642

    
643
				String uri = matcher.group(2);
644
				uri = StringUtil.replaceTabsNewLines(uri);
645
				uri = StringUtil.replaceDuplicateSpaces(uri);
646

    
647
				// each namespace could have several schema locations. parsedUri
648
				// will hold a list of uri and files.
649
				Vector<String> parsedUri = StringUtil.toVector(uri, ' ');
650
				for (int j = 0; j < parsedUri.size(); j = j + 2) {
651
					if (j + 1 >= parsedUri.size()) {
652
						throw new IOException(
653
								"Odd number of elements found when parsing schema location: "
654
										+ targetLine
655
										+ ". There should be an even number of uri/files in location.");
656
					}
657
					String formatId = null;
658
					XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri
659
							.get(j + 1), formatId);
660
					schemaList.add(xmlSchema);
661
				}
662
				i = matcher.end();
663
			}
664
		}
665

    
666
		logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for xml are " + schemaList.toString());
667

    
668
		return schemaList;
669
	}    
670
    
671
    /**
672
	 * Returns the namespace for an xml document. 
673
	 * @param xml
674
	 *            the document to search
675
	 * @return a string holding the namespace. Null will be returned if there is no namespace.
676
     * @throws SAXException 
677
     * @throws PropertyNotFoundException 
678
	 */
679
	public static String findDocumentNamespace(StringReader xml) throws IOException, PropertyNotFoundException, SAXException {
680
		String namespace = null;
681

    
682
		/*String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
683
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
684
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
685
		String eml2_1_1NameSpace = DocumentImpl.EML2_1_1NAMESPACE;*/
686

    
687

    
688
		if (xml == null) {
689
			logMetacat.debug("XMLSchemaService.findDocumentNamespace - XML doc is null.  There is no namespace.");
690
			return namespace;
691
		}
692
		XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml);
693
		namespaceParser.parse();
694
		namespace = namespaceParser.getNamespace();
695
		/*String targetLine = getSchemaLine(xml);
696

    
697
		// the prefix is at the beginning of the doc
698
		String prefix = null;
699
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}*\\s+";
700
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
701
		Matcher matcher = pattern.matcher(targetLine);
702
		if (matcher.find()) {
703
			prefix = matcher.group(1).trim();
704
		}
705

    
706
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
707
		// if no prefix was found, we will look for the default namespace.
708
		String regex2;
709
		if (prefix != null) {
710
		    logMetacat.debug("XMLSchemaService.findDocumentNamespace - found the prefix for the document "+prefix);
711
			regex2 = "xmlns:" + prefix + "=['\"]([^\"])*['\"]";
712
		} else {
713
			//regex2 = "xmlns:.*=['\"](.*)['\"]";
714
		    logMetacat.debug("XMLSchemaService.findDocumentNamespace - can't found the prefix for the document, so we look for the default namespace");
715
		    regex2 = "xmlns=['\"](.*)['\"]";
716
		}
717
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
718
		Matcher matcher2 = pattern2.matcher(targetLine);
719
		if (matcher2.find()) {
720
		    logMetacat.debug("XMLSchemaService.findDocumentNamespace - it has either a prefix or a default namespace");
721
		    System.out.println("the match group 0"+" is "+matcher2.group());
722
			namespace = matcher2.group(1);
723
			
724
			System.out.println("the match group "+" is "+namespace);
725

    
726
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
727
				namespace = eml2_0_0NameSpace;
728
			} else if (namespace.indexOf(eml2_0_1NameSpace) != -1) {
729
				namespace = eml2_0_1NameSpace;
730
			} else if (namespace.indexOf(eml2_1_0NameSpace) != -1) {
731
				namespace = eml2_1_0NameSpace;
732
			} else if (namespace.indexOf(eml2_1_1NameSpace) != -1) {
733
				namespace = eml2_1_1NameSpace;
734
			}
735
		}*/
736
		logMetacat.debug("XMLSchemaService.findDocumentNamespace - the namespace (null means no namespace) in the document is "+namespace);
737
		return namespace;
738
	}
739
	
740
	/**
741
	 * Get the attribute value of the noNamespaceSchemaLcation of the given xml
742
	 * @param xml the xml obect needs to be searched
743
	 * @return the attribute value of the noNamespaceSchemaLcation. The null will return if it can't be found.
744
	 * @throws SAXException 
745
	 * @throws PropertyNotFoundException 
746
	 * @throws IOException 
747
	 */
748
	public static String findNoNamespaceSchemaLocationAttr(StringReader xml) throws PropertyNotFoundException, SAXException, IOException {
749
	    String noNamespaceSchemaLocation = null;
750
	    XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml);
751
        namespaceParser.parse();
752
        noNamespaceSchemaLocation = namespaceParser.getNoNamespaceSchemaLocation();
753
        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocation - the noNamespaceSchemaLocation (null means no namespace) in the document is "+noNamespaceSchemaLocation);
754
	    return noNamespaceSchemaLocation;
755
	}
756
    
757
    /**
758
	 * Return the line from xml that holds the metadata like namespace and
759
	 * schema location
760
	 * 
761
	 * @param xml
762
	 *            the document to parse
763
	 * @return the "second" line of the document
764
	 */
765
    private static String getSchemaLine(StringReader xml) throws IOException {
766
        Logger logMetacat = Logger.getLogger(MetaCatServlet.class);
767
        // find the line
768
        String secondLine = null;
769
        int count = 0;
770
        final int TARGETNUM = 1;
771
        StringBuffer buffer = new StringBuffer();
772
        boolean comment = false;
773
        boolean processingInstruction = false;
774
        char thirdPreviousCharacter = '?';
775
        char secondPreviousCharacter = '?';
776
        char previousCharacter = '?';
777
        char currentCharacter = '?';
778
        int tmp = xml.read();
779
        while (tmp != -1) {
780
            currentCharacter = (char)tmp;
781
            //in a comment
782
            if (currentCharacter == '-' && previousCharacter == '-'
783
                    && secondPreviousCharacter == '!'
784
                    && thirdPreviousCharacter == '<') {
785
                comment = true;
786
            }
787
            //out of comment
788
            if (comment && currentCharacter == '>' && previousCharacter == '-'
789
                    && secondPreviousCharacter == '-') {
790
                comment = false;
791
            }
792
            
793
            //in a processingInstruction
794
            if (currentCharacter == '?' && previousCharacter == '<') {
795
                processingInstruction = true;
796
            }
797
            
798
            //out of processingInstruction
799
            if (processingInstruction && currentCharacter == '>'
800
                    && previousCharacter == '?') {
801
                processingInstruction = false;
802
            }
803
            
804
            //this is not comment or a processingInstruction
805
            if (currentCharacter != '!' && previousCharacter == '<'
806
                    && !comment && !processingInstruction) {
807
                count++;
808
            }
809
            
810
            // get target line
811
            if (count == TARGETNUM && currentCharacter != '>') {
812
                buffer.append(currentCharacter);
813
            }
814
            if (count == TARGETNUM && currentCharacter == '>') {
815
                break;
816
            }
817
            thirdPreviousCharacter = secondPreviousCharacter;
818
            secondPreviousCharacter = previousCharacter;
819
            previousCharacter = currentCharacter;
820
            tmp = xml.read();
821
        }
822
        secondLine = buffer.toString();
823
        logMetacat.debug("XMLSchemaService.getSchemaLine - the second line string is: " + secondLine);
824
        
825
        xml.reset();
826
        return secondLine;
827
    }
828
    
829
    /**
830
	 * Get a schema file name from the schema uri.
831
	 * 
832
	 * @param uri
833
	 *            the uri from which to extract the file name
834
	 * @return a string holding the file name
835
	 */
836
    public static String getSchemaFileNameFromUri(String uri) {
837
		// get filename from systemId
838
		String filename = uri;
839
		
840
		if (filename != null && !(filename.trim()).equals("")) {
841
			int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
842
			if (slash > -1) {
843
				filename = filename.substring(slash + 1);
844
			}
845
		}
846

    
847
		return filename;
848
	}
849
    
850
    /**
851
     * Get a base url from the schema url. If url=http://www.example.com/example.xsd,
852
     * http://www.example.com/ will be returned.
853
     * 
854
     * @param uri
855
     *            the uri from which to extract the base url
856
     * @return a string holding the base url. null will be return if it is not url.
857
     */
858
      public static String getBaseUrlFromSchemaURL(String url) 
859
      {
860
        String baseURL = null;        
861
        if (url != null && (url.indexOf("http://") != -1 || url.indexOf("https://") !=-1)) 
862
        {
863
          int slash = url.lastIndexOf('/');
864
          if (slash > -1) 
865
          {
866
            baseURL = url.substring(0,slash+1);
867
          }
868
        } 
869
        return baseURL;
870
      }
871
}
(7-7/7)