Project

General

Profile

« Previous | Next » 

Revision 9948

Added by Jing Tao over 7 years ago

Add methods to determine the schema location based on the namespace, format id, or no-namespace-schema-location URI.

View differences:

XMLSchemaService.java
44 44

  
45 45
import org.apache.commons.io.IOUtils;
46 46
import org.apache.log4j.Logger;
47
import org.xml.sax.SAXException;
47 48

  
48 49
import edu.ucsb.nceas.metacat.DocumentImpl;
49 50
import edu.ucsb.nceas.metacat.MetaCatServlet;
......
71 72
//	private static String documentNamespace = null;
72 73
	
73 74
	// all schema objects that represent schemas registered in the db that 
74
	// actually have files on disk.
75
	// actually have files on disk. It doesn't include the schemas without namespace
75 76
	private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>();
76 77
	
78
	// all no-namespace schema objects that represent schemas registered in the db that 
79
    // actually have files on disk. It doesn't include the schemas with namespaces
80
	private static Vector<XMLNoNamespaceSchema> registeredNoNamespaceSchemaList = new Vector<XMLNoNamespaceSchema>();
81
	
77 82
	// a convenience list that holds the names of registered namespaces.
78 83
    private static Vector<String> nameSpaceList = new Vector<String>();
79 84
    
......
121 126
	    logMetacat.debug("XMLService.doRefresh - refreshing the schema service.");
122 127
		try {
123 128
			populateRegisteredSchemaList();
129
			populateRegisteredNoNamespaceSchemaList();
124 130
			setUseFullSchemaValidation();
125 131
			createRegisteredNameSpaceList();
126 132
			createRegisteredNameSpaceAndLocationString();
......
220 226

  
221 227
		// get the system id from the xml_catalog table for all schemas.
222 228
		String sql = "SELECT public_id, system_id, format_id FROM xml_catalog where "
223
				+ "entry_type ='" + DocumentImpl.SCHEMA + "'";
229
				+ "entry_type ='" + XMLSchema.getType() + "'";
224 230
		try {
225 231
			// check out DBConnection
226 232
			conn = DBConnectionPool
......
308 314
				else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://"))
309 315
                {  //the schema resides on a different server, to validate, we need to go get it 
310 316
                    //registeredSchemaList.add(xmlSchema);
317
				    logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered schema list.");
311 318
                }
312 319
				else 
313 320
				{
314 321
					logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
315
							+ " in the database but does not exist on the file system.");
322
							+ " in the database but does not exist on the file system. So we don't add it to the registered schema list.");
316 323
				}
317 324
			}
318 325
		} catch (SQLException e) {
......
330 337
		}
331 338
	}	
332 339
	
340
	/*
341
	 * Populate the list of registered no-namespace schemas. This reads all no-namespace schemas in the
342
     * xml_catalog table and then makes sure the schema actually exists and is
343
     * readable on disk.
344
	 */
345
	private void populateRegisteredNoNamespaceSchemaList() {
346
	    DBConnection conn = null;
347
        int serialNumber = -1;
348
        PreparedStatement pstmt = null;
349
        ResultSet resultSet = null;
350
        registeredNoNamespaceSchemaList = new Vector<XMLNoNamespaceSchema>();
351
        // get the system id from the xml_catalog table for all schemas.
352
        String sql = "SELECT no_namespace_schema_location, system_id, format_id FROM xml_catalog where "
353
                + "entry_type ='" + XMLNoNamespaceSchema.getType()+ "'";
354
        try {
355
            // check out DBConnection
356
            conn = DBConnectionPool
357
                    .getDBConnection("XMLService.populateRegisteredNoNamespaceSchemaList");
358
            serialNumber = conn.getCheckOutSerialNumber();
359
            pstmt = conn.prepareStatement(sql);
360
            logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - Selecting schemas: " + pstmt.toString());
361
            pstmt.execute();
362
            resultSet = pstmt.getResultSet();
363

  
364
            // make sure the schema actually exists on the file system. If so,
365
            // add it to the registered schema list.
366
            while (resultSet.next()) {
367
                String noNamespaceSchemaLocationURI = resultSet.getString(1);
368
                String fileLocation = resultSet.getString(2);
369
                String formatId = resultSet.getString(3);
370
                logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - try to register schema: " + noNamespaceSchemaLocationURI + "(no namespace-schema-location-uri) " + fileLocation+ " and format id "+formatId);
371
                XMLNoNamespaceSchema xmlSchema = new XMLNoNamespaceSchema(noNamespaceSchemaLocationURI, fileLocation, formatId);
372
                if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) {
373
                    continue;//skip the external schemas.
374
                }
375
                else {
376
                    xmlSchema.setFileName(fileLocation);
377
                }
378
                                
379
                if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) {
380
                    registeredNoNamespaceSchemaList.add(xmlSchema);
381
                }
382
                else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) {  //the schema resides on a different server, to validate, we need to go get it 
383
                    //registeredSchemaList.add(xmlSchema);
384
                    logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered no-namespace schema list.");
385
                }
386
                else {
387
                    logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered "
388
                            + " in the database but does not exist on the file system. So we don't add it to the registered no-namespace schema list.");
389
                }
390
            }
391
        } catch (SQLException e) {
392
            e.printStackTrace();
393
            logMetacat.error("XMLService.populateRegisteredNoNamespaceSchemaList - SQL Error: "
394
                    + e.getMessage());
395
        } finally {
396
            try {
397
                pstmt.close();
398
            }// try
399
            catch (SQLException sqlE) {
400
                logMetacat.error("XMLSchemaService.populateRegisteredNoNamespaceSchemaList - Error in close the pstmt: "
401
                        + sqlE.getMessage());
402
            }
403
            DBConnectionPool.returnDBConnection(conn, serialNumber);
404
        }
405
	}
406
	
333 407
	/**
334 408
	 * create a space delimited string of all namespaces and locations
335 409
	 * in the registered schema list.
......
414 488
		return false;
415 489
	}
416 490
	
491
	/**
492
	 * Test if the given namespace registered in Metacat
493
	 * @param namespace the namespace will be tested
494
	 * @return true if the namespace is registered; otherwise false.
495
	 */
496
	public static boolean isNamespaceRegistered(String namespace) {
497
	    boolean registered = false;
498
	    if(namespace != null && !namespace.trim().equals("")) {
499
	        if(nameSpaceList != null && !nameSpaceList.isEmpty()) {
500
	            for (String registeredNamespace : nameSpaceList) {
501
	                logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Loop the registered namespaces in Metacat: "+
502
	                                                    registeredNamespace+" to compare the given namespace "+namespace);
503
	                if (registeredNamespace != null && registeredNamespace.equals(namespace)) {
504
	                    registered = true;
505
	                    break;
506
	                }
507
	            }
508
	        } else {
509
	            logMetacat.error("XMLSchemaService.isNamespaceRegistered - The registered namespace list is null or empty! So we will reject any document which needs validataion");
510
	        }
511
	        
512
	    } else {
513
	        logMetacat.debug("XMLSchemaService.isNamespaceRegistered - The given namespace is null or blank. So it is not registered.");
514
	    }
515
	    logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Is the namespace "+namespace+" registered in Metacat? "+registered);
516
	    return registered;
517
	}
518
	
519
	/**
520
	 * Get the namespace-schemaLocation pairs string based on given formatId and namespace.
521
	 * The algorithm is:
522
	 * 1. Look up all pairs of namespace--schemalocation for the given formatId in the xml_catalog table. If we find it, return all of the pairs.
523
	 * 2. If we can't find anything on the step 1, look up the record for the given namespace. If we find it, return all of pairs namespace-location without formatid.
524
	 * 3. Return null if we can't find anything. 
525
	 * @param formatId  the given format id
526
	 * @param namespace  the given namespace
527
	 * @return the string of the namespace-schemaLocation pairs (separated by white spaces). The null will be returned, if we can't find one.
528
	 */
529
	public String findNamespaceAndSchemaLocalLocation(String formatId, String namespace) {
530
	    String location = null;
531
	    location = getNameSpaceAndLocation(formatId);
532
	    logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based the format id "+formatId+" is "+location);
533
	    if(location == null) {
534
	        //can't find it for given formId. Now we look up namespace
535
	        logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based on the format id "+formatId+" is null and we will lookup the given namespace "+namespace);
536
            if(isNamespaceRegistered(namespace)) {
537
                location = getNameSpaceAndLocationStringWithoutFormatId();
538
                logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is registered in Metacat");
539
            } else {
540
                logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is NOT registered in Metacat");
541
            }
542
	    }
543
	    logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - The final location string for the namespace "+namespace+" and format id "+formatId+" is "+location);
544
	    return location;
545
	}
546
	
547
	/**
548
	 * Get the local (official) location for a no-namespace schema based on the given format id or no-name-space schema location uri.
549
	 * The format id has the higher priority
550
	 * 1. Compare the given format id with all registered no-namespace schema. If a match is found, return it.
551
	 * 2. If the step 1 return null, compare the given noNamespaceSchemaLocationuri.
552
	 * @param formatId
553
	 * @param noNamespaceSchemaLocation
554
	 * @return
555
	 */
556
	public String findNoNamespaceSchemaLocalLocation(String formatId, String noNamespaceSchemaLocation) {
557
	    String location = null;
558
        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id for determining the schema local location is "+formatId);
559
        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocationURI for determining the schema local location is "+noNamespaceSchemaLocation);
560
	    if(registeredNoNamespaceSchemaList != null && !registeredNoNamespaceSchemaList.isEmpty()) {
561
	        if((formatId != null && !formatId.trim().equals(""))) {
562
                logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id "+formatId+ "is not null and let's compare format id first.");
563
    	        for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) {
564
    	            if(schema != null) {
565
    	                String registeredFormatId = schema.getFormatId();
566
    	                logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has the format id "+registeredFormatId);
567
    	                    if(registeredFormatId != null && !registeredFormatId.trim().equals("")) {
568
    	                        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered format id "+registeredFormatId+ "is not null as well. Compare it");
569
    	                        if(formatId.equals(registeredFormatId)) {
570
    	                            logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered format id is the same: "+formatId+". Match sucessfully!");
571
    	                            location = schema.getLocalFileUri();
572
    	                            break;
573
    	                        }
574
    	                    }
575
    	             } 
576
    	         }
577
	        }
578
	        if(location == null) {
579
	           logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - we can't find any regisered no-namespace schema has the foramtid "+formatId+ 
580
	                   " (if it is null, this means there is no given format id.) Let's compare the noNamespaceSchemaLocaionURL which the given value is "+noNamespaceSchemaLocation);
581
	           if(noNamespaceSchemaLocation != null && !noNamespaceSchemaLocation.trim().equals("")) {
582
	               logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation+ "is not null and let's compare it.");
583
	                for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) {
584
	                    if(schema != null) {
585
	                        String registeredSchemaLocationURI = schema.getNoNamespaceSchemaLocation();
586
	                        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has noNamespaceSchemaLocation uri "+registeredSchemaLocationURI);
587
	                            if(registeredSchemaLocationURI != null && !registeredSchemaLocationURI.trim().equals("")) {
588
	                                logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered registeredSchemaLocation URI "+registeredSchemaLocationURI+ "is not null as well. Compare it");
589
	                                if(noNamespaceSchemaLocation.equals(registeredSchemaLocationURI)) {
590
	                                    logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered noNamespaceSchemaLocation is the same: "+noNamespaceSchemaLocation+". Match sucessfully!");
591
	                                    location = schema.getLocalFileUri();
592
	                                    break;
593
	                                }
594
	                            }
595
	                        } 
596
	                 }
597
	           }
598
	        }
599
	        
600
	    } else {
601
	        logMetacat.warn("XMLSchemaService.findNoNamespaceSchemaLocalLocation - there is no registered no-namespace schema in the Metacat");
602
	    }
603
	    logMetacat.warn("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the schema location is "+location+" (if it is null, this means it is not registered) for the format id "+formatId+
604
	            " or noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation);
605
	    return location;
606
	}
607
	
417 608
    /**
418 609
	 * See if schemas have been specified in the xml:schemalocation attribute.
419 610
	 * If so, return a vector of the system ids.
......
472 663
			}
473 664
		}
474 665

  
475
		logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for eml are " + schemaList.toString());
666
		logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for xml are " + schemaList.toString());
476 667

  
477 668
		return schemaList;
478 669
	}    
479 670
    
480 671
    /**
481
	 * Returns all the namespace for an xml document.  This is done by getting
482
	 * the internal namespace declaration (prefix) and looking for xmlns:<prefix>
483
	 * 
672
	 * Returns the namespace for an xml document. 
484 673
	 * @param xml
485 674
	 *            the document to search
486
	 * @return a string holding the namespace
675
	 * @return a string holding the namespace. Null will be returned if there is no namespace.
676
     * @throws SAXException 
677
     * @throws PropertyNotFoundException 
487 678
	 */
488
	public static String findDocumentNamespace(StringReader xml) throws IOException {
679
	public static String findDocumentNamespace(StringReader xml) throws IOException, PropertyNotFoundException, SAXException {
489 680
		String namespace = null;
490 681

  
491
		String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
682
		/*String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
492 683
		String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE;
493 684
		String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE;
494
		String eml2_1_1NameSpace = DocumentImpl.EML2_1_1NAMESPACE;
685
		String eml2_1_1NameSpace = DocumentImpl.EML2_1_1NAMESPACE;*/
495 686

  
496 687

  
497 688
		if (xml == null) {
498 689
			logMetacat.debug("XMLSchemaService.findDocumentNamespace - XML doc is null.  There is no namespace.");
499 690
			return namespace;
500 691
		}
692
		XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml);
693
		namespaceParser.parse();
694
		namespace = namespaceParser.getNamespace();
695
		/*String targetLine = getSchemaLine(xml);
501 696

  
502
		String targetLine = getSchemaLine(xml);
503

  
504 697
		// the prefix is at the beginning of the doc
505 698
		String prefix = null;
506
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}* ";
699
		String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}*\\s+";
507 700
		Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE);
508 701
		Matcher matcher = pattern.matcher(targetLine);
509 702
		if (matcher.find()) {
......
511 704
		}
512 705

  
513 706
		// if a prefix was found, we are looking for xmlns:<prefix>="namespace"
514
		// if no prefix was found, we grab the first namespace.
707
		// if no prefix was found, we will look for the default namespace.
515 708
		String regex2;
516 709
		if (prefix != null) {
517
			regex2 = "xmlns:" + prefix + "=['\"](.*)['\"]";
710
		    logMetacat.debug("XMLSchemaService.findDocumentNamespace - found the prefix for the document "+prefix);
711
			regex2 = "xmlns:" + prefix + "=['\"]([^\"])*['\"]";
518 712
		} else {
519
			regex2 = "xmlns:.*=['\"](.*)['\"]";
713
			//regex2 = "xmlns:.*=['\"](.*)['\"]";
714
		    logMetacat.debug("XMLSchemaService.findDocumentNamespace - can't found the prefix for the document, so we look for the default namespace");
715
		    regex2 = "xmlns=['\"](.*)['\"]";
520 716
		}
521 717
		Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE);
522 718
		Matcher matcher2 = pattern2.matcher(targetLine);
523 719
		if (matcher2.find()) {
720
		    logMetacat.debug("XMLSchemaService.findDocumentNamespace - it has either a prefix or a default namespace");
721
		    System.out.println("the match group 0"+" is "+matcher2.group());
524 722
			namespace = matcher2.group(1);
723
			
724
			System.out.println("the match group "+" is "+namespace);
525 725

  
526 726
			if (namespace.indexOf(eml2_0_0NameSpace) != -1) {
527 727
				namespace = eml2_0_0NameSpace;
......
532 732
			} else if (namespace.indexOf(eml2_1_1NameSpace) != -1) {
533 733
				namespace = eml2_1_1NameSpace;
534 734
			}
535
		}
536

  
735
		}*/
736
		logMetacat.debug("XMLSchemaService.findDocumentNamespace - the namespace (null means no namespace) in the document is "+namespace);
537 737
		return namespace;
538 738
	}
739
	
740
	/**
741
	 * Get the attribute value of the noNamespaceSchemaLcation of the given xml
742
	 * @param xml the xml obect needs to be searched
743
	 * @return the attribute value of the noNamespaceSchemaLcation. The null will return if it can't be found.
744
	 * @throws SAXException 
745
	 * @throws PropertyNotFoundException 
746
	 * @throws IOException 
747
	 */
748
	public static String findNoNamespaceSchemaLocationAttr(StringReader xml) throws PropertyNotFoundException, SAXException, IOException {
749
	    String noNamespaceSchemaLocation = null;
750
	    XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml);
751
        namespaceParser.parse();
752
        noNamespaceSchemaLocation = namespaceParser.getNoNamespaceSchemaLocation();
753
        logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocation - the noNamespaceSchemaLocation (null means no namespace) in the document is "+noNamespaceSchemaLocation);
754
	    return noNamespaceSchemaLocation;
755
	}
539 756
    
540 757
    /**
541 758
	 * Return the line from xml that holds the metadata like namespace and

Also available in: Unified diff