Revision 9948
Added by Jing Tao over 7 years ago
XMLSchemaService.java | ||
---|---|---|
44 | 44 |
|
45 | 45 |
import org.apache.commons.io.IOUtils; |
46 | 46 |
import org.apache.log4j.Logger; |
47 |
import org.xml.sax.SAXException; |
|
47 | 48 |
|
48 | 49 |
import edu.ucsb.nceas.metacat.DocumentImpl; |
49 | 50 |
import edu.ucsb.nceas.metacat.MetaCatServlet; |
... | ... | |
71 | 72 |
// private static String documentNamespace = null; |
72 | 73 |
|
73 | 74 |
// all schema objects that represent schemas registered in the db that |
74 |
// actually have files on disk. |
|
75 |
// actually have files on disk. It doesn't include the schemas without namespace
|
|
75 | 76 |
private static Vector<XMLSchema> registeredSchemaList = new Vector<XMLSchema>(); |
76 | 77 |
|
78 |
// all no-namespace schema objects that represent schemas registered in the db that |
|
79 |
// actually have files on disk. It doesn't include the schemas with namespaces |
|
80 |
private static Vector<XMLNoNamespaceSchema> registeredNoNamespaceSchemaList = new Vector<XMLNoNamespaceSchema>(); |
|
81 |
|
|
77 | 82 |
// a convenience list that holds the names of registered namespaces. |
78 | 83 |
private static Vector<String> nameSpaceList = new Vector<String>(); |
79 | 84 |
|
... | ... | |
121 | 126 |
logMetacat.debug("XMLService.doRefresh - refreshing the schema service."); |
122 | 127 |
try { |
123 | 128 |
populateRegisteredSchemaList(); |
129 |
populateRegisteredNoNamespaceSchemaList(); |
|
124 | 130 |
setUseFullSchemaValidation(); |
125 | 131 |
createRegisteredNameSpaceList(); |
126 | 132 |
createRegisteredNameSpaceAndLocationString(); |
... | ... | |
220 | 226 |
|
221 | 227 |
// get the system id from the xml_catalog table for all schemas. |
222 | 228 |
String sql = "SELECT public_id, system_id, format_id FROM xml_catalog where " |
223 |
+ "entry_type ='" + DocumentImpl.SCHEMA + "'";
|
|
229 |
+ "entry_type ='" + XMLSchema.getType() + "'";
|
|
224 | 230 |
try { |
225 | 231 |
// check out DBConnection |
226 | 232 |
conn = DBConnectionPool |
... | ... | |
308 | 314 |
else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) |
309 | 315 |
{ //the schema resides on a different server, to validate, we need to go get it |
310 | 316 |
//registeredSchemaList.add(xmlSchema); |
317 |
logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered schema list."); |
|
311 | 318 |
} |
312 | 319 |
else |
313 | 320 |
{ |
314 | 321 |
logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered " |
315 |
+ " in the database but does not exist on the file system."); |
|
322 |
+ " in the database but does not exist on the file system. So we don't add it to the registered schema list.");
|
|
316 | 323 |
} |
317 | 324 |
} |
318 | 325 |
} catch (SQLException e) { |
... | ... | |
330 | 337 |
} |
331 | 338 |
} |
332 | 339 |
|
340 |
/* |
|
341 |
* Populate the list of registered no-namespace schemas. This reads all no-namespace schemas in the |
|
342 |
* xml_catalog table and then makes sure the schema actually exists and is |
|
343 |
* readable on disk. |
|
344 |
*/ |
|
345 |
private void populateRegisteredNoNamespaceSchemaList() { |
|
346 |
DBConnection conn = null; |
|
347 |
int serialNumber = -1; |
|
348 |
PreparedStatement pstmt = null; |
|
349 |
ResultSet resultSet = null; |
|
350 |
registeredNoNamespaceSchemaList = new Vector<XMLNoNamespaceSchema>(); |
|
351 |
// get the system id from the xml_catalog table for all schemas. |
|
352 |
String sql = "SELECT no_namespace_schema_location, system_id, format_id FROM xml_catalog where " |
|
353 |
+ "entry_type ='" + XMLNoNamespaceSchema.getType()+ "'"; |
|
354 |
try { |
|
355 |
// check out DBConnection |
|
356 |
conn = DBConnectionPool |
|
357 |
.getDBConnection("XMLService.populateRegisteredNoNamespaceSchemaList"); |
|
358 |
serialNumber = conn.getCheckOutSerialNumber(); |
|
359 |
pstmt = conn.prepareStatement(sql); |
|
360 |
logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - Selecting schemas: " + pstmt.toString()); |
|
361 |
pstmt.execute(); |
|
362 |
resultSet = pstmt.getResultSet(); |
|
363 |
|
|
364 |
// make sure the schema actually exists on the file system. If so, |
|
365 |
// add it to the registered schema list. |
|
366 |
while (resultSet.next()) { |
|
367 |
String noNamespaceSchemaLocationURI = resultSet.getString(1); |
|
368 |
String fileLocation = resultSet.getString(2); |
|
369 |
String formatId = resultSet.getString(3); |
|
370 |
logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - try to register schema: " + noNamespaceSchemaLocationURI + "(no namespace-schema-location-uri) " + fileLocation+ " and format id "+formatId); |
|
371 |
XMLNoNamespaceSchema xmlSchema = new XMLNoNamespaceSchema(noNamespaceSchemaLocationURI, fileLocation, formatId); |
|
372 |
if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) { |
|
373 |
continue;//skip the external schemas. |
|
374 |
} |
|
375 |
else { |
|
376 |
xmlSchema.setFileName(fileLocation); |
|
377 |
} |
|
378 |
|
|
379 |
if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) { |
|
380 |
registeredNoNamespaceSchemaList.add(xmlSchema); |
|
381 |
} |
|
382 |
else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) { //the schema resides on a different server, to validate, we need to go get it |
|
383 |
//registeredSchemaList.add(xmlSchema); |
|
384 |
logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered no-namespace schema list."); |
|
385 |
} |
|
386 |
else { |
|
387 |
logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered " |
|
388 |
+ " in the database but does not exist on the file system. So we don't add it to the registered no-namespace schema list."); |
|
389 |
} |
|
390 |
} |
|
391 |
} catch (SQLException e) { |
|
392 |
e.printStackTrace(); |
|
393 |
logMetacat.error("XMLService.populateRegisteredNoNamespaceSchemaList - SQL Error: " |
|
394 |
+ e.getMessage()); |
|
395 |
} finally { |
|
396 |
try { |
|
397 |
pstmt.close(); |
|
398 |
}// try |
|
399 |
catch (SQLException sqlE) { |
|
400 |
logMetacat.error("XMLSchemaService.populateRegisteredNoNamespaceSchemaList - Error in close the pstmt: " |
|
401 |
+ sqlE.getMessage()); |
|
402 |
} |
|
403 |
DBConnectionPool.returnDBConnection(conn, serialNumber); |
|
404 |
} |
|
405 |
} |
|
406 |
|
|
333 | 407 |
/** |
334 | 408 |
* create a space delimited string of all namespaces and locations |
335 | 409 |
* in the registered schema list. |
... | ... | |
414 | 488 |
return false; |
415 | 489 |
} |
416 | 490 |
|
491 |
/** |
|
492 |
* Test if the given namespace registered in Metacat |
|
493 |
* @param namespace the namespace will be tested |
|
494 |
* @return true if the namespace is registered; otherwise false. |
|
495 |
*/ |
|
496 |
public static boolean isNamespaceRegistered(String namespace) { |
|
497 |
boolean registered = false; |
|
498 |
if(namespace != null && !namespace.trim().equals("")) { |
|
499 |
if(nameSpaceList != null && !nameSpaceList.isEmpty()) { |
|
500 |
for (String registeredNamespace : nameSpaceList) { |
|
501 |
logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Loop the registered namespaces in Metacat: "+ |
|
502 |
registeredNamespace+" to compare the given namespace "+namespace); |
|
503 |
if (registeredNamespace != null && registeredNamespace.equals(namespace)) { |
|
504 |
registered = true; |
|
505 |
break; |
|
506 |
} |
|
507 |
} |
|
508 |
} else { |
|
509 |
logMetacat.error("XMLSchemaService.isNamespaceRegistered - The registered namespace list is null or empty! So we will reject any document which needs validataion"); |
|
510 |
} |
|
511 |
|
|
512 |
} else { |
|
513 |
logMetacat.debug("XMLSchemaService.isNamespaceRegistered - The given namespace is null or blank. So it is not registered."); |
|
514 |
} |
|
515 |
logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Is the namespace "+namespace+" registered in Metacat? "+registered); |
|
516 |
return registered; |
|
517 |
} |
|
518 |
|
|
519 |
/** |
|
520 |
* Get the namespace-schemaLocation pairs string based on given formatId and namespace. |
|
521 |
* The algorithm is: |
|
522 |
* 1. Look up all pairs of namespace--schemalocation for the given formatId in the xml_catalog table. If we find it, return all of the pairs. |
|
523 |
* 2. If we can't find anything on the step 1, look up the record for the given namespace. If we find it, return all of pairs namespace-location without formatid. |
|
524 |
* 3. Return null if we can't find anything. |
|
525 |
* @param formatId the given format id |
|
526 |
* @param namespace the given namespace |
|
527 |
* @return the string of the namespace-schemaLocation pairs (separated by white spaces). The null will be returned, if we can't find one. |
|
528 |
*/ |
|
529 |
public String findNamespaceAndSchemaLocalLocation(String formatId, String namespace) { |
|
530 |
String location = null; |
|
531 |
location = getNameSpaceAndLocation(formatId); |
|
532 |
logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based the format id "+formatId+" is "+location); |
|
533 |
if(location == null) { |
|
534 |
//can't find it for given formId. Now we look up namespace |
|
535 |
logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based on the format id "+formatId+" is null and we will lookup the given namespace "+namespace); |
|
536 |
if(isNamespaceRegistered(namespace)) { |
|
537 |
location = getNameSpaceAndLocationStringWithoutFormatId(); |
|
538 |
logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is registered in Metacat"); |
|
539 |
} else { |
|
540 |
logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is NOT registered in Metacat"); |
|
541 |
} |
|
542 |
} |
|
543 |
logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - The final location string for the namespace "+namespace+" and format id "+formatId+" is "+location); |
|
544 |
return location; |
|
545 |
} |
|
546 |
|
|
547 |
/** |
|
548 |
* Get the local (official) location for a no-namespace schema based on the given format id or no-name-space schema location uri. |
|
549 |
* The format id has the higher priority |
|
550 |
* 1. Compare the given format id with all registered no-namespace schema. If a match is found, return it. |
|
551 |
* 2. If the step 1 return null, compare the given noNamespaceSchemaLocationuri. |
|
552 |
* @param formatId |
|
553 |
* @param noNamespaceSchemaLocation |
|
554 |
* @return |
|
555 |
*/ |
|
556 |
public String findNoNamespaceSchemaLocalLocation(String formatId, String noNamespaceSchemaLocation) { |
|
557 |
String location = null; |
|
558 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id for determining the schema local location is "+formatId); |
|
559 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocationURI for determining the schema local location is "+noNamespaceSchemaLocation); |
|
560 |
if(registeredNoNamespaceSchemaList != null && !registeredNoNamespaceSchemaList.isEmpty()) { |
|
561 |
if((formatId != null && !formatId.trim().equals(""))) { |
|
562 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id "+formatId+ "is not null and let's compare format id first."); |
|
563 |
for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) { |
|
564 |
if(schema != null) { |
|
565 |
String registeredFormatId = schema.getFormatId(); |
|
566 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has the format id "+registeredFormatId); |
|
567 |
if(registeredFormatId != null && !registeredFormatId.trim().equals("")) { |
|
568 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered format id "+registeredFormatId+ "is not null as well. Compare it"); |
|
569 |
if(formatId.equals(registeredFormatId)) { |
|
570 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered format id is the same: "+formatId+". Match sucessfully!"); |
|
571 |
location = schema.getLocalFileUri(); |
|
572 |
break; |
|
573 |
} |
|
574 |
} |
|
575 |
} |
|
576 |
} |
|
577 |
} |
|
578 |
if(location == null) { |
|
579 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - we can't find any regisered no-namespace schema has the foramtid "+formatId+ |
|
580 |
" (if it is null, this means there is no given format id.) Let's compare the noNamespaceSchemaLocaionURL which the given value is "+noNamespaceSchemaLocation); |
|
581 |
if(noNamespaceSchemaLocation != null && !noNamespaceSchemaLocation.trim().equals("")) { |
|
582 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation+ "is not null and let's compare it."); |
|
583 |
for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) { |
|
584 |
if(schema != null) { |
|
585 |
String registeredSchemaLocationURI = schema.getNoNamespaceSchemaLocation(); |
|
586 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has noNamespaceSchemaLocation uri "+registeredSchemaLocationURI); |
|
587 |
if(registeredSchemaLocationURI != null && !registeredSchemaLocationURI.trim().equals("")) { |
|
588 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered registeredSchemaLocation URI "+registeredSchemaLocationURI+ "is not null as well. Compare it"); |
|
589 |
if(noNamespaceSchemaLocation.equals(registeredSchemaLocationURI)) { |
|
590 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered noNamespaceSchemaLocation is the same: "+noNamespaceSchemaLocation+". Match sucessfully!"); |
|
591 |
location = schema.getLocalFileUri(); |
|
592 |
break; |
|
593 |
} |
|
594 |
} |
|
595 |
} |
|
596 |
} |
|
597 |
} |
|
598 |
} |
|
599 |
|
|
600 |
} else { |
|
601 |
logMetacat.warn("XMLSchemaService.findNoNamespaceSchemaLocalLocation - there is no registered no-namespace schema in the Metacat"); |
|
602 |
} |
|
603 |
logMetacat.warn("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the schema location is "+location+" (if it is null, this means it is not registered) for the format id "+formatId+ |
|
604 |
" or noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation); |
|
605 |
return location; |
|
606 |
} |
|
607 |
|
|
417 | 608 |
/** |
418 | 609 |
* See if schemas have been specified in the xml:schemalocation attribute. |
419 | 610 |
* If so, return a vector of the system ids. |
... | ... | |
472 | 663 |
} |
473 | 664 |
} |
474 | 665 |
|
475 |
logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for eml are " + schemaList.toString());
|
|
666 |
logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for xml are " + schemaList.toString());
|
|
476 | 667 |
|
477 | 668 |
return schemaList; |
478 | 669 |
} |
479 | 670 |
|
480 | 671 |
/** |
481 |
* Returns all the namespace for an xml document. This is done by getting |
|
482 |
* the internal namespace declaration (prefix) and looking for xmlns:<prefix> |
|
483 |
* |
|
672 |
* Returns the namespace for an xml document. |
|
484 | 673 |
* @param xml |
485 | 674 |
* the document to search |
486 |
* @return a string holding the namespace |
|
675 |
* @return a string holding the namespace. Null will be returned if there is no namespace. |
|
676 |
* @throws SAXException |
|
677 |
* @throws PropertyNotFoundException |
|
487 | 678 |
*/ |
488 |
public static String findDocumentNamespace(StringReader xml) throws IOException { |
|
679 |
public static String findDocumentNamespace(StringReader xml) throws IOException, PropertyNotFoundException, SAXException {
|
|
489 | 680 |
String namespace = null; |
490 | 681 |
|
491 |
String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE; |
|
682 |
/*String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE;
|
|
492 | 683 |
String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE; |
493 | 684 |
String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE; |
494 |
String eml2_1_1NameSpace = DocumentImpl.EML2_1_1NAMESPACE; |
|
685 |
String eml2_1_1NameSpace = DocumentImpl.EML2_1_1NAMESPACE;*/
|
|
495 | 686 |
|
496 | 687 |
|
497 | 688 |
if (xml == null) { |
498 | 689 |
logMetacat.debug("XMLSchemaService.findDocumentNamespace - XML doc is null. There is no namespace."); |
499 | 690 |
return namespace; |
500 | 691 |
} |
692 |
XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml); |
|
693 |
namespaceParser.parse(); |
|
694 |
namespace = namespaceParser.getNamespace(); |
|
695 |
/*String targetLine = getSchemaLine(xml); |
|
501 | 696 |
|
502 |
String targetLine = getSchemaLine(xml); |
|
503 |
|
|
504 | 697 |
// the prefix is at the beginning of the doc |
505 | 698 |
String prefix = null; |
506 |
String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}* ";
|
|
699 |
String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}*\\s+";
|
|
507 | 700 |
Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE); |
508 | 701 |
Matcher matcher = pattern.matcher(targetLine); |
509 | 702 |
if (matcher.find()) { |
... | ... | |
511 | 704 |
} |
512 | 705 |
|
513 | 706 |
// if a prefix was found, we are looking for xmlns:<prefix>="namespace" |
514 |
// if no prefix was found, we grab the first namespace.
|
|
707 |
// if no prefix was found, we will look for the default namespace.
|
|
515 | 708 |
String regex2; |
516 | 709 |
if (prefix != null) { |
517 |
regex2 = "xmlns:" + prefix + "=['\"](.*)['\"]"; |
|
710 |
logMetacat.debug("XMLSchemaService.findDocumentNamespace - found the prefix for the document "+prefix); |
|
711 |
regex2 = "xmlns:" + prefix + "=['\"]([^\"])*['\"]"; |
|
518 | 712 |
} else { |
519 |
regex2 = "xmlns:.*=['\"](.*)['\"]"; |
|
713 |
//regex2 = "xmlns:.*=['\"](.*)['\"]"; |
|
714 |
logMetacat.debug("XMLSchemaService.findDocumentNamespace - can't found the prefix for the document, so we look for the default namespace"); |
|
715 |
regex2 = "xmlns=['\"](.*)['\"]"; |
|
520 | 716 |
} |
521 | 717 |
Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE); |
522 | 718 |
Matcher matcher2 = pattern2.matcher(targetLine); |
523 | 719 |
if (matcher2.find()) { |
720 |
logMetacat.debug("XMLSchemaService.findDocumentNamespace - it has either a prefix or a default namespace"); |
|
721 |
System.out.println("the match group 0"+" is "+matcher2.group()); |
|
524 | 722 |
namespace = matcher2.group(1); |
723 |
|
|
724 |
System.out.println("the match group "+" is "+namespace); |
|
525 | 725 |
|
526 | 726 |
if (namespace.indexOf(eml2_0_0NameSpace) != -1) { |
527 | 727 |
namespace = eml2_0_0NameSpace; |
... | ... | |
532 | 732 |
} else if (namespace.indexOf(eml2_1_1NameSpace) != -1) { |
533 | 733 |
namespace = eml2_1_1NameSpace; |
534 | 734 |
} |
535 |
} |
|
536 |
|
|
735 |
}*/
|
|
736 |
logMetacat.debug("XMLSchemaService.findDocumentNamespace - the namespace (null means no namespace) in the document is "+namespace); |
|
537 | 737 |
return namespace; |
538 | 738 |
} |
739 |
|
|
740 |
/** |
|
741 |
* Get the attribute value of the noNamespaceSchemaLcation of the given xml |
|
742 |
* @param xml the xml obect needs to be searched |
|
743 |
* @return the attribute value of the noNamespaceSchemaLcation. The null will return if it can't be found. |
|
744 |
* @throws SAXException |
|
745 |
* @throws PropertyNotFoundException |
|
746 |
* @throws IOException |
|
747 |
*/ |
|
748 |
public static String findNoNamespaceSchemaLocationAttr(StringReader xml) throws PropertyNotFoundException, SAXException, IOException { |
|
749 |
String noNamespaceSchemaLocation = null; |
|
750 |
XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml); |
|
751 |
namespaceParser.parse(); |
|
752 |
noNamespaceSchemaLocation = namespaceParser.getNoNamespaceSchemaLocation(); |
|
753 |
logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocation - the noNamespaceSchemaLocation (null means no namespace) in the document is "+noNamespaceSchemaLocation); |
|
754 |
return noNamespaceSchemaLocation; |
|
755 |
} |
|
539 | 756 |
|
540 | 757 |
/** |
541 | 758 |
* Return the line from xml that holds the metadata like namespace and |
Also available in: Unified diff
Add the methods to figure out the schema location based on namespace, format id or no-namespace-schema-location uri.