Project

General

Profile

1
package edu.ucsb.nceas.metacat;
2

    
3
import java.io.File;
4
import java.sql.Statement;
5

    
6
import org.apache.log4j.Logger;
7

    
8
import edu.ucsb.nceas.utilities.Options;
9

    
10
/**
11
 * Before Metacat 1.8.1 release, Metacat uses the eml201 schema with the tag
12
 * RELEASE_EML_2_0_1_UPDATE_5. Unfortunately, this tag points at wrong version
13
 * of eml-resource.xsd. In this schema, the element "references" has an attribute named
14
 * "system" and the attribute has a default value "document". Metacat will add 
15
 * the attribute system="document" to "references" element even the orginal eml didn't have it
16
 * (this is another bug and see bug 1601), so this causes metacat generated some invalid eml201
17
 * documents. This class provides a path to fix the existed invalid eml201 documents. It will
18
 * remove the attribute system="document" of the element "references" in xml_nodes and xml_index
19
 * tables. 
20
 * @author tao
21
 *
22
 */
23
public class EML201DocumentCorrector  
24
{
25
	private Logger logMetacat = Logger.getLogger(EML201DocumentCorrector.class);
26
	
27
	/**
28
	 * Default constructor
29
	 *
30
	 */
31
     public EML201DocumentCorrector()
32
     {
33
    	 
34
     }
35
     
36
     /**
37
      *  It will remove the records - attribute system="document" of element "refrence"
38
      *  in both xml_nodes and xml_index table. Since xml_index has a foreign key (nodeid)which
39
      *  references nodeid in xml_nodes table, we should delete records in xml_index table first.
40
      */
41
     public boolean run()
42
     {
43
    	 DBConnection dbconn = null;
44
    	 boolean success = false;
45
    	 int serialNumber = 0;
46
    	   try
47
    	      {
48

    
49
    	           //checkout the dbconnection
50
    	          dbconn = DBConnectionPool.getDBConnection("EML201DocumentCorrector.run");
51
    	          serialNumber = dbconn.getCheckOutSerialNumber();
52
    	          Statement deletingStatement = dbconn.createStatement();
53
    	         
54
    	          // delete the records in xml_index table 
55
    	          String deletingIndex = generateXML_IndexDeletingSQL();
56
    	          deletingStatement.execute(deletingIndex);
57
    	          
58
    	          // delete the records in xml_nodes table
59
    	          String deletingNode = generateXML_NodeDeletingSQL();
60
    	          deletingStatement.execute(deletingNode);
61
    	          
62
    	          // delete the records in xml_nodes_revisions table
63
    	          String deletingNodeRevision = generateXML_Node_RevisionsDeletingSQL();
64
    	          deletingStatement.execute(deletingNodeRevision);
65
    	          
66
    	          //close statement and connection
67
    	          deletingStatement.close();
68
    	          //dbconn.close();
69
    	          success = true;
70
    	      }
71
    	        catch (Exception ee)
72
    	        {
73
    	          logMetacat.error("EML201DocumentCorrector.run: "
74
    	                                   + ee.getMessage());
75
    	          ee.printStackTrace();
76
    	        }
77
    	        finally
78
    	        {
79
    	          DBConnectionPool.returnDBConnection(dbconn, serialNumber);
80
    	        } //finally
81
    	        return success;
82
     }
83
     
84
     /*
85
      * Generate the sql command to delete the records in xml_node table.
86
      * Since it is leaf node, so we can just delete it without any other side-effect.
87
      */
88
     private String generateXML_NodeDeletingSQL()
89
     {
90
    	 String sql ="delete from xml_nodes where nodetype='ATTRIBUTE' and nodename='system' and nodedata='document' "+
91
    	                     "and parentnodeid in (select nodeid from xml_nodes where  nodetype='ELEMENT' and nodename='references') and docid in "+
92
    	                     "(select docid from xml_documents where doctype ='eml://ecoinformatics.org/eml-2.0.1')";
93
    	 return sql;
94
     }
95
     
96
     /*
97
      * Generate the sql command to delete the records in xml_node table.
98
      * Since it is leaf node, so we can just delete it without any other side-effect.
99
      */
100
     private String generateXML_Node_RevisionsDeletingSQL()
101
     {
102
    	 String sql ="delete from xml_nodes_revisions where nodetype='ATTRIBUTE' and nodename='system' and nodedata='document' "+
103
    	                     "and parentnodeid in (select nodeid from xml_nodes where  nodetype='ELEMENT' and nodename='references') and docid in "+
104
    	                     "(select docid from xml_documents where doctype ='eml://ecoinformatics.org/eml-2.0.1')";
105
    	 return sql;
106
     }
107
     
108
     /*
109
      * Generate the sql command to delete the records in xml_nidex table;
110
      */
111
     private String generateXML_IndexDeletingSQL()
112
     {
113
    	 String sql ="delete from xml_index where doctype ='eml://ecoinformatics.org/eml-2.0.1' AND nodeid in "+ 
114
    	 "(select nodeid from xml_index where path ='references/@system')";
115
    	 return sql;
116
     }
117
     
118
     /**
119
      *  Runs the job to correct eml201 documents - deleting extral nodes in
120
      * @param argus
121
      * @throws Exception
122
      */
123
     public static void main(String[] argus) throws Exception
124
     {
125
    	 
126
    	 //initialize options and connection pool
127
    	 Options.initialize(new File("build/metacat.properties"));
128
    	 DBConnectionPool connPool = DBConnectionPool.getInstance();
129
    	 
130
    	 
131
    	 // run the thread
132
    	 EML201DocumentCorrector correct = new EML201DocumentCorrector();
133
    	 //Thread thread = new Thread(correct);
134
    	 //thread.start();
135
    	 correct.run();
136
     }
137
}
(32-32/66)