1
|
package edu.ucsb.nceas.metacat;
|
2
|
|
3
|
import java.sql.Statement;
|
4
|
|
5
|
import org.apache.log4j.Logger;
|
6
|
|
7
|
import edu.ucsb.nceas.metacat.database.DBConnection;
|
8
|
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
|
9
|
import edu.ucsb.nceas.metacat.properties.PropertyService;
|
10
|
|
11
|
/**
 * Before the Metacat 1.8.1 release, Metacat used the eml201 schema with the tag
 * RELEASE_EML_2_0_1_UPDATE_5. Unfortunately, this tag points at the wrong version
 * of eml-resource.xsd. In that schema, the element "references" has an attribute named
 * "system", and the attribute has a default value of "document". Metacat added
 * the attribute system="document" to the "references" element even if the original EML
 * did not have it (this is another bug; see bug 1601), which caused Metacat to generate
 * some invalid eml201 documents. This class provides a way to fix the existing invalid
 * eml201 documents. It removes the attribute system="document" of the element "references"
 * from the xml_index, xml_nodes and xml_nodes_revisions tables.
 *
 * @author tao
 */
|
24
|
public class EML201DocumentCorrector
|
25
|
{
|
26
|
private Logger logMetacat = Logger.getLogger(EML201DocumentCorrector.class);
|
27
|
|
28
|
/**
|
29
|
* Default constructor
|
30
|
*
|
31
|
*/
|
32
|
public EML201DocumentCorrector()
|
33
|
{
|
34
|
|
35
|
}
|
36
|
|
37
|
/**
|
38
|
* It will remove the records - attribute system="document" of element "refrence"
|
39
|
* in both xml_nodes and xml_index table. Since xml_index has a foreign key (nodeid)which
|
40
|
* references nodeid in xml_nodes table, we should delete records in xml_index table first.
|
41
|
*/
|
42
|
public boolean run()
|
43
|
{
|
44
|
DBConnection dbconn = null;
|
45
|
boolean success = false;
|
46
|
int serialNumber = 0;
|
47
|
try
|
48
|
{
|
49
|
|
50
|
//checkout the dbconnection
|
51
|
dbconn = DBConnectionPool.getDBConnection("EML201DocumentCorrector.run");
|
52
|
serialNumber = dbconn.getCheckOutSerialNumber();
|
53
|
Statement deletingStatement = dbconn.createStatement();
|
54
|
|
55
|
// delete the records in xml_index table
|
56
|
String deletingIndex = generateXML_IndexDeletingSQL();
|
57
|
logMetacat.debug("EML201DocumentCorrector.run - deleting the records in xml_index table with sql: " + deletingIndex);
|
58
|
deletingStatement.execute(deletingIndex);
|
59
|
|
60
|
// delete the records in xml_nodes table
|
61
|
String deletingNode = generateXML_NodeDeletingSQL();
|
62
|
logMetacat.debug("EML201DocumentCorrector.run - deleting the records in xml_nodes table with sql: " + deletingNode);
|
63
|
deletingStatement.execute(deletingNode);
|
64
|
|
65
|
// delete the records in xml_nodes_revisions table
|
66
|
String deletingNodeRevision = generateXML_Node_RevisionsDeletingSQL();
|
67
|
logMetacat.debug("EML201DocumentCorrector.run - deleting the records in xml_nodes_revisions table with sql: " + deletingNodeRevision);
|
68
|
deletingStatement.execute(deletingNodeRevision);
|
69
|
|
70
|
//close statement and connection
|
71
|
deletingStatement.close();
|
72
|
//dbconn.close();
|
73
|
success = true;
|
74
|
}
|
75
|
catch (Exception ee)
|
76
|
{
|
77
|
logMetacat.error("EML201DocumentCorrector.run: "
|
78
|
+ ee.getMessage());
|
79
|
ee.printStackTrace();
|
80
|
}
|
81
|
finally
|
82
|
{
|
83
|
DBConnectionPool.returnDBConnection(dbconn, serialNumber);
|
84
|
} //finally
|
85
|
return success;
|
86
|
}
|
87
|
|
88
|
/*
|
89
|
* Generate the sql command to delete the records in xml_node table.
|
90
|
* Since it is leaf node, so we can just delete it without any other side-effect.
|
91
|
*/
|
92
|
private String generateXML_NodeDeletingSQL()
|
93
|
{
|
94
|
String sql ="delete from xml_nodes where nodetype='ATTRIBUTE' and nodename='system' "+
|
95
|
"and parentnodeid in (select nodeid from xml_nodes where nodetype='ELEMENT' and nodename='references') and docid in "+
|
96
|
"(select docid from xml_documents where doctype ='eml://ecoinformatics.org/eml-2.0.1')";
|
97
|
return sql;
|
98
|
}
|
99
|
|
100
|
/*
|
101
|
* Generate the sql command to delete the records in xml_node table.
|
102
|
* Since it is leaf node, so we can just delete it without any other side-effect.
|
103
|
*/
|
104
|
private String generateXML_Node_RevisionsDeletingSQL()
|
105
|
{
|
106
|
String sql ="delete from xml_nodes_revisions where nodetype='ATTRIBUTE' and nodename='system' "+
|
107
|
"and parentnodeid in (select nodeid from xml_nodes_revisions where nodetype='ELEMENT' and nodename='references') and docid in "+
|
108
|
"(select docid from xml_revisions where doctype ='eml://ecoinformatics.org/eml-2.0.1')";
|
109
|
return sql;
|
110
|
}
|
111
|
|
112
|
/*
|
113
|
* Generate the sql command to delete the records in xml_nidex table;
|
114
|
*/
|
115
|
private String generateXML_IndexDeletingSQL()
|
116
|
{
|
117
|
String sql ="delete from xml_index where doctype ='eml://ecoinformatics.org/eml-2.0.1' AND nodeid in "+
|
118
|
"(select nodeid from xml_index where path ='references/@system')";
|
119
|
return sql;
|
120
|
}
|
121
|
|
122
|
/**
|
123
|
* Runs the job to correct eml201 documents - deleting extral nodes in
|
124
|
* @param argus
|
125
|
* @throws Exception
|
126
|
*/
|
127
|
public static void main(String[] args) throws Exception
|
128
|
{
|
129
|
|
130
|
//initialize options and connection pool
|
131
|
PropertyService.getInstance(args[0]);
|
132
|
DBConnectionPool connPool = DBConnectionPool.getInstance();
|
133
|
|
134
|
// run the thread
|
135
|
EML201DocumentCorrector correct = new EML201DocumentCorrector();
|
136
|
//Thread thread = new Thread(correct);
|
137
|
//thread.start();
|
138
|
correct.run();
|
139
|
}
|
140
|
}
|