|
1 |
package edu.ucsb.nceas.metacat.admin.upgrade;
|
|
2 |
/**
|
|
3 |
* '$RCSfile$'
|
|
4 |
* Copyright: 2012 Regents of the University of California and the
|
|
5 |
* National Center for Ecological Analysis and Synthesis
|
|
6 |
* Authors: Matthew Jones
|
|
7 |
*
|
|
8 |
* '$Author$'
|
|
9 |
* '$Date$'
|
|
10 |
* '$Revision$'
|
|
11 |
*
|
|
12 |
* This program is free software; you can redistribute it and/or modify
|
|
13 |
* it under the terms of the GNU General Public License as published by
|
|
14 |
* the Free Software Foundation; either version 2 of the License, or
|
|
15 |
* (at your option) any later version.
|
|
16 |
*
|
|
17 |
* This program is distributed in the hope that it will be useful,
|
|
18 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
20 |
* GNU General Public License for more details.
|
|
21 |
*
|
|
22 |
* You should have received a copy of the GNU General Public License
|
|
23 |
* along with this program; if not, write to the Free Software
|
|
24 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
25 |
*/
|
|
26 |
import java.io.IOException;
|
|
27 |
import java.sql.Connection;
|
|
28 |
import java.sql.Driver;
|
|
29 |
import java.sql.DriverManager;
|
|
30 |
import java.sql.PreparedStatement;
|
|
31 |
import java.sql.ResultSet;
|
|
32 |
import java.sql.SQLException;
|
|
33 |
import java.sql.Statement;
|
|
34 |
|
|
35 |
import org.apache.commons.logging.Log;
|
|
36 |
import org.apache.commons.logging.LogFactory;
|
|
37 |
|
|
38 |
import edu.ucsb.nceas.metacat.admin.AdminException;
|
|
39 |
import edu.ucsb.nceas.metacat.properties.PropertyService;
|
|
40 |
import edu.ucsb.nceas.metacat.shared.ServiceException;
|
|
41 |
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
|
|
42 |
import edu.ucsb.nceas.utilities.SortedProperties;
|
|
43 |
|
|
44 |
/**
|
|
45 |
* Optionally upgrade all of the existing identifiers in the tables to be official
|
|
46 |
* DOIs that are assigned by a DOI registration authority. This class first checks
|
|
47 |
* if GUIDs should be converted to DOIs, and if so, updates all relevant
|
|
48 |
* tables and resources in Metacat, and then registers each of the newly minted
|
|
49 |
* identifiers with the EZID service. You must have an EZID account for this to
|
|
50 |
* work properly.
|
|
51 |
*
|
|
52 |
* Whether or not identifiers should be converted is determined by several metacat.properties.
|
|
53 |
* First, only upgrade if 'guid.assignGUIDs' is true. Next, for each server registered in
|
|
54 |
* the xml_replication table, only convert the documents belonging to that server if
|
|
55 |
* there is a corresponding property that gives the 'shoulder' or 'prefix'' to be used to
|
|
56 |
* convert existing IDs to GUIDs. For example, if servers 1 & 2 are registered in
|
|
57 |
* xml_replication, then only convert the IDs for server 1 to GUIDs if the
|
|
58 |
* property 'guid.ezid.doishoulder.1' defines a valid doi shoulder.
|
|
59 |
*
|
|
60 |
* @author jones
|
|
61 |
*/
|
|
62 |
public class GenerateGlobalIdentifiers implements UpgradeUtilityInterface {
|
|
63 |
|
|
64 |
private static Log log = LogFactory.getLog(GenerateGlobalIdentifiers.class);
|
|
65 |
private String driver = null;
|
|
66 |
private String url = null;
|
|
67 |
private String user = null;
|
|
68 |
private String password = null;
|
|
69 |
private String eziduser = null;
|
|
70 |
private String ezidPassword = null;
|
|
71 |
|
|
72 |
/**
|
|
73 |
* The main upgrade() procedure, which first upgrades identifiers in the database,
|
|
74 |
* then registers these with EZID.
|
|
75 |
*/
|
|
76 |
public boolean upgrade() throws AdminException {
|
|
77 |
boolean success = false;
|
|
78 |
log.debug("Upgrading identifiers to DOIs");
|
|
79 |
|
|
80 |
// Check if assignDOIs is true, and if EZID is configured
|
|
81 |
boolean shouldUpgrade = false;
|
|
82 |
try {
|
|
83 |
shouldUpgrade = new Boolean(PropertyService.getProperty("guid.assignGUIDs")).booleanValue();
|
|
84 |
if (shouldUpgrade) {
|
|
85 |
log.debug("Upgrading identifiers to DOIs");
|
|
86 |
|
|
87 |
success = updateIdentifierReferences();
|
|
88 |
|
|
89 |
// TODO: Query for local GUID identifiers & metadata, and for each
|
|
90 |
// register with EZID; in a separate thread; including relevant metadata
|
|
91 |
}
|
|
92 |
|
|
93 |
} catch (PropertyNotFoundException e) {
|
|
94 |
// When the guid properties are not found, do nothing
|
|
95 |
shouldUpgrade = false;
|
|
96 |
success = true;
|
|
97 |
}
|
|
98 |
|
|
99 |
return success;
|
|
100 |
}
|
|
101 |
|
|
102 |
/**
|
|
103 |
* Loop through the servers registered in this instance of Metacat, and for
|
|
104 |
* each server determine if we should convert to GUIDs for the documents associated
|
|
105 |
* with that server. If so, convert the identifier table to use a GUID for all
|
|
106 |
* documents for that server found in xml_documents and xml_replication.
|
|
107 |
* @return true if all conversions succeed
|
|
108 |
* @throws AdminException
|
|
109 |
*/
|
|
110 |
private boolean updateIdentifierReferences() throws AdminException {
|
|
111 |
|
|
112 |
log.debug("Updating identifier references...");
|
|
113 |
|
|
114 |
try {
|
|
115 |
driver = PropertyService.getProperty("database.driver");
|
|
116 |
url = PropertyService.getProperty("database.connectionURI");
|
|
117 |
user = PropertyService.getProperty("database.user");
|
|
118 |
password = PropertyService.getProperty("database.password");
|
|
119 |
eziduser = PropertyService.getProperty("guid.ezid.username");
|
|
120 |
ezidPassword = PropertyService.getProperty("guid.ezid.password");
|
|
121 |
|
|
122 |
} catch (PropertyNotFoundException e) {
|
|
123 |
String msg = "PropertyNotFound while trying to convert identifiers: " + e.getMessage();
|
|
124 |
log.error(msg, e);
|
|
125 |
throw new AdminException(msg);
|
|
126 |
}
|
|
127 |
|
|
128 |
// Create a connection, and start a transaction for which FK constraint checking is deferred
|
|
129 |
Connection con = openDatabaseConnection();
|
|
130 |
deferConstraints(con);
|
|
131 |
// Copy the identifier.guid column for later use
|
|
132 |
createDuplicateGUIDColumn(con);
|
|
133 |
|
|
134 |
// Look up the maximum server number from the database
|
|
135 |
int maxServerId = lookupMaxServers(con);
|
|
136 |
|
|
137 |
// Loop across each replica server, checking if it is to have DOIs assigned
|
|
138 |
// This is determined by the presence of a shoulder in the configuration file for
|
|
139 |
// that server number
|
|
140 |
for (int server = 1; server <= maxServerId; server++) {
|
|
141 |
|
|
142 |
// Get the server's shoulder, or skip if not found
|
|
143 |
try {
|
|
144 |
String shoulder = PropertyService.getProperty("guid.ezid.doishoulder." + server);
|
|
145 |
log.debug("Processing shoulder for server " + server + ": " + shoulder);
|
|
146 |
|
|
147 |
// Update identifiers table (but only for local documents, not replicated documents)
|
|
148 |
convertIdentifierTable(con, server, shoulder);
|
|
149 |
|
|
150 |
} catch (PropertyNotFoundException e) {
|
|
151 |
// The shoulder was not found for this server, so we just skip it
|
|
152 |
log.debug("No shoulder found: " + e.getMessage());
|
|
153 |
}
|
|
154 |
}
|
|
155 |
|
|
156 |
// Convert other tables once, after the identifier table conversion has established the new
|
|
157 |
// global identifiers to be used
|
|
158 |
// This incudes: xml_access table, including accessfileid field, systemmetadata,
|
|
159 |
// systemmetadatareplicationpolicy,and systemmetadatareplicationstatus tables
|
|
160 |
convertIdentifierField(con, "xml_access", "guid");
|
|
161 |
convertIdentifierField(con, "xml_access", "accessfileid");
|
|
162 |
convertIdentifierField(con, "systemmetadata", "guid");
|
|
163 |
convertIdentifierField(con, "systemmetadatareplicationpolicy", "guid");
|
|
164 |
convertIdentifierField(con, "systemmetadatareplicationstatus", "guid");
|
|
165 |
|
|
166 |
// Now clean up the temporary column we created, but note this makes it much
|
|
167 |
// harder to revert our changes
|
|
168 |
dropDuplicateGUIDColumn(con);
|
|
169 |
|
|
170 |
commitAndCloseConnection(con);
|
|
171 |
|
|
172 |
/*
|
|
173 |
// The following SQL commands will revert the changes made by this script as long as the identifier.old_guid column is intact
|
|
174 |
start transaction;
|
|
175 |
update systemmetadatareplicationstatus set guid = id.old_guid FROM identifier id WHERE systemmetadatareplicationstatus.guid = id.guid;
|
|
176 |
update systemmetadatareplicationpolicy set guid = id.old_guid FROM identifier id WHERE systemmetadatareplicationpolicy.guid = id.guid;
|
|
177 |
update systemmetadata set guid = id.old_guid FROM identifier id WHERE systemmetadata.guid = id.guid;
|
|
178 |
update xml_access set accessfileid = id.old_guid FROM identifier id WHERE xml_access.accessfileid = id.guid;
|
|
179 |
update xml_access set guid = id.old_guid FROM identifier id WHERE xml_access.guid = id.guid;
|
|
180 |
update identifier set guid = old_guid;
|
|
181 |
alter table identifier drop column old_guid;
|
|
182 |
commit;
|
|
183 |
*/
|
|
184 |
|
|
185 |
return true;
|
|
186 |
}
|
|
187 |
|
|
188 |
/**
|
|
189 |
* Open a JDBC connection that will be used for all of the subsequent methods that
|
|
190 |
* access or modify the database. All changes are done in one transaction, so need
|
|
191 |
* to be done using a single connection.
|
|
192 |
* @return the Connection created
|
|
193 |
* @throws AdminException
|
|
194 |
*/
|
|
195 |
private Connection openDatabaseConnection() throws AdminException {
|
|
196 |
Connection con = null;
|
|
197 |
try {
|
|
198 |
Driver d = (Driver) Class.forName(driver).newInstance();
|
|
199 |
DriverManager.registerDriver(d);
|
|
200 |
con = DriverManager.getConnection(url, user, password);
|
|
201 |
} catch (InstantiationException e) {
|
|
202 |
String msg = "InstantiationException updating creating DB connection: " + e.getMessage();
|
|
203 |
log.error(msg, e);
|
|
204 |
throw new AdminException(msg);
|
|
205 |
} catch (IllegalAccessException e) {
|
|
206 |
String msg = "IllegalAccessException updating creating DB connection: " + e.getMessage();
|
|
207 |
log.error(msg, e);
|
|
208 |
throw new AdminException(msg);
|
|
209 |
} catch (ClassNotFoundException e) {
|
|
210 |
String msg = "ClassNotFoundException updating creating DB connection: " + e.getMessage();
|
|
211 |
log.error(msg, e);
|
|
212 |
throw new AdminException(msg);
|
|
213 |
} catch (SQLException e) {
|
|
214 |
String msg = "SQLException updating creating DB connection: " + e.getMessage();
|
|
215 |
log.error(msg, e);
|
|
216 |
throw new AdminException(msg);
|
|
217 |
}
|
|
218 |
|
|
219 |
return con;
|
|
220 |
}
|
|
221 |
|
|
222 |
/**
|
|
223 |
* Start a transaction on the given connection, and defer all constraints so
|
|
224 |
* that changes to foreign keys will not be checked until after all of the
|
|
225 |
* identifiers have been updated in all tables. Without this, we would get
|
|
226 |
* FK constraint violations as we changed the tables.
|
|
227 |
* @param con the connection to the database
|
|
228 |
* @throws AdminException
|
|
229 |
*/
|
|
230 |
private void deferConstraints(Connection con) throws AdminException {
|
|
231 |
|
|
232 |
try {
|
|
233 |
con.setAutoCommit(false);
|
|
234 |
Statement sqlStatement = con.createStatement();
|
|
235 |
boolean hasResults = sqlStatement.execute("SET CONSTRAINTS ALL DEFERRED;");
|
|
236 |
log.debug("Constraints Deferred.");
|
|
237 |
} catch (SQLException e) {
|
|
238 |
String msg = "SQLException while disabling constraints: " + e.getMessage();
|
|
239 |
log.error(msg, e);
|
|
240 |
throw new AdminException(msg);
|
|
241 |
}
|
|
242 |
}
|
|
243 |
|
|
244 |
/**
|
|
245 |
* Temporarily save the current identifier.guid in a backup column (old_guid), to be used
|
|
246 |
* for updating additional tables.
|
|
247 |
* @param con the connection to the database with an open transaction
|
|
248 |
* @throws AdminException
|
|
249 |
*/
|
|
250 |
private void createDuplicateGUIDColumn(Connection con) throws AdminException {
|
|
251 |
try {
|
|
252 |
Statement sqlStatement = con.createStatement();
|
|
253 |
boolean hasResults = sqlStatement.execute("ALTER TABLE identifier ADD COLUMN old_guid TEXT;");
|
|
254 |
log.debug("old_guid column added.");
|
|
255 |
sqlStatement = con.createStatement();
|
|
256 |
hasResults = sqlStatement.execute("UPDATE identifier SET old_guid = guid;");
|
|
257 |
log.debug("Copied original identifiers to old_guid column.");
|
|
258 |
} catch (SQLException e) {
|
|
259 |
String msg = "SQLException while duplicating GUID column: " + e.getMessage();
|
|
260 |
log.error(msg, e);
|
|
261 |
throw new AdminException(msg);
|
|
262 |
}
|
|
263 |
}
|
|
264 |
|
|
265 |
/**
|
|
266 |
* Drop the temporary GUID backup column (old_guid) once upgrading identifiers
|
|
267 |
* is complete.
|
|
268 |
* @param con the connection to the database with an open transaction
|
|
269 |
* @throws AdminException
|
|
270 |
*/
|
|
271 |
private void dropDuplicateGUIDColumn(Connection con) throws AdminException {
|
|
272 |
try {
|
|
273 |
Statement sqlStatement = con.createStatement();
|
|
274 |
boolean hasResults = sqlStatement.execute("ALTER TABLE identifier DROP COLUMN old_guid;");
|
|
275 |
log.debug("old_guid column dropped.");
|
|
276 |
} catch (SQLException e) {
|
|
277 |
String msg = "SQLException dropping GUID column: " + e.getMessage();
|
|
278 |
log.error(msg, e);
|
|
279 |
throw new AdminException(msg);
|
|
280 |
}
|
|
281 |
}
|
|
282 |
|
|
283 |
/**
|
|
284 |
* Query the database to determine which servers are registered and can be
|
|
285 |
* inspected for potential identifier upgrades.
|
|
286 |
* @param con the connection to the database with an open transaction
|
|
287 |
* @throws AdminException
|
|
288 |
*/
|
|
289 |
private int lookupMaxServers(Connection con) throws AdminException {
|
|
290 |
int maxServers = 0;
|
|
291 |
try {
|
|
292 |
Statement sqlStatement = con.createStatement();
|
|
293 |
boolean hasResults = sqlStatement.execute("SELECT max(serverid) AS maxserverid FROM xml_replication;");
|
|
294 |
if (hasResults) {
|
|
295 |
ResultSet rs = sqlStatement.getResultSet();
|
|
296 |
if (rs.next()) {
|
|
297 |
maxServers = rs.getInt(1);
|
|
298 |
log.debug("MaxServerID: " + maxServers);
|
|
299 |
} else {
|
|
300 |
String msg = "Could not determine max serverid; database query cursor had zero rows.";
|
|
301 |
log.error(msg);
|
|
302 |
throw new AdminException(msg);
|
|
303 |
}
|
|
304 |
} else {
|
|
305 |
String msg = "Could not determine max serverid; database query failed to return results.";
|
|
306 |
log.error(msg);
|
|
307 |
throw new AdminException(msg);
|
|
308 |
}
|
|
309 |
|
|
310 |
} catch (SQLException e) {
|
|
311 |
String msg = "SQLException while looking up serverId: " + e.getMessage();
|
|
312 |
log.error(msg, e);
|
|
313 |
throw new AdminException(msg);
|
|
314 |
}
|
|
315 |
return maxServers;
|
|
316 |
}
|
|
317 |
|
|
318 |
/**
|
|
319 |
* Convert identifiers to DOI GUIDs in the identifier table for any documents found in
|
|
320 |
* xml_documents or xml_revisions that reside on a server for which a DOI shoulder prefix
|
|
321 |
* has been assigned.
|
|
322 |
*
|
|
323 |
* @param con the connection to the database with an open transaction
|
|
324 |
* @param server the id of the server whose documents are to be converted
|
|
325 |
* @param shoulder the DOI shoulder to be used for converting
|
|
326 |
* @return true if conversion is successful
|
|
327 |
* @throws AdminException
|
|
328 |
*/
|
|
329 |
private boolean convertIdentifierTable(Connection con, int server, String shoulder) throws AdminException {
|
|
330 |
PreparedStatement pstmt = null;
|
|
331 |
try {
|
|
332 |
|
|
333 |
String sql_1 =
|
|
334 |
"UPDATE identifier " +
|
|
335 |
"SET guid = p.doi " +
|
|
336 |
"FROM (" +
|
|
337 |
"SELECT guid, ? || guid as doi " +
|
|
338 |
"FROM identifier id2, ";
|
|
339 |
String sql_2 =
|
|
340 |
" xd " +
|
|
341 |
"WHERE id2.docid = xd.docid AND id2.rev = xd.rev " +
|
|
342 |
"AND xd.server_location = ? " +
|
|
343 |
") p " +
|
|
344 |
"WHERE identifier.guid = p.guid;";
|
|
345 |
|
|
346 |
// Convert identifiers found in xml_documents
|
|
347 |
pstmt = con.prepareStatement(sql_1 + "xml_documents" + sql_2);
|
|
348 |
pstmt.setString(1, shoulder);
|
|
349 |
pstmt.setInt(2, server);
|
|
350 |
pstmt.execute();
|
|
351 |
|
|
352 |
// Convert identifiers found in xml_revisions
|
|
353 |
pstmt = con.prepareStatement(sql_1 + "xml_revisions" + sql_2);
|
|
354 |
pstmt.setString(1, shoulder);
|
|
355 |
pstmt.setInt(2, server);
|
|
356 |
pstmt.execute();
|
|
357 |
|
|
358 |
log.debug("Finished shoulder for server " + server + ": " + shoulder);
|
|
359 |
|
|
360 |
} catch (SQLException e) {
|
|
361 |
String msg = "SQLException updating identifier table: " + e.getMessage();
|
|
362 |
log.error(msg, e);
|
|
363 |
throw new AdminException(msg);
|
|
364 |
}
|
|
365 |
|
|
366 |
return true;
|
|
367 |
}
|
|
368 |
|
|
369 |
/**
|
|
370 |
* Convert the given field in the given table to use the new identifier.guid that
|
|
371 |
* has been assigned.
|
|
372 |
*
|
|
373 |
* @param con the connection to the database with an open transaction
|
|
374 |
* @param table the name of the table with identifiers to be converted
|
|
375 |
* @param field the name of the field containing identifiers to be converted
|
|
376 |
* @return true if conversion was successful
|
|
377 |
* @throws AdminException
|
|
378 |
*/
|
|
379 |
private boolean convertIdentifierField(Connection con, String table, String field) throws AdminException {
|
|
380 |
PreparedStatement pstmt = null;
|
|
381 |
try {
|
|
382 |
|
|
383 |
String sql_1 =
|
|
384 |
"UPDATE " + table + " " +
|
|
385 |
"SET " + field + " = id.guid " +
|
|
386 |
"FROM identifier id " +
|
|
387 |
"WHERE " + table + "."+ field + " = id.old_guid;";
|
|
388 |
|
|
389 |
// Convert identifiers found in the given table and field
|
|
390 |
pstmt = con.prepareStatement(sql_1);
|
|
391 |
pstmt.execute();
|
|
392 |
|
|
393 |
log.debug("Finished converting table " + table + " (" + field +")" );
|
|
394 |
|
|
395 |
} catch (SQLException e) {
|
|
396 |
String msg = "SQLException while converting identifier field: " + e.getMessage();
|
|
397 |
log.error(msg, e);
|
|
398 |
throw new AdminException(msg);
|
|
399 |
}
|
|
400 |
|
|
401 |
return true;
|
|
402 |
}
|
|
403 |
|
|
404 |
/**
|
|
405 |
* Commit the current transaction on the connection, and close the connection.
|
|
406 |
* @param con the connection to the database with an open transaction
|
|
407 |
* @return true if the connection is successfully closed
|
|
408 |
* @throws AdminException
|
|
409 |
*/
|
|
410 |
private boolean commitAndCloseConnection(Connection con) throws AdminException {
|
|
411 |
try {
|
|
412 |
|
|
413 |
log.debug("Committing and closing connection.");
|
|
414 |
con.commit();
|
|
415 |
con.close();
|
|
416 |
} catch (SQLException e) {
|
|
417 |
String msg = "SQLException while commiting and closing connection: " + e.getMessage();
|
|
418 |
log.error(msg, e);
|
|
419 |
throw new AdminException(msg);
|
|
420 |
}
|
|
421 |
|
|
422 |
return true;
|
|
423 |
}
|
|
424 |
|
|
425 |
/**
|
|
426 |
* Main method, solely for testing. Not used in ordinary operation.
|
|
427 |
*/
|
|
428 |
public static void main(String [] ags){
|
|
429 |
|
|
430 |
try {
|
|
431 |
// set up the properties based on the test/deployed configuration of the workspace
|
|
432 |
SortedProperties testProperties =
|
|
433 |
new SortedProperties("test/test.properties");
|
|
434 |
testProperties.load();
|
|
435 |
String metacatContextDir = testProperties.getProperty("metacat.contextDir");
|
|
436 |
PropertyService.getInstance(metacatContextDir + "/WEB-INF");
|
|
437 |
// now run it
|
|
438 |
GenerateGlobalIdentifiers upgrader = new GenerateGlobalIdentifiers();
|
|
439 |
upgrader.upgrade();
|
|
440 |
} catch (IOException e) {
|
|
441 |
e.printStackTrace();
|
|
442 |
} catch (PropertyNotFoundException e) {
|
|
443 |
e.printStackTrace();
|
|
444 |
} catch (ServiceException e) {
|
|
445 |
e.printStackTrace();
|
|
446 |
} catch (AdminException e) {
|
|
447 |
e.printStackTrace();
|
|
448 |
}
|
|
449 |
}
|
|
450 |
}
|
Added DOI generation to the 2.0.0 upgrade process. To succeed, this script must be run on a fresh 2.0.0 database, or on a 1.9.5 version database, as those are the only ways to get the needed foreign keys to be marked as deferrable. The identifier conversion must be turned on by setting correct properties in metacat.properties. See the comments in GenerateGlobalIdentifiers for details. By default, conversion is set to false in the properties file. If you want to convert an instance to use DOIs, be sure to set metacat.properties up BEFORE running through the Metacat configuration and database upgrade.