Project

General

Profile

« Previous | Next » 

Revision 5378

Added by berkley over 14 years ago

working on getting systemmetadata creation working for legacy knb objects

View differences:

test/edu/ucsb/nceas/metacat/dataone/CrudServiceTest.java
108 108
	    try
109 109
	    {
110 110
	        CrudService cs = CrudService.getInstance();
111
	        cs.generateMissingSystemMetadata();
111
	        AuthToken token = getToken();
112
	        cs.generateMissingSystemMetadata(token);
112 113
	    }
113 114
	    catch(Exception e)
114 115
	    {
src/edu/ucsb/nceas/metacat/dataone/CrudService.java
30 30
import java.io.InputStream;
31 31
import java.io.OutputStream;
32 32
import java.io.PrintWriter;
33
import java.io.StringBufferInputStream;
34
import java.security.MessageDigest;
33 35
import java.sql.SQLException;
34 36
import java.util.*;
35 37
import java.text.DateFormat;
......
206 208
     * exempt.  This is a utility method for migration of existing object 
207 209
     * stores to DataONE where SystemMetadata is required for all objects.  See 
208 210
     * https://trac.dataone.org/ticket/591
211
     * 
212
     * @param token an authtoken with appropriate permissions to read all 
213
     * documents in the object store.  To work correctly, this should probably
214
     * be an administrative credential.
209 215
     */
210
    public void generateMissingSystemMetadata()
216
    public void generateMissingSystemMetadata(AuthToken token)
211 217
    {
218
        IdentifierManager im = IdentifierManager.getInstance();
212 219
        //get the list of ids with no SM
213
        List<String> l = IdentifierManager.getInstance().getLocalIdsWithNoSystemMetadata();
220
        List<String> l = im.getLocalIdsWithNoSystemMetadata();
214 221
        for(int i=0; i<l.size(); i++)
215 222
        { //for each id, add a system metadata doc
216
            String id = l.get(i);
217
            System.out.println("Creating SystemMetadata for localId " + id);
223
            String localId = l.get(i);
224
            System.out.println("Creating SystemMetadata for localId " + localId);
225
            //get the document
226
            try
227
            {
228
                //generate required system metadata fields from the document
229
                SystemMetadata sm = createSystemMetadata(localId, token);
230
                System.out.println("sm: " + sm.toString());
231
                //insert the systemmetadata object
232
                //associate the systemmetadata with the original id
233
            }
234
            catch(Exception e)
235
            {
236
                System.out.println("Exception generating missing system metadata: " + e.getMessage());
237
                logMetacat.error("Could not generate missing system metadata: " + e.getMessage());
238
            }
218 239
        }
219 240
    }
220 241
    
242
    /**
243
     * create system metadata with a specified id, doc and format
244
     */
245
    private SystemMetadata createSystemMetadata(String localId, AuthToken token)
246
      throws Exception
247
    {
248
        IdentifierManager im = IdentifierManager.getInstance();
249
        Hashtable<String, String> docInfo = im.getDocumentInfo(localId);
250
        
251
        //get the document text
252
        int rev = im.getLatestRevForLocalId(localId);
253
        Identifier identifier = new Identifier();
254
        identifier.setValue(im.getGUID(localId, rev));
255
        InputStream is = this.get(token, identifier);
256
        
257
        SystemMetadata sm = new SystemMetadata();
258
        //set the id
259
        sm.setIdentifier(identifier);
260
        
261
        //set the object format
262
        ObjectFormat format = ObjectFormat.convert(docInfo.get("doctype"));
263
        sm.setObjectFormat(format);
264
        
265
        //create the checksum
266
        String checksumS = checksum(is);
267
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
268
        Checksum checksum = new Checksum();
269
        checksum.setValue(checksumS);
270
        checksum.setAlgorithm(ca);
271
        sm.setChecksum(checksum);
272
        
273
        //set the size
274
        is = this.get(token, identifier);
275
        sm.setSize(sizeOfStream(is));
276
        
277
        //submitter
278
        Principal p = new Principal();
279
        p.setValue(docInfo.get("user_owner"));
280
        sm.setSubmitter(p);
281
        sm.setRightsHolder(p);
282
        try
283
        {
284
            Date dateCreated = parseMetacatDate(docInfo.get("date_created"));
285
            sm.setDateUploaded(dateCreated);
286
            Date dateUpdated = parseMetacatDate(docInfo.get("date_updated"));
287
            sm.setDateSysMetadataModified(dateUpdated);
288
        }
289
        catch(Exception e)
290
        {
291
            System.out.println("couldn't parse a date: " + e.getMessage());
292
            Date dateCreated = new Date();
293
            sm.setDateUploaded(dateCreated);
294
            Date dateUpdated = new Date();
295
            sm.setDateSysMetadataModified(dateUpdated);
296
        }
297
        NodeReference nr = new NodeReference();
298
        nr.setValue("metacat");
299
        sm.setOriginMemberNode(nr);
300
        sm.setAuthoritativeMemberNode(nr);
301
        return sm;
302
    }
303
    
221 304
    public Identifier create(AuthToken token, Identifier guid, 
222 305
            InputStream object, SystemMetadata sysmeta) throws InvalidToken, 
223 306
            ServiceFailure, NotAuthorized, IdentifierNotUnique, UnsupportedType, 
......
1068 1151
            throw new ServiceFailure("1190", "Failed to serialize and insert SystemMetadata: " + e.getMessage());
1069 1152
        }    
1070 1153
    }
1154
    
1155
    /**
1156
     * produce an md5 checksum for item
1157
     */
1158
    private String checksum(InputStream is)
1159
      throws Exception
1160
    {        
1161
        byte[] buffer = new byte[1024];
1162
        MessageDigest complete = MessageDigest.getInstance("MD5");
1163
        int numRead;
1164
        
1165
        do 
1166
        {
1167
          numRead = is.read(buffer);
1168
          if (numRead > 0) 
1169
          {
1170
            complete.update(buffer, 0, numRead);
1171
          }
1172
        } while (numRead != -1);
1173
        
1174
        
1175
        return getHex(complete.digest());
1176
    }
1177
    
1178
    /**
1179
     * convert a byte array to a hex string
1180
     */
1181
    private static String getHex( byte [] raw ) 
1182
    {
1183
        final String HEXES = "0123456789ABCDEF";
1184
        if ( raw == null ) {
1185
          return null;
1186
        }
1187
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
1188
        for ( final byte b : raw ) {
1189
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
1190
             .append(HEXES.charAt((b & 0x0F)));
1191
        }
1192
        return hex.toString();
1193
    }
1194
    
1195
    /**
1196
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
1197
     * a proper date object
1198
     * @param date
1199
     * @return
1200
     */
1201
    private Date parseMetacatDate(String date)
1202
    {
1203
        String year = date.substring(0, 4);
1204
        String month = date.substring(5, 7);
1205
        String day = date.substring(8, 10);
1206
        Calendar c = Calendar.getInstance();
1207
        c.set(new Integer(year).intValue(), 
1208
              new Integer(month).intValue(), 
1209
              new Integer(day).intValue());
1210
        return c.getTime();
1211
    }
1212
    
1213
    /**
1214
     * find the size (in bytes) of a stream
1215
     * @param is
1216
     * @return
1217
     * @throws IOException
1218
     */
1219
    private long sizeOfStream(InputStream is)
1220
        throws IOException
1221
    {
1222
        long size = 0;
1223
        byte[] b = new byte[1024];
1224
        int numread = is.read(b, 0, 1024);
1225
        while(numread != -1)
1226
        {
1227
            size += numread;
1228
            numread = is.read(b, 0, 1024);
1229
        }
1230
        return size;
1231
    }
1071 1232
}
src/edu/ucsb/nceas/metacat/IdentifierManager.java
80 80
    }
81 81
    
82 82
    /**
83
     * return information on the document with localId.  These are the fields
84
     * from the xml_documents table.  They can be used to contstruct metadata 
85
     * about the object that is stored.
86
     * @param localId
87
     * @return
88
     * @throws McdbDocNotFoundException
89
     */
90
    public Hashtable<String, String> getDocumentInfo(String localId)
91
        throws McdbDocNotFoundException
92
    {
93
        Hashtable<String, String> h = new Hashtable<String, String>();
94
        String sql = "select docname, doctype, user_owner, user_updated, " +
95
            "server_location, rev, date_created, date_updated from " + 
96
            "xml_documents where docid like '" + localId + "'";
97
        DBConnection dbConn = null;
98
        int serialNumber = -1;
99
        try 
100
        {
101
            // Get a database connection from the pool
102
            dbConn = DBConnectionPool.getDBConnection("IdentifierManager.getDocumentInfo");
103
            serialNumber = dbConn.getCheckOutSerialNumber();
104

  
105
            // Execute the insert statement
106
            PreparedStatement stmt = dbConn.prepareStatement(sql);
107
            ResultSet rs = stmt.executeQuery();
108
            if (rs.next()) 
109
            {
110
                String docname = rs.getString(1);
111
                String doctype = rs.getString(2);
112
                String user_owner = rs.getString(3);
113
                String user_updated = rs.getString(4);
114
                String server_location = rs.getString(5);
115
                int rev = rs.getInt(6);
116
                String date_created = rs.getString(7);
117
                String date_updated = rs.getString(8);
118
                h.put("docname", docname);
119
                h.put("doctype", doctype);
120
                h.put("user_owner", user_owner);
121
                h.put("user_updated", user_updated);
122
                h.put("server_location", server_location);
123
                h.put("rev", new Integer(rev).toString());
124
                h.put("date_created", date_created);
125
                h.put("date_updated", date_updated);
126
                
127
                stmt.close();
128
            } 
129
            else
130
            {
131
                stmt.close();
132
                DBConnectionPool.returnDBConnection(dbConn, serialNumber);
133
                throw new McdbDocNotFoundException("Could not find document " + localId);
134
            }
135
        } 
136
        catch (SQLException e) 
137
        {
138
            logMetacat.error("Error while getting document info for localid " + localId + " : "  
139
                    + e.getMessage());
140
        } 
141
        finally 
142
        {
143
            // Return database connection to the pool
144
            DBConnectionPool.returnDBConnection(dbConn, serialNumber);
145
        }
146
        return h;
147
    }
148
    
149
    /**
150
     * return the newest rev for a given localId
151
     * @param localId
152
     * @return
153
     */
154
    public int getLatestRevForLocalId(String localId)
155
        throws McdbDocNotFoundException
156
    {
157
        int rev = 0;
158
        String sql = "select rev from xml_documents where docid like '" + localId + "'";
159
        DBConnection dbConn = null;
160
        int serialNumber = -1;
161
        try 
162
        {
163
            // Get a database connection from the pool
164
            dbConn = DBConnectionPool.getDBConnection("IdentifierManager.getLatestRevForLocalId");
165
            serialNumber = dbConn.getCheckOutSerialNumber();
166

  
167
            // Execute the insert statement
168
            PreparedStatement stmt = dbConn.prepareStatement(sql);
169
            ResultSet rs = stmt.executeQuery();
170
            if (rs.next()) 
171
            {
172
                rev = rs.getInt(1);
173
                stmt.close();
174
            } 
175
            else
176
            {
177
                stmt.close();
178
                DBConnectionPool.returnDBConnection(dbConn, serialNumber);
179
                throw new McdbDocNotFoundException("Could not find document " + localId);
180
            }
181
        } 
182
        catch (SQLException e) 
183
        {
184
            logMetacat.error("Error while looking up the guid: " 
185
                    + e.getMessage());
186
        } 
187
        finally 
188
        {
189
            // Return database connection to the pool
190
            DBConnectionPool.returnDBConnection(dbConn, serialNumber);
191
        }
192
        return rev;
193
    }
194
    
195
    /**
83 196
     * return all local ids in the object store that do not have associated
84 197
     * system metadata and are not themselves system metadata
85 198
     */

Also available in: Unified diff