1
|
-- use default values initially
|
2
|
CREATE TABLE doi_registration AS
|
3
|
|
4
|
select
|
5
|
text('testing') as ezid_account,
|
6
|
doc.doctype,
|
7
|
id.guid,
|
8
|
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url,
|
9
|
text('Unknown') as title,
|
10
|
text(null) as creator,
|
11
|
text(null) as publisher,
|
12
|
to_char(doc.date_created, 'YYYY') as pub_date
|
13
|
from identifier id, xml_documents doc
|
14
|
where guid like 'doi%'
|
15
|
and id.docid = doc.docid
|
16
|
and id.rev = doc.rev
|
17
|
|
18
|
UNION ALL
|
19
|
|
20
|
select
|
21
|
text('testing') as ezid_account,
|
22
|
doc.doctype,
|
23
|
id.guid,
|
24
|
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url,
|
25
|
text('Unknown') as title,
|
26
|
text(null) as creator,
|
27
|
text(null) as publisher,
|
28
|
to_char(doc.date_created, 'YYYY') as pub_date
|
29
|
from identifier id, xml_revisions doc
|
30
|
where guid like 'doi%'
|
31
|
and id.docid = doc.docid
|
32
|
and id.rev = doc.rev;
|
33
|
|
34
|
--update defaults
|
35
|
update doi_registration
|
36
|
set title = 'Data file'
|
37
|
where doctype = 'BIN';
|
38
|
|
39
|
update doi_registration
|
40
|
set title = 'Legacy EML file'
|
41
|
where doctype like '%eml%2.0.0beta%';
|
42
|
|
43
|
-- update title using node information
|
44
|
update doi_registration doi
|
45
|
set title = child.nodedata
|
46
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
47
|
where doi.guid = id.guid
|
48
|
and id.docid = docs.docid
|
49
|
and id.rev = docs.rev
|
50
|
and docs.rootnodeid = nodes.rootnodeid
|
51
|
and nodes.nodeid = child.parentnodeid
|
52
|
and nodes.parentnodeid = parent.nodeid
|
53
|
and nodes.nodename = 'title'
|
54
|
and parent.nodename = 'dataset';
|
55
|
|
56
|
-- update pubDate using nodes
|
57
|
update doi_registration doi
|
58
|
set pub_date = child.nodedata
|
59
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
60
|
where doi.guid = id.guid
|
61
|
and id.docid = docs.docid
|
62
|
and id.rev = docs.rev
|
63
|
and docs.rootnodeid = nodes.rootnodeid
|
64
|
and nodes.nodeid = child.parentnodeid
|
65
|
and nodes.parentnodeid = parent.nodeid
|
66
|
and nodes.nodename = 'pubDate'
|
67
|
and parent.nodename = 'dataset';
|
68
|
|
69
|
-- update publisher using nodes
|
70
|
update doi_registration doi
|
71
|
set publisher = child.nodedata
|
72
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
73
|
where doi.guid = id.guid
|
74
|
and id.docid = docs.docid
|
75
|
and id.rev = docs.rev
|
76
|
and docs.rootnodeid = nodes.rootnodeid
|
77
|
and nodes.nodeid = child.parentnodeid
|
78
|
and nodes.parentnodeid = parent.nodeid
|
79
|
and nodes.nodename = 'organizationName'
|
80
|
and parent.nodename = 'publisher'
|
81
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
82
|
|
83
|
-- update publisher surName
|
84
|
update doi_registration doi
|
85
|
set publisher = child.nodedata
|
86
|
from identifier id,
|
87
|
xml_documents docs,
|
88
|
xml_nodes nodes,
|
89
|
xml_nodes parent,
|
90
|
xml_nodes child,
|
91
|
xml_nodes grandparent
|
92
|
where doi.guid = id.guid
|
93
|
and id.docid = docs.docid
|
94
|
and id.rev = docs.rev
|
95
|
and docs.rootnodeid = nodes.rootnodeid
|
96
|
and nodes.nodeid = child.parentnodeid
|
97
|
and nodes.parentnodeid = parent.nodeid
|
98
|
and parent.parentnodeid = grandparent.nodeid
|
99
|
and nodes.nodename = 'surName'
|
100
|
and parent.nodename = 'individualName'
|
101
|
and grandparent.nodename = 'publisher'
|
102
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
103
|
|
104
|
-- add the first name if we have it
|
105
|
update doi_registration doi
|
106
|
set publisher = child.nodedata ||' '|| creator
|
107
|
from identifier id,
|
108
|
xml_documents docs,
|
109
|
xml_nodes nodes,
|
110
|
xml_nodes parent,
|
111
|
xml_nodes child,
|
112
|
xml_nodes grandparent
|
113
|
where doi.guid = id.guid
|
114
|
and id.docid = docs.docid
|
115
|
and id.rev = docs.rev
|
116
|
and docs.rootnodeid = nodes.rootnodeid
|
117
|
and nodes.nodeid = child.parentnodeid
|
118
|
and nodes.parentnodeid = parent.nodeid
|
119
|
and parent.parentnodeid = grandparent.nodeid
|
120
|
and nodes.nodename = 'givenName'
|
121
|
and parent.nodename = 'individualName'
|
122
|
and grandparent.nodename = 'publisher'
|
123
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
124
|
|
125
|
-- update organization using nodes
|
126
|
update doi_registration doi
|
127
|
set creator = child.nodedata
|
128
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
129
|
where doi.guid = id.guid
|
130
|
and id.docid = docs.docid
|
131
|
and id.rev = docs.rev
|
132
|
and docs.rootnodeid = nodes.rootnodeid
|
133
|
and nodes.nodeid = child.parentnodeid
|
134
|
and nodes.parentnodeid = parent.nodeid
|
135
|
and nodes.nodename = 'organizationName'
|
136
|
and parent.nodename = 'creator'
|
137
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
138
|
|
139
|
-- update creator using nodes
|
140
|
-- creator/individualName/surName
|
141
|
-- creator/individualName/givenName
|
142
|
update doi_registration doi
|
143
|
set creator = child.nodedata
|
144
|
from identifier id,
|
145
|
xml_documents docs,
|
146
|
xml_nodes nodes,
|
147
|
xml_nodes parent,
|
148
|
xml_nodes child,
|
149
|
xml_nodes grandparent
|
150
|
where doi.guid = id.guid
|
151
|
and id.docid = docs.docid
|
152
|
and id.rev = docs.rev
|
153
|
and docs.rootnodeid = nodes.rootnodeid
|
154
|
and nodes.nodeid = child.parentnodeid
|
155
|
and nodes.parentnodeid = parent.nodeid
|
156
|
and parent.parentnodeid = grandparent.nodeid
|
157
|
and nodes.nodename = 'surName'
|
158
|
and parent.nodename = 'individualName'
|
159
|
and grandparent.nodename = 'creator'
|
160
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
161
|
|
162
|
-- add the first name if we have it
|
163
|
update doi_registration doi
|
164
|
set creator = child.nodedata ||' '|| creator
|
165
|
from identifier id,
|
166
|
xml_documents docs,
|
167
|
xml_nodes nodes,
|
168
|
xml_nodes parent,
|
169
|
xml_nodes child,
|
170
|
xml_nodes grandparent
|
171
|
where doi.guid = id.guid
|
172
|
and id.docid = docs.docid
|
173
|
and id.rev = docs.rev
|
174
|
and docs.rootnodeid = nodes.rootnodeid
|
175
|
and nodes.nodeid = child.parentnodeid
|
176
|
and nodes.parentnodeid = parent.nodeid
|
177
|
and parent.parentnodeid = grandparent.nodeid
|
178
|
and nodes.nodename = 'givenName'
|
179
|
and parent.nodename = 'individualName'
|
180
|
and grandparent.nodename = 'creator'
|
181
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
182
|
|
183
|
-- update title using revisions
|
184
|
update doi_registration doi
|
185
|
set title = child.nodedata
|
186
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
187
|
where doi.guid = id.guid
|
188
|
and id.docid = docs.docid
|
189
|
and id.rev = docs.rev
|
190
|
and docs.rootnodeid = nodes.rootnodeid
|
191
|
and nodes.nodeid = child.parentnodeid
|
192
|
and nodes.parentnodeid = parent.nodeid
|
193
|
and nodes.nodename = 'title'
|
194
|
and parent.nodename = 'dataset';
|
195
|
|
196
|
-- update pubDate using revisions
|
197
|
update doi_registration doi
|
198
|
set pub_date = child.nodedata
|
199
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
200
|
where doi.guid = id.guid
|
201
|
and id.docid = docs.docid
|
202
|
and id.rev = docs.rev
|
203
|
and docs.rootnodeid = nodes.rootnodeid
|
204
|
and nodes.nodeid = child.parentnodeid
|
205
|
and nodes.parentnodeid = parent.nodeid
|
206
|
and nodes.nodename = 'pubDate'
|
207
|
and parent.nodename = 'dataset';
|
208
|
|
209
|
-- update publisher using revisions
|
210
|
update doi_registration doi
|
211
|
set publisher = child.nodedata
|
212
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
213
|
where doi.guid = id.guid
|
214
|
and id.docid = docs.docid
|
215
|
and id.rev = docs.rev
|
216
|
and docs.rootnodeid = nodes.rootnodeid
|
217
|
and nodes.nodeid = child.parentnodeid
|
218
|
and nodes.parentnodeid = parent.nodeid
|
219
|
and nodes.nodename = 'organizationName'
|
220
|
and parent.nodename = 'publisher'
|
221
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
222
|
|
223
|
-- use publisher individual name if we have it
|
224
|
update doi_registration doi
|
225
|
set publisher = child.nodedata
|
226
|
from identifier id,
|
227
|
xml_revisions docs,
|
228
|
xml_nodes_revisions nodes,
|
229
|
xml_nodes_revisions parent,
|
230
|
xml_nodes_revisions child,
|
231
|
xml_nodes_revisions grandparent
|
232
|
where doi.guid = id.guid
|
233
|
and id.docid = docs.docid
|
234
|
and id.rev = docs.rev
|
235
|
and docs.rootnodeid = nodes.rootnodeid
|
236
|
and nodes.nodeid = child.parentnodeid
|
237
|
and nodes.parentnodeid = parent.nodeid
|
238
|
and parent.parentnodeid = grandparent.nodeid
|
239
|
and nodes.nodename = 'surName'
|
240
|
and parent.nodename = 'individualName'
|
241
|
and grandparent.nodename = 'publisher'
|
242
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
243
|
|
244
|
--include the first name (careful not to rerun this part)
|
245
|
update doi_registration doi
|
246
|
set publisher = child.nodedata ||' '|| creator
|
247
|
from identifier id,
|
248
|
xml_revisions docs,
|
249
|
xml_nodes_revisions nodes,
|
250
|
xml_nodes_revisions parent,
|
251
|
xml_nodes_revisions child,
|
252
|
xml_nodes_revisions grandparent
|
253
|
where doi.guid = id.guid
|
254
|
and id.docid = docs.docid
|
255
|
and id.rev = docs.rev
|
256
|
and docs.rootnodeid = nodes.rootnodeid
|
257
|
and nodes.nodeid = child.parentnodeid
|
258
|
and nodes.parentnodeid = parent.nodeid
|
259
|
and parent.parentnodeid = grandparent.nodeid
|
260
|
and nodes.nodename = 'givenName'
|
261
|
and parent.nodename = 'individualName'
|
262
|
and grandparent.nodename = 'publisher'
|
263
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
264
|
|
265
|
-- update organization using nodes
|
266
|
update doi_registration doi
|
267
|
set creator = child.nodedata
|
268
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
269
|
where doi.guid = id.guid
|
270
|
and id.docid = docs.docid
|
271
|
and id.rev = docs.rev
|
272
|
and docs.rootnodeid = nodes.rootnodeid
|
273
|
and nodes.nodeid = child.parentnodeid
|
274
|
and nodes.parentnodeid = parent.nodeid
|
275
|
and nodes.nodename = 'organizationName'
|
276
|
and parent.nodename = 'creator'
|
277
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
278
|
|
279
|
-- update creator using revisions
|
280
|
update doi_registration doi
|
281
|
set creator = child.nodedata
|
282
|
from identifier id,
|
283
|
xml_revisions docs,
|
284
|
xml_nodes_revisions nodes,
|
285
|
xml_nodes_revisions parent,
|
286
|
xml_nodes_revisions child,
|
287
|
xml_nodes_revisions grandparent
|
288
|
where doi.guid = id.guid
|
289
|
and id.docid = docs.docid
|
290
|
and id.rev = docs.rev
|
291
|
and docs.rootnodeid = nodes.rootnodeid
|
292
|
and nodes.nodeid = child.parentnodeid
|
293
|
and nodes.parentnodeid = parent.nodeid
|
294
|
and parent.parentnodeid = grandparent.nodeid
|
295
|
and nodes.nodename = 'surName'
|
296
|
and parent.nodename = 'individualName'
|
297
|
and grandparent.nodename = 'creator'
|
298
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
299
|
|
300
|
-- add the first name if we have it
|
301
|
update doi_registration doi
|
302
|
set creator = child.nodedata ||' '|| creator
|
303
|
from identifier id,
|
304
|
xml_revisions docs,
|
305
|
xml_nodes_revisions nodes,
|
306
|
xml_nodes_revisions parent,
|
307
|
xml_nodes_revisions child,
|
308
|
xml_nodes_revisions grandparent
|
309
|
where doi.guid = id.guid
|
310
|
and id.docid = docs.docid
|
311
|
and id.rev = docs.rev
|
312
|
and docs.rootnodeid = nodes.rootnodeid
|
313
|
and nodes.nodeid = child.parentnodeid
|
314
|
and nodes.parentnodeid = parent.nodeid
|
315
|
and parent.parentnodeid = grandparent.nodeid
|
316
|
and nodes.nodename = 'givenName'
|
317
|
and parent.nodename = 'individualName'
|
318
|
and grandparent.nodename = 'creator'
|
319
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
320
|
|
321
|
-- use xml_documents for defaults that are still missing
|
322
|
update doi_registration doi
|
323
|
set creator = doc.user_owner
|
324
|
from xml_documents doc, identifier id
|
325
|
where doi.guid = id.guid
|
326
|
and id.docid = doc.docid
|
327
|
and id.rev = doc.rev
|
328
|
and doi.creator is null;
|
329
|
|
330
|
update doi_registration doi
|
331
|
set creator = doc.user_owner
|
332
|
from xml_revisions doc, identifier id
|
333
|
where doi.guid = id.guid
|
334
|
and id.docid = doc.docid
|
335
|
and id.rev = doc.rev
|
336
|
and doi.creator is null;
|
337
|
|
338
|
-- set publisher
|
339
|
update doi_registration
|
340
|
set publisher = creator
|
341
|
where publisher is null or publisher = '';
|
342
|
|
343
|
-- clean up
|
344
|
update doi_registration
|
345
|
set title = replace(title, E'\n', ' ');
|
346
|
|
347
|
update doi_registration
|
348
|
set title = regexp_replace(title, E'\\s+', ' ', 'g');
|
349
|
|
350
|
update doi_registration
|
351
|
set title = regexp_replace(title, E'^\\s+', '', 'g');
|
352
|
|
353
|
update doi_registration
|
354
|
set ezid_account = 'sb_nceas'
|
355
|
where guid like 'doi:10.5063%';
|
356
|
--16988
|
357
|
|
358
|
update doi_registration
|
359
|
set ezid_account = 'sb_pisco'
|
360
|
where guid like 'doi:10.6085%';
|
361
|
--98078
|
362
|
|
363
|
update doi_registration
|
364
|
set ezid_account = 'lternet'
|
365
|
where guid like 'doi:10.6073%';
|
366
|
--49482
|
367
|
|
368
|
/**
|
369
|
* Additional modifications, July 5th, 2012
|
370
|
*/
|
371
|
ALTER TABLE doi_registration
|
372
|
ADD COLUMN resourceType text,
|
373
|
ADD COLUMN objectFormat text,
|
374
|
ADD COLUMN obsoletedBy text,
|
375
|
ADD COLUMN obsoletes text,
|
376
|
ADD COLUMN resourceMapId text,
|
377
|
ADD COLUMN resourceMapLocation text,
|
378
|
ADD COLUMN access text;
|
379
|
|
380
|
-- update access values
|
381
|
update doi_registration
|
382
|
set access = 'protected';
|
383
|
|
384
|
update doi_registration doi
|
385
|
set access = 'public'
|
386
|
from xml_access a
|
387
|
where doi.guid = a.guid
|
388
|
and a.principal_name = 'public'
|
389
|
and a.permission >= '4'
|
390
|
and a.perm_type = 'allow';
|
391
|
|
392
|
-- update the objectFormat from xml_documents
|
393
|
update doi_registration doi
|
394
|
set objectFormat = doc.doctype
|
395
|
from identifier id, xml_documents doc
|
396
|
where doi.guid = id.guid
|
397
|
and id.docid = doc.docid
|
398
|
and id.rev = doc.rev;
|
399
|
|
400
|
-- update the objectFormat from xml_revisions
|
401
|
update doi_registration doi
|
402
|
set objectFormat = rev.doctype
|
403
|
from identifier id, xml_revisions rev
|
404
|
where doi.guid = id.guid
|
405
|
and id.docid = rev.docid
|
406
|
and id.rev = rev.rev;
|
407
|
|
408
|
--update resourceType
|
409
|
update doi_registration
|
410
|
set resourceType = 'Dataset/METADATA';
|
411
|
|
412
|
update doi_registration
|
413
|
set resourceType = 'Dataset/DATA'
|
414
|
where objectFormat = 'BIN';
|
415
|
|
416
|
-- update the objectFormat from SM table (will be subset)
|
417
|
update doi_registration doi
|
418
|
set objectFormat = sm.object_format
|
419
|
from systemMetadata sm
|
420
|
where doi.guid = sm.guid;
|
421
|
--16938
|
422
|
|
423
|
--update revision history from SM
|
424
|
update doi_registration doi
|
425
|
set obsoletes = sm.obsoletes,
|
426
|
obsoletedBy = sm.obsoleted_by
|
427
|
from systemMetadata sm
|
428
|
where doi.guid = sm.guid;
|
429
|
|
430
|
/** use plain old revision history **/
|
431
|
-- update obsoletedby
|
432
|
update doi_registration doi
|
433
|
set obsoletedBy = newer.guid
|
434
|
from identifier id, identifier newer
|
435
|
where doi.guid = id.guid
|
436
|
and id.docid = newer.docid
|
437
|
and newer.rev = (select min(next.rev) from identifier next where next.docid = id.docid and next.rev > id.rev);
|
438
|
|
439
|
-- update the obsolets
|
440
|
update doi_registration doi
|
441
|
set obsoletes = older.guid
|
442
|
from identifier id, identifier older
|
443
|
where doi.guid = id.guid
|
444
|
and id.docid = older.docid
|
445
|
and older.rev = (select max(prev.rev) from identifier prev where prev.docid = id.docid and prev.rev < id.rev);
|
446
|
|
447
|
/**
|
448
|
select doi.guid, older.guid as obsoletes
|
449
|
from doi_registration doi, identifier id, identifier older
|
450
|
where doi.guid = id.guid
|
451
|
and id.docid = older.docid
|
452
|
and older.rev = (select max(prev.rev) from identifier prev where prev.docid = id.docid and prev.rev < id.rev)
|
453
|
and doi.guid like 'doi:10.5063/AA/ABS.4%';
|
454
|
**/
|
455
|
|
456
|
|
457
|
--update the resourcemapid for described data
|
458
|
CREATE TABLE ecogrid_docids AS
|
459
|
select distinct id.docid, id.rev, id.guid, substring(xpi.nodedata from 'ecogrid://knb/(.*)$') as data_docid
|
460
|
from xml_path_index xpi, identifier id, xml_documents xmld
|
461
|
where xpi.docid = xmld.docid
|
462
|
and xmld.docid = id.docid
|
463
|
and xmld.rev = id.rev
|
464
|
and xpi.path like 'dataset/%/physical/distribution/online/url'
|
465
|
and xpi.nodedata like 'ecogrid%'
|
466
|
and id.guid in (select guid from doi_registration);
|
467
|
|
468
|
-- include revisions for described data
|
469
|
INSERT INTO ecogrid_docids
|
470
|
select distinct id.docid, id.rev, id.guid, substring(child.nodedata from 'ecogrid://knb/(.*)$') as data_docid
|
471
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
472
|
where id.docid = docs.docid
|
473
|
and id.rev = docs.rev
|
474
|
and docs.rootnodeid = nodes.rootnodeid
|
475
|
and nodes.nodeid = child.parentnodeid
|
476
|
and nodes.parentnodeid = parent.nodeid
|
477
|
and nodes.nodename = 'url'
|
478
|
and parent.nodename = 'online'
|
479
|
and child.nodedata like 'ecogrid%'
|
480
|
and child.nodetype = 'TEXT'
|
481
|
and id.guid in (select guid from doi_registration);
|
482
|
|
483
|
-- Set the resource map for the data files (NOTE: some of thee maps might not actually exist on the system depending on how successful the ORE generation was)
|
484
|
update doi_registration doi
|
485
|
set resourceMapId = 'resourceMap_'||eco.docid||'.'||eco.rev
|
486
|
from identifier id, ecogrid_docids eco
|
487
|
where doi.guid = id.guid
|
488
|
and id.docid||'.'||id.rev = eco.data_docid;
|
489
|
|
490
|
-- set the resource map id for the metadata file that did most of the packaging work!
|
491
|
update doi_registration doi
|
492
|
set resourceMapId = 'resourceMap_'||eco.docid||'.'||eco.rev
|
493
|
from ecogrid_docids eco
|
494
|
where doi.guid = eco.guid;
|
495
|
|
496
|
select count(*)
|
497
|
from doi_registration
|
498
|
where resourceMapId is null;
|
499
|
|
500
|
--update the resource map location
|
501
|
update doi_registration
|
502
|
set resourceMapLocation = 'https://cn.dataone.org/cn/v1/resolve/' || resourceMapId
|
503
|
where resourceMapId is not null;
|
504
|
|
505
|
-- fix null values (should not have to have this now, but it will not hurt)
|
506
|
update doi_registration doi
|
507
|
set creator = user_owner
|
508
|
from identifier id, xml_documents docs
|
509
|
where doi.guid = id.guid
|
510
|
and id.docid = docs.docid
|
511
|
and id.rev= docs.rev
|
512
|
and (creator is null or trim(creator) = '');
|
513
|
|
514
|
update doi_registration doi
|
515
|
set publisher = user_owner
|
516
|
from identifier id, xml_documents docs
|
517
|
where doi.guid = id.guid
|
518
|
and id.docid = docs.docid
|
519
|
and id.rev= docs.rev
|
520
|
and (publisher is null or trim(publisher) = '');
|
521
|
|
522
|
update doi_registration doi
|
523
|
set creator = user_owner
|
524
|
from identifier id, xml_revisions docs
|
525
|
where doi.guid = id.guid
|
526
|
and id.docid = docs.docid
|
527
|
and id.rev= docs.rev
|
528
|
and (creator is null or trim(creator) = '');
|
529
|
|
530
|
update doi_registration doi
|
531
|
set publisher = user_owner
|
532
|
from identifier id, xml_revisions docs
|
533
|
where doi.guid = id.guid
|
534
|
and id.docid = docs.docid
|
535
|
and id.rev= docs.rev
|
536
|
and (publisher is null or trim(publisher) = '');
|
537
|
|
538
|
-- fix a previous mistake
|
539
|
update doi_registration
|
540
|
set publisher = null
|
541
|
where publisher = 'document';
|
542
|
|
543
|
--update creator usinig LDAP lookup
|
544
|
CREATE TABLE ecoinfo_dn (dn text, givenName text, sn text);
|
545
|
--TRUNCATE TABLE ecoinfo_dn;
|
546
|
COPY ecoinfo_dn FROM '/tmp/ecoinfo_dn.csv' WITH CSV HEADER;
|
547
|
|
548
|
update doi_registration
|
549
|
set creator =
|
550
|
case when givenName is null then
|
551
|
sn
|
552
|
else
|
553
|
givenName|| ' ' || sn
|
554
|
end
|
555
|
from ecoinfo_dn
|
556
|
where trim(both from lower(creator)) = lower(dn);
|
557
|
|
558
|
update doi_registration
|
559
|
set publisher =
|
560
|
case when givenName is null then
|
561
|
sn
|
562
|
else
|
563
|
givenName|| ' ' || sn
|
564
|
end
|
565
|
from ecoinfo_dn
|
566
|
where trim(both from lower(publisher)) = lower(dn);
|
567
|
|
568
|
select creator, count(*) as cnt
|
569
|
from doi_registration
|
570
|
where creator like 'uid=%'
|
571
|
group by creator
|
572
|
order by cnt desc;
|
573
|
|
574
|
-- update the data files with owner/publisher from EML entry
|
575
|
update doi_registration doi
|
576
|
set creator = meta.creator,
|
577
|
publisher = meta.publisher
|
578
|
from doi_registration meta, identifier id, ecogrid_docids eco
|
579
|
where doi.guid = id.guid
|
580
|
and id.docid||'.'||id.rev = eco.data_docid
|
581
|
and eco.guid = meta.guid;
|
582
|
|
583
|
-- clean up KBS entries that have empty user_owner
|
584
|
update doi_registration
|
585
|
set creator = 'Kellogg Biological Station'
|
586
|
where (creator is null or trim(creator) = '')
|
587
|
and guid like '%knb-lter-kbs%';
|
588
|
|
589
|
update doi_registration
|
590
|
set publisher = 'Kellogg Biological Station'
|
591
|
where (publisher is null or trim(publisher) = '')
|
592
|
and guid like '%knb-lter-kbs%';
|
593
|
|
594
|
-- more clean up
|
595
|
update doi_registration
|
596
|
set creator = trim(creator),
|
597
|
publisher = trim(publisher);
|
598
|
|
599
|
|
600
|
-- copy to the external file
|
601
|
COPY
|
602
|
(select ezid_account,
|
603
|
guid as dc_identifier,
|
604
|
url as datacite_url,
|
605
|
title as dc_title,
|
606
|
creator as dc_creator,
|
607
|
publisher as dc_publisher,
|
608
|
pub_date as datacite_publicationYear,
|
609
|
split_part(resourceType, '/', 1) as datacite_resourceTypeGeneral,
|
610
|
split_part(resourceType, '/', 2) as datacite_resourceType,
|
611
|
objectFormat as datacite_format,
|
612
|
obsoletedBy as datacite_relatedIdentifier_isPreviousVersionOf,
|
613
|
obsoletes as datacite_relatedIdentifier_isNewVersionOf,
|
614
|
resourceMapId as datacite_relatedIdentifier_isPartOf,
|
615
|
resourceMapLocation as d1_resourceMapLocation,
|
616
|
access as d1_read_access
|
617
|
from doi_registration
|
618
|
order by dc_identifier)
|
619
|
TO '/tmp/doi_registration.csv'
|
620
|
WITH CSV HEADER;
|
621
|
--164548
|
622
|
|
623
|
--drop table doi_registration;
|
624
|
--drop table ecogrid_docids;
|
625
|
--drop table ecoinfo_dn;
|
626
|
|