1
|
-- use default values initially
|
2
|
CREATE TABLE doi_registration AS
|
3
|
|
4
|
select
|
5
|
text('testing') as ezid_account,
|
6
|
doc.doctype,
|
7
|
id.guid,
|
8
|
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url,
|
9
|
text('Unknown') as title,
|
10
|
text(null) as creator,
|
11
|
text(null) as publisher,
|
12
|
to_char(doc.date_created, 'YYYY') as pub_date
|
13
|
from identifier id, xml_documents doc
|
14
|
where guid like 'doi%'
|
15
|
and id.docid = doc.docid
|
16
|
and id.rev = doc.rev
|
17
|
|
18
|
UNION ALL
|
19
|
|
20
|
select
|
21
|
text('testing') as ezid_account,
|
22
|
doc.doctype,
|
23
|
id.guid,
|
24
|
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url,
|
25
|
text('Unknown') as title,
|
26
|
text(null) as creator,
|
27
|
text(null) as publisher,
|
28
|
to_char(doc.date_created, 'YYYY') as pub_date
|
29
|
from identifier id, xml_revisions doc
|
30
|
where guid like 'doi%'
|
31
|
and id.docid = doc.docid
|
32
|
and id.rev = doc.rev;
|
33
|
|
34
|
--update defaults
|
35
|
update doi_registration
|
36
|
set title = 'Data file'
|
37
|
where doctype = 'BIN';
|
38
|
|
39
|
update doi_registration
|
40
|
set title = 'Legacy EML file'
|
41
|
where doctype like '%eml%2.0.0beta%';
|
42
|
|
43
|
-- update title using node information
|
44
|
update doi_registration doi
|
45
|
set title = child.nodedata
|
46
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
47
|
where doi.guid = id.guid
|
48
|
and id.docid = docs.docid
|
49
|
and id.rev = docs.rev
|
50
|
and docs.rootnodeid = nodes.rootnodeid
|
51
|
and nodes.nodeid = child.parentnodeid
|
52
|
and nodes.parentnodeid = parent.nodeid
|
53
|
and nodes.nodename = 'title'
|
54
|
and parent.nodename = 'dataset';
|
55
|
|
56
|
-- update pubDate using nodes
|
57
|
update doi_registration doi
|
58
|
set pub_date = child.nodedata
|
59
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
60
|
where doi.guid = id.guid
|
61
|
and id.docid = docs.docid
|
62
|
and id.rev = docs.rev
|
63
|
and docs.rootnodeid = nodes.rootnodeid
|
64
|
and nodes.nodeid = child.parentnodeid
|
65
|
and nodes.parentnodeid = parent.nodeid
|
66
|
and nodes.nodename = 'pubDate'
|
67
|
and parent.nodename = 'dataset';
|
68
|
|
69
|
-- update publisher using nodes
|
70
|
update doi_registration doi
|
71
|
set publisher = child.nodedata
|
72
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
73
|
where doi.guid = id.guid
|
74
|
and id.docid = docs.docid
|
75
|
and id.rev = docs.rev
|
76
|
and docs.rootnodeid = nodes.rootnodeid
|
77
|
and nodes.nodeid = child.parentnodeid
|
78
|
and nodes.parentnodeid = parent.nodeid
|
79
|
and nodes.nodename = 'publisher'
|
80
|
and parent.nodename = 'dataset'
|
81
|
and child.nodetype = 'TEXT';
|
82
|
|
83
|
-- update organization using nodes
|
84
|
update doi_registration doi
|
85
|
set creator = child.nodedata
|
86
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
87
|
where doi.guid = id.guid
|
88
|
and id.docid = docs.docid
|
89
|
and id.rev = docs.rev
|
90
|
and docs.rootnodeid = nodes.rootnodeid
|
91
|
and nodes.nodeid = child.parentnodeid
|
92
|
and nodes.parentnodeid = parent.nodeid
|
93
|
and nodes.nodename = 'organizationName'
|
94
|
and parent.nodename = 'creator'
|
95
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
96
|
|
97
|
-- update creator using nodes
|
98
|
-- creator/individualName/surName
|
99
|
-- creator/individualName/givenName
|
100
|
update doi_registration doi
|
101
|
set creator = child.nodedata
|
102
|
from identifier id,
|
103
|
xml_documents docs,
|
104
|
xml_nodes nodes,
|
105
|
xml_nodes parent,
|
106
|
xml_nodes child,
|
107
|
xml_nodes grandparent
|
108
|
where doi.guid = id.guid
|
109
|
and id.docid = docs.docid
|
110
|
and id.rev = docs.rev
|
111
|
and docs.rootnodeid = nodes.rootnodeid
|
112
|
and nodes.nodeid = child.parentnodeid
|
113
|
and nodes.parentnodeid = parent.nodeid
|
114
|
and parent.parentnodeid = grandparent.nodeid
|
115
|
and nodes.nodename = 'surName'
|
116
|
and parent.nodename = 'individualName'
|
117
|
and grandparent.nodename = 'creator'
|
118
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
119
|
|
120
|
-- add the first name if we have it
|
121
|
update doi_registration doi
|
122
|
set creator = child.nodedata ||' '|| creator
|
123
|
from identifier id,
|
124
|
xml_documents docs,
|
125
|
xml_nodes nodes,
|
126
|
xml_nodes parent,
|
127
|
xml_nodes child,
|
128
|
xml_nodes grandparent
|
129
|
where doi.guid = id.guid
|
130
|
and id.docid = docs.docid
|
131
|
and id.rev = docs.rev
|
132
|
and docs.rootnodeid = nodes.rootnodeid
|
133
|
and nodes.nodeid = child.parentnodeid
|
134
|
and nodes.parentnodeid = parent.nodeid
|
135
|
and parent.parentnodeid = grandparent.nodeid
|
136
|
and nodes.nodename = 'givenName'
|
137
|
and parent.nodename = 'individualName'
|
138
|
and grandparent.nodename = 'creator'
|
139
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
140
|
|
141
|
-- update title using revisions
|
142
|
update doi_registration doi
|
143
|
set title = child.nodedata
|
144
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
145
|
where doi.guid = id.guid
|
146
|
and id.docid = docs.docid
|
147
|
and id.rev = docs.rev
|
148
|
and docs.rootnodeid = nodes.rootnodeid
|
149
|
and nodes.nodeid = child.parentnodeid
|
150
|
and nodes.parentnodeid = parent.nodeid
|
151
|
and nodes.nodename = 'title'
|
152
|
and parent.nodename = 'dataset';
|
153
|
|
154
|
-- update pubDate using revisions
|
155
|
update doi_registration doi
|
156
|
set pub_date = child.nodedata
|
157
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
158
|
where doi.guid = id.guid
|
159
|
and id.docid = docs.docid
|
160
|
and id.rev = docs.rev
|
161
|
and docs.rootnodeid = nodes.rootnodeid
|
162
|
and nodes.nodeid = child.parentnodeid
|
163
|
and nodes.parentnodeid = parent.nodeid
|
164
|
and nodes.nodename = 'pubDate'
|
165
|
and parent.nodename = 'dataset';
|
166
|
|
167
|
-- update publisher using revisions
|
168
|
update doi_registration doi
|
169
|
set publisher = child.nodedata
|
170
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
171
|
where doi.guid = id.guid
|
172
|
and id.docid = docs.docid
|
173
|
and id.rev = docs.rev
|
174
|
and docs.rootnodeid = nodes.rootnodeid
|
175
|
and nodes.nodeid = child.parentnodeid
|
176
|
and nodes.parentnodeid = parent.nodeid
|
177
|
and nodes.nodename = 'publisher'
|
178
|
and parent.nodename = 'dataset'
|
179
|
and child.nodetype = 'TEXT';
|
180
|
|
181
|
-- update organization using nodes
|
182
|
update doi_registration doi
|
183
|
set creator = child.nodedata
|
184
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
185
|
where doi.guid = id.guid
|
186
|
and id.docid = docs.docid
|
187
|
and id.rev = docs.rev
|
188
|
and docs.rootnodeid = nodes.rootnodeid
|
189
|
and nodes.nodeid = child.parentnodeid
|
190
|
and nodes.parentnodeid = parent.nodeid
|
191
|
and nodes.nodename = 'organizationName'
|
192
|
and parent.nodename = 'creator'
|
193
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
194
|
|
195
|
-- update creator using revisions
|
196
|
update doi_registration doi
|
197
|
set creator = child.nodedata
|
198
|
from identifier id,
|
199
|
xml_revisions docs,
|
200
|
xml_nodes_revisions nodes,
|
201
|
xml_nodes_revisions parent,
|
202
|
xml_nodes_revisions child,
|
203
|
xml_nodes_revisions grandparent
|
204
|
where doi.guid = id.guid
|
205
|
and id.docid = docs.docid
|
206
|
and id.rev = docs.rev
|
207
|
and docs.rootnodeid = nodes.rootnodeid
|
208
|
and nodes.nodeid = child.parentnodeid
|
209
|
and nodes.parentnodeid = parent.nodeid
|
210
|
and parent.parentnodeid = grandparent.nodeid
|
211
|
and nodes.nodename = 'surName'
|
212
|
and parent.nodename = 'individualName'
|
213
|
and grandparent.nodename = 'creator'
|
214
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
215
|
|
216
|
-- add the first name if we have it
|
217
|
update doi_registration doi
|
218
|
set creator = child.nodedata ||' '|| creator
|
219
|
from identifier id,
|
220
|
xml_revisions docs,
|
221
|
xml_nodes_revisions nodes,
|
222
|
xml_nodes_revisions parent,
|
223
|
xml_nodes_revisions child,
|
224
|
xml_nodes_revisions grandparent
|
225
|
where doi.guid = id.guid
|
226
|
and id.docid = docs.docid
|
227
|
and id.rev = docs.rev
|
228
|
and docs.rootnodeid = nodes.rootnodeid
|
229
|
and nodes.nodeid = child.parentnodeid
|
230
|
and nodes.parentnodeid = parent.nodeid
|
231
|
and parent.parentnodeid = grandparent.nodeid
|
232
|
and nodes.nodename = 'givenName'
|
233
|
and parent.nodename = 'individualName'
|
234
|
and grandparent.nodename = 'creator'
|
235
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
236
|
|
237
|
-- use xml_documents for defaults that are still missing
|
238
|
update doi_registration doi
|
239
|
set creator = doc.user_owner
|
240
|
from xml_documents doc, identifier id
|
241
|
where doi.guid = id.guid
|
242
|
and id.docid = doc.docid
|
243
|
and id.rev = doc.rev
|
244
|
and doi.creator is null;
|
245
|
|
246
|
update doi_registration doi
|
247
|
set creator = doc.user_owner
|
248
|
from xml_revisions doc, identifier id
|
249
|
where doi.guid = id.guid
|
250
|
and id.docid = doc.docid
|
251
|
and id.rev = doc.rev
|
252
|
and doi.creator is null;
|
253
|
|
254
|
-- set publisher
|
255
|
update doi_registration
|
256
|
set publisher = creator
|
257
|
where publisher is null or publisher = '';
|
258
|
|
259
|
-- clean up
|
260
|
update doi_registration
|
261
|
set title = replace(title, E'\n', ' ');
|
262
|
|
263
|
update doi_registration
|
264
|
set title = regexp_replace(title, E'\\s+', ' ', 'g');
|
265
|
|
266
|
update doi_registration
|
267
|
set title = regexp_replace(title, E'^\\s+', '', 'g');
|
268
|
|
269
|
update doi_registration
|
270
|
set ezid_account = 'KNB'
|
271
|
where guid like 'doi:10.5063%';
|
272
|
--16988
|
273
|
|
274
|
update doi_registration
|
275
|
set ezid_account = 'PISCO'
|
276
|
where guid like 'doi:10.6085%';
|
277
|
--98078
|
278
|
|
279
|
update doi_registration
|
280
|
set ezid_account = 'LTER'
|
281
|
where guid like 'doi:10.6073%';
|
282
|
--49482
|
283
|
|
284
|
/**
|
285
|
* Additional modifications, July 5th, 2012
|
286
|
*/
|
287
|
ALTER TABLE doi_registration
|
288
|
ADD COLUMN resourceType text,
|
289
|
ADD COLUMN objectFormat text,
|
290
|
ADD COLUMN obsoletedBy text,
|
291
|
ADD COLUMN obsoletes text,
|
292
|
ADD COLUMN resourceMapId text,
|
293
|
ADD COLUMN resourceMapLocation text,
|
294
|
ADD COLUMN access text;
|
295
|
|
296
|
-- update access values
|
297
|
update doi_registration
|
298
|
set access = 'protected';
|
299
|
|
300
|
update doi_registration doi
|
301
|
set access = 'public'
|
302
|
from xml_access a
|
303
|
where doi.guid = a.guid
|
304
|
and a.principal_name = 'public'
|
305
|
and a.permission >= '4'
|
306
|
and a.perm_type = 'allow';
|
307
|
|
308
|
-- update the objectFormat from xml_documents
|
309
|
update doi_registration doi
|
310
|
set objectFormat = doc.doctype
|
311
|
from identifier id, xml_documents doc
|
312
|
where doi.guid = id.guid
|
313
|
and id.docid = doc.docid;
|
314
|
|
315
|
-- update the objectFormat from xml_revisions
|
316
|
update doi_registration doi
|
317
|
set objectFormat = rev.doctype
|
318
|
from identifier id, xml_revisions rev
|
319
|
where doi.guid = id.guid
|
320
|
and id.docid = rev.docid;
|
321
|
|
322
|
--update resourceType
|
323
|
update doi_registration
|
324
|
set resourceType = 'Dataset/METADATA';
|
325
|
|
326
|
update doi_registration
|
327
|
set resourceType = 'Dataset/DATA'
|
328
|
where objectFormat = 'BIN';
|
329
|
|
330
|
-- update the objectFormat from SM table (will be subset)
|
331
|
update doi_registration doi
|
332
|
set objectFormat = sm.object_format
|
333
|
from systemMetadata sm
|
334
|
where doi.guid = sm.guid;
|
335
|
--16938
|
336
|
|
337
|
--update revision history from SM
|
338
|
update doi_registration doi
|
339
|
set obsoletes = sm.obsoletes,
|
340
|
obsoletedBy = sm.obsoleted_by
|
341
|
from systemMetadata sm
|
342
|
where doi.guid = sm.guid;
|
343
|
|
344
|
/** use plain old revision history **/
|
345
|
-- update obsoletedby
|
346
|
update doi_registration doi
|
347
|
set obsoletedBy = newer.guid
|
348
|
from identifier id, identifier newer
|
349
|
where doi.guid = id.guid
|
350
|
and id.docid = newer.docid
|
351
|
and newer.rev = (select min(next.rev) from identifier next where next.docid = id.docid and next.rev > id.rev);
|
352
|
|
353
|
-- update the obsolets
|
354
|
update doi_registration doi
|
355
|
set obsoletes = older.guid
|
356
|
from identifier id, identifier older
|
357
|
where doi.guid = id.guid
|
358
|
and id.docid = older.docid
|
359
|
and older.rev = (select max(prev.rev) from identifier prev where prev.docid = id.docid and prev.rev < id.rev);
|
360
|
|
361
|
/**
|
362
|
select doi.guid, older.guid as obsoletes
|
363
|
from doi_registration doi, identifier id, identifier older
|
364
|
where doi.guid = id.guid
|
365
|
and id.docid = older.docid
|
366
|
and older.rev = (select max(prev.rev) from identifier prev where prev.docid = id.docid and prev.rev < id.rev)
|
367
|
and doi.guid like 'doi:10.5063/AA/ABS.4%';
|
368
|
**/
|
369
|
|
370
|
|
371
|
-- update resourceMap pointer
|
372
|
-- NOTE: some of these maps might not actually exist
|
373
|
update doi_registration doi
|
374
|
set resourceMapId = 'resourceMap_'||id.docid||'.'||id.rev
|
375
|
from identifier id
|
376
|
where doi.guid = id.guid
|
377
|
and id.docid = rev.docid
|
378
|
and objectFormat like 'eml%';
|
379
|
|
380
|
--update the resourcemapid for described data
|
381
|
CREATE TABLE ecogrid_docids AS
|
382
|
select distinct id.docid, id.rev, id.guid, substring(xpi.nodedata from 'ecogrid://knb/(.*)$') as data_docid
|
383
|
from xml_path_index xpi, identifier id, xml_documents xmld
|
384
|
where xpi.docid = xmld.docid
|
385
|
and xmld.docid = id.docid
|
386
|
and xmld.rev = id.rev
|
387
|
and xpi.path like 'dataset/%/physical/distribution/online/url'
|
388
|
and xpi.nodedata like 'ecogrid%'
|
389
|
and id.guid in (select guid from doi_registration);
|
390
|
|
391
|
-- include revisions for described data
|
392
|
INSERT INTO ecogrid_docids
|
393
|
select distinct id.docid, id.rev, id.guid, substring(child.nodedata from 'ecogrid://knb/(.*)$') as data_docid
|
394
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
395
|
where id.docid = docs.docid
|
396
|
and id.rev = docs.rev
|
397
|
and docs.rootnodeid = nodes.rootnodeid
|
398
|
and nodes.nodeid = child.parentnodeid
|
399
|
and nodes.parentnodeid = parent.nodeid
|
400
|
and nodes.nodename = 'url'
|
401
|
and parent.nodename = 'online'
|
402
|
and child.nodedata like 'ecogrid%'
|
403
|
and child.nodetype = 'TEXT'
|
404
|
and id.guid in (select guid from doi_registration);
|
405
|
|
406
|
-- NOTE: some of thee maps might not actually exist
|
407
|
update doi_registration doi
|
408
|
set resourceMapId = 'resourceMap_'||eco.docid||'.'||eco.rev
|
409
|
from identifier id, ecogrid_docids eco
|
410
|
where doi.guid = id.guid
|
411
|
and id.docid||'.'||id.rev = eco.data_docid;
|
412
|
|
413
|
select count(*)
|
414
|
from doi_registration
|
415
|
where resourceMapId is null;
|
416
|
|
417
|
--update the resource map location
|
418
|
update doi_registration
|
419
|
set resourceMapLocation = 'https://cn.dataone.org/cn/v1/resolve/' || resourceMapId
|
420
|
where resourceMapId is not null;
|
421
|
|
422
|
-- fix null values (should not have to have this now, but it will not hurt)
|
423
|
update doi_registration doi
|
424
|
set creator = user_owner
|
425
|
from identifier id, xml_documents docs
|
426
|
where doi.guid = id.guid
|
427
|
and id.docid = docs.docid
|
428
|
and id.rev= docs.rev
|
429
|
and creator is null;
|
430
|
|
431
|
update doi_registration doi
|
432
|
set publisher = user_owner
|
433
|
from identifier id, xml_documents docs
|
434
|
where doi.guid = id.guid
|
435
|
and id.docid = docs.docid
|
436
|
and id.rev= docs.rev
|
437
|
and publisher is null;
|
438
|
|
439
|
update doi_registration doi
|
440
|
set creator = user_owner
|
441
|
from identifier id, xml_revisions docs
|
442
|
where doi.guid = id.guid
|
443
|
and id.docid = docs.docid
|
444
|
and id.rev= docs.rev
|
445
|
and creator is null;
|
446
|
|
447
|
update doi_registration doi
|
448
|
set publisher = user_owner
|
449
|
from identifier id, xml_revisions docs
|
450
|
where doi.guid = id.guid
|
451
|
and id.docid = docs.docid
|
452
|
and id.rev= docs.rev
|
453
|
and publisher is null;
|
454
|
|
455
|
--update creator usinig LDAP lookup
|
456
|
CREATE TABLE ecoinfo_dn (dn text, givenName text, sn text);
|
457
|
COPY ecoinfo_dn FROM '/tmp/ecoinfo_dn.csv' WITH CSV HEADER;
|
458
|
|
459
|
update doi_registration
|
460
|
set creator =
|
461
|
case when givenName is null then
|
462
|
sn
|
463
|
else
|
464
|
givenName|| ' ' || sn
|
465
|
end
|
466
|
from ecoinfo_dn
|
467
|
where creator = dn;
|
468
|
|
469
|
update doi_registration
|
470
|
set publisher =
|
471
|
case when givenName is null then
|
472
|
sn
|
473
|
else
|
474
|
givenName|| ' ' || sn
|
475
|
end
|
476
|
from ecoinfo_dn
|
477
|
where publisher = dn;
|
478
|
|
479
|
select creator, count(*) as cnt
|
480
|
from doi_registration
|
481
|
where creator like 'uid=%'
|
482
|
group by creator
|
483
|
order by cnt desc;
|
484
|
|
485
|
-- copy to the external file
|
486
|
COPY
|
487
|
(select ezid_account,
|
488
|
guid as dc_identifier,
|
489
|
url as datacite_url,
|
490
|
title as dc_title,
|
491
|
creator as dc_creator,
|
492
|
publisher as dc_publisher,
|
493
|
pub_date as datacite_publicationYear,
|
494
|
resourceType,
|
495
|
objectFormat,
|
496
|
obsoletedBy,
|
497
|
obsoletes,
|
498
|
resourceMapId,
|
499
|
resourceMapLocation,
|
500
|
access
|
501
|
from doi_registration
|
502
|
order by dc_identifier)
|
503
|
TO '/tmp/doi_registration.csv'
|
504
|
WITH CSV HEADER;
|
505
|
--164548
|
506
|
|
507
|
--drop table doi_registration;
|
508
|
--drop table ecogrid_docids;
|
509
|
--drop table ecoinfo_dn;
|
510
|
|