1
|
-- use default values initially
|
2
|
CREATE TABLE doi_registration AS
|
3
|
|
4
|
select
|
5
|
text('testing') as ezid_account,
|
6
|
doc.doctype,
|
7
|
id.guid,
|
8
|
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url,
|
9
|
text('Unknown') as title,
|
10
|
text(null) as creator,
|
11
|
text(null) as publisher,
|
12
|
to_char(doc.date_created, 'YYYY') as pub_date
|
13
|
from identifier id, xml_documents doc
|
14
|
where guid like 'doi%'
|
15
|
and id.docid = doc.docid
|
16
|
and id.rev = doc.rev
|
17
|
|
18
|
UNION ALL
|
19
|
|
20
|
select
|
21
|
text('testing') as ezid_account,
|
22
|
doc.doctype,
|
23
|
id.guid,
|
24
|
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url,
|
25
|
text('Unknown') as title,
|
26
|
text(null) as creator,
|
27
|
text(null) as publisher,
|
28
|
to_char(doc.date_created, 'YYYY') as pub_date
|
29
|
from identifier id, xml_revisions doc
|
30
|
where guid like 'doi%'
|
31
|
and id.docid = doc.docid
|
32
|
and id.rev = doc.rev;
|
33
|
|
34
|
--update defaults
|
35
|
update doi_registration
|
36
|
set title = 'Data file'
|
37
|
where doctype = 'BIN';
|
38
|
|
39
|
update doi_registration
|
40
|
set title = 'Legacy EML file'
|
41
|
where doctype like '%eml%2.0.0beta%';
|
42
|
|
43
|
-- update title using node information
|
44
|
update doi_registration doi
|
45
|
set title = child.nodedata
|
46
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
47
|
where doi.guid = id.guid
|
48
|
and id.docid = docs.docid
|
49
|
and id.rev = docs.rev
|
50
|
and docs.rootnodeid = nodes.rootnodeid
|
51
|
and nodes.nodeid = child.parentnodeid
|
52
|
and nodes.parentnodeid = parent.nodeid
|
53
|
and nodes.nodename = 'title'
|
54
|
and parent.nodename = 'dataset';
|
55
|
|
56
|
-- update pubDate using nodes
|
57
|
update doi_registration doi
|
58
|
set pub_date = child.nodedata
|
59
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
60
|
where doi.guid = id.guid
|
61
|
and id.docid = docs.docid
|
62
|
and id.rev = docs.rev
|
63
|
and docs.rootnodeid = nodes.rootnodeid
|
64
|
and nodes.nodeid = child.parentnodeid
|
65
|
and nodes.parentnodeid = parent.nodeid
|
66
|
and nodes.nodename = 'pubDate'
|
67
|
and parent.nodename = 'dataset';
|
68
|
|
69
|
-- update publisher using nodes
|
70
|
update doi_registration doi
|
71
|
set publisher = child.nodedata
|
72
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
73
|
where doi.guid = id.guid
|
74
|
and id.docid = docs.docid
|
75
|
and id.rev = docs.rev
|
76
|
and docs.rootnodeid = nodes.rootnodeid
|
77
|
and nodes.nodeid = child.parentnodeid
|
78
|
and nodes.parentnodeid = parent.nodeid
|
79
|
and nodes.nodename = 'organizationName'
|
80
|
and parent.nodename = 'publisher'
|
81
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
82
|
|
83
|
-- update publisher surName
|
84
|
update doi_registration doi
|
85
|
set publisher = child.nodedata
|
86
|
from identifier id,
|
87
|
xml_documents docs,
|
88
|
xml_nodes nodes,
|
89
|
xml_nodes parent,
|
90
|
xml_nodes child,
|
91
|
xml_nodes grandparent
|
92
|
where doi.guid = id.guid
|
93
|
and id.docid = docs.docid
|
94
|
and id.rev = docs.rev
|
95
|
and docs.rootnodeid = nodes.rootnodeid
|
96
|
and nodes.nodeid = child.parentnodeid
|
97
|
and nodes.parentnodeid = parent.nodeid
|
98
|
and parent.parentnodeid = grandparent.nodeid
|
99
|
and nodes.nodename = 'surName'
|
100
|
and parent.nodename = 'individualName'
|
101
|
and grandparent.nodename = 'publisher'
|
102
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
103
|
|
104
|
-- add the first name if we have it
|
105
|
update doi_registration doi
|
106
|
set publisher = publisher ||', '|| child.nodedata
|
107
|
from identifier id,
|
108
|
xml_documents docs,
|
109
|
xml_nodes nodes,
|
110
|
xml_nodes parent,
|
111
|
xml_nodes child,
|
112
|
xml_nodes grandparent
|
113
|
where doi.guid = id.guid
|
114
|
and id.docid = docs.docid
|
115
|
and id.rev = docs.rev
|
116
|
and docs.rootnodeid = nodes.rootnodeid
|
117
|
and nodes.nodeid = child.parentnodeid
|
118
|
and nodes.parentnodeid = parent.nodeid
|
119
|
and parent.parentnodeid = grandparent.nodeid
|
120
|
and nodes.nodename = 'givenName'
|
121
|
and parent.nodename = 'individualName'
|
122
|
and grandparent.nodename = 'publisher'
|
123
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
124
|
|
125
|
-- update organization using nodes
|
126
|
update doi_registration doi
|
127
|
set creator = child.nodedata
|
128
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
129
|
where doi.guid = id.guid
|
130
|
and id.docid = docs.docid
|
131
|
and id.rev = docs.rev
|
132
|
and docs.rootnodeid = nodes.rootnodeid
|
133
|
and nodes.nodeid = child.parentnodeid
|
134
|
and nodes.parentnodeid = parent.nodeid
|
135
|
and nodes.nodename = 'organizationName'
|
136
|
and parent.nodename = 'creator'
|
137
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
138
|
|
139
|
-- update creator using nodes
|
140
|
-- creator/individualName/surName
|
141
|
-- creator/individualName/givenName
|
142
|
update doi_registration doi
|
143
|
set creator = child.nodedata
|
144
|
from identifier id,
|
145
|
xml_documents docs,
|
146
|
xml_nodes nodes,
|
147
|
xml_nodes parent,
|
148
|
xml_nodes child,
|
149
|
xml_nodes grandparent
|
150
|
where doi.guid = id.guid
|
151
|
and id.docid = docs.docid
|
152
|
and id.rev = docs.rev
|
153
|
and docs.rootnodeid = nodes.rootnodeid
|
154
|
and nodes.nodeid = child.parentnodeid
|
155
|
and nodes.parentnodeid = parent.nodeid
|
156
|
and parent.parentnodeid = grandparent.nodeid
|
157
|
and nodes.nodename = 'surName'
|
158
|
and parent.nodename = 'individualName'
|
159
|
and grandparent.nodename = 'creator'
|
160
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
161
|
|
162
|
-- add the first name if we have it
|
163
|
update doi_registration doi
|
164
|
set creator = creator ||', '|| child.nodedata
|
165
|
from identifier id,
|
166
|
xml_documents docs,
|
167
|
xml_nodes nodes,
|
168
|
xml_nodes parent,
|
169
|
xml_nodes child,
|
170
|
xml_nodes grandparent
|
171
|
where doi.guid = id.guid
|
172
|
and id.docid = docs.docid
|
173
|
and id.rev = docs.rev
|
174
|
and docs.rootnodeid = nodes.rootnodeid
|
175
|
and nodes.nodeid = child.parentnodeid
|
176
|
and nodes.parentnodeid = parent.nodeid
|
177
|
and parent.parentnodeid = grandparent.nodeid
|
178
|
and nodes.nodename = 'givenName'
|
179
|
and parent.nodename = 'individualName'
|
180
|
and grandparent.nodename = 'creator'
|
181
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
182
|
|
183
|
-- update title using revisions
|
184
|
update doi_registration doi
|
185
|
set title = child.nodedata
|
186
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
187
|
where doi.guid = id.guid
|
188
|
and id.docid = docs.docid
|
189
|
and id.rev = docs.rev
|
190
|
and docs.rootnodeid = nodes.rootnodeid
|
191
|
and nodes.nodeid = child.parentnodeid
|
192
|
and nodes.parentnodeid = parent.nodeid
|
193
|
and nodes.nodename = 'title'
|
194
|
and parent.nodename = 'dataset';
|
195
|
|
196
|
-- update pubDate using revisions
|
197
|
update doi_registration doi
|
198
|
set pub_date = child.nodedata
|
199
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
200
|
where doi.guid = id.guid
|
201
|
and id.docid = docs.docid
|
202
|
and id.rev = docs.rev
|
203
|
and docs.rootnodeid = nodes.rootnodeid
|
204
|
and nodes.nodeid = child.parentnodeid
|
205
|
and nodes.parentnodeid = parent.nodeid
|
206
|
and nodes.nodename = 'pubDate'
|
207
|
and parent.nodename = 'dataset';
|
208
|
|
209
|
-- update publisher using revisions
|
210
|
update doi_registration doi
|
211
|
set publisher = child.nodedata
|
212
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
213
|
where doi.guid = id.guid
|
214
|
and id.docid = docs.docid
|
215
|
and id.rev = docs.rev
|
216
|
and docs.rootnodeid = nodes.rootnodeid
|
217
|
and nodes.nodeid = child.parentnodeid
|
218
|
and nodes.parentnodeid = parent.nodeid
|
219
|
and nodes.nodename = 'organizationName'
|
220
|
and parent.nodename = 'publisher'
|
221
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
222
|
|
223
|
-- use publisher individual name if we have it
|
224
|
update doi_registration doi
|
225
|
set publisher = child.nodedata
|
226
|
from identifier id,
|
227
|
xml_revisions docs,
|
228
|
xml_nodes_revisions nodes,
|
229
|
xml_nodes_revisions parent,
|
230
|
xml_nodes_revisions child,
|
231
|
xml_nodes_revisions grandparent
|
232
|
where doi.guid = id.guid
|
233
|
and id.docid = docs.docid
|
234
|
and id.rev = docs.rev
|
235
|
and docs.rootnodeid = nodes.rootnodeid
|
236
|
and nodes.nodeid = child.parentnodeid
|
237
|
and nodes.parentnodeid = parent.nodeid
|
238
|
and parent.parentnodeid = grandparent.nodeid
|
239
|
and nodes.nodename = 'surName'
|
240
|
and parent.nodename = 'individualName'
|
241
|
and grandparent.nodename = 'publisher'
|
242
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
243
|
|
244
|
--include the first name (careful not to rerun this part)
|
245
|
update doi_registration doi
|
246
|
set publisher = publisher ||', '|| child.nodedata
|
247
|
from identifier id,
|
248
|
xml_revisions docs,
|
249
|
xml_nodes_revisions nodes,
|
250
|
xml_nodes_revisions parent,
|
251
|
xml_nodes_revisions child,
|
252
|
xml_nodes_revisions grandparent
|
253
|
where doi.guid = id.guid
|
254
|
and id.docid = docs.docid
|
255
|
and id.rev = docs.rev
|
256
|
and docs.rootnodeid = nodes.rootnodeid
|
257
|
and nodes.nodeid = child.parentnodeid
|
258
|
and nodes.parentnodeid = parent.nodeid
|
259
|
and parent.parentnodeid = grandparent.nodeid
|
260
|
and nodes.nodename = 'givenName'
|
261
|
and parent.nodename = 'individualName'
|
262
|
and grandparent.nodename = 'publisher'
|
263
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
264
|
|
265
|
-- update organization using nodes
|
266
|
update doi_registration doi
|
267
|
set creator = child.nodedata
|
268
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
269
|
where doi.guid = id.guid
|
270
|
and id.docid = docs.docid
|
271
|
and id.rev = docs.rev
|
272
|
and docs.rootnodeid = nodes.rootnodeid
|
273
|
and nodes.nodeid = child.parentnodeid
|
274
|
and nodes.parentnodeid = parent.nodeid
|
275
|
and nodes.nodename = 'organizationName'
|
276
|
and parent.nodename = 'creator'
|
277
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
278
|
|
279
|
-- update creator using revisions
|
280
|
update doi_registration doi
|
281
|
set creator = child.nodedata
|
282
|
from identifier id,
|
283
|
xml_revisions docs,
|
284
|
xml_nodes_revisions nodes,
|
285
|
xml_nodes_revisions parent,
|
286
|
xml_nodes_revisions child,
|
287
|
xml_nodes_revisions grandparent
|
288
|
where doi.guid = id.guid
|
289
|
and id.docid = docs.docid
|
290
|
and id.rev = docs.rev
|
291
|
and docs.rootnodeid = nodes.rootnodeid
|
292
|
and nodes.nodeid = child.parentnodeid
|
293
|
and nodes.parentnodeid = parent.nodeid
|
294
|
and parent.parentnodeid = grandparent.nodeid
|
295
|
and nodes.nodename = 'surName'
|
296
|
and parent.nodename = 'individualName'
|
297
|
and grandparent.nodename = 'creator'
|
298
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
299
|
|
300
|
-- add the first name if we have it
|
301
|
update doi_registration doi
|
302
|
set creator = creator ||', '|| child.nodedata
|
303
|
from identifier id,
|
304
|
xml_revisions docs,
|
305
|
xml_nodes_revisions nodes,
|
306
|
xml_nodes_revisions parent,
|
307
|
xml_nodes_revisions child,
|
308
|
xml_nodes_revisions grandparent
|
309
|
where doi.guid = id.guid
|
310
|
and id.docid = docs.docid
|
311
|
and id.rev = docs.rev
|
312
|
and docs.rootnodeid = nodes.rootnodeid
|
313
|
and nodes.nodeid = child.parentnodeid
|
314
|
and nodes.parentnodeid = parent.nodeid
|
315
|
and parent.parentnodeid = grandparent.nodeid
|
316
|
and nodes.nodename = 'givenName'
|
317
|
and parent.nodename = 'individualName'
|
318
|
and grandparent.nodename = 'creator'
|
319
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
320
|
|
321
|
-- use xml_documents for defaults that are still missing
|
322
|
update doi_registration doi
|
323
|
set creator = doc.user_owner
|
324
|
from xml_documents doc, identifier id
|
325
|
where doi.guid = id.guid
|
326
|
and id.docid = doc.docid
|
327
|
and id.rev = doc.rev
|
328
|
and doi.creator is null;
|
329
|
|
330
|
update doi_registration doi
|
331
|
set creator = doc.user_owner
|
332
|
from xml_revisions doc, identifier id
|
333
|
where doi.guid = id.guid
|
334
|
and id.docid = doc.docid
|
335
|
and id.rev = doc.rev
|
336
|
and doi.creator is null;
|
337
|
|
338
|
-- set publisher
|
339
|
update doi_registration
|
340
|
set publisher = creator
|
341
|
where publisher is null or publisher = '';
|
342
|
|
343
|
-- clean up
|
344
|
update doi_registration
|
345
|
set title = replace(title, E'\n', ' ');
|
346
|
|
347
|
update doi_registration
|
348
|
set title = regexp_replace(title, E'\\s+', ' ', 'g');
|
349
|
|
350
|
update doi_registration
|
351
|
set title = regexp_replace(title, E'^\\s+', '', 'g');
|
352
|
|
353
|
update doi_registration
|
354
|
set ezid_account = 'sb_nceas'
|
355
|
where guid like 'doi:10.5063%';
|
356
|
--16988
|
357
|
|
358
|
update doi_registration
|
359
|
set ezid_account = 'sb_pisco'
|
360
|
where guid like 'doi:10.6085%';
|
361
|
--98078
|
362
|
|
363
|
update doi_registration
|
364
|
set ezid_account = 'lternet'
|
365
|
where guid like 'doi:10.6073%';
|
366
|
--49482
|
367
|
|
368
|
/**
|
369
|
* Additional modifications, July 5th, 2012
|
370
|
*/
|
371
|
ALTER TABLE doi_registration
|
372
|
ADD COLUMN resourceType text,
|
373
|
ADD COLUMN objectFormat text,
|
374
|
ADD COLUMN obsoletedBy text,
|
375
|
ADD COLUMN obsoletes text,
|
376
|
ADD COLUMN resourceMapId text,
|
377
|
ADD COLUMN resourceMapLocation text,
|
378
|
ADD COLUMN access text;
|
379
|
|
380
|
-- update access values
|
381
|
update doi_registration
|
382
|
set access = 'protected';
|
383
|
|
384
|
update doi_registration doi
|
385
|
set access = 'public'
|
386
|
from xml_access a
|
387
|
where doi.guid = a.guid
|
388
|
and a.principal_name = 'public'
|
389
|
and a.permission >= '4'
|
390
|
and a.perm_type = 'allow';
|
391
|
|
392
|
-- update the objectFormat from xml_documents
|
393
|
update doi_registration doi
|
394
|
set objectFormat = doc.doctype
|
395
|
from identifier id, xml_documents doc
|
396
|
where doi.guid = id.guid
|
397
|
and id.docid = doc.docid
|
398
|
and id.rev = doc.rev;
|
399
|
|
400
|
-- update the objectFormat from xml_revisions
|
401
|
update doi_registration doi
|
402
|
set objectFormat = rev.doctype
|
403
|
from identifier id, xml_revisions rev
|
404
|
where doi.guid = id.guid
|
405
|
and id.docid = rev.docid
|
406
|
and id.rev = rev.rev;
|
407
|
|
408
|
--update resourceType
|
409
|
update doi_registration
|
410
|
set resourceType = 'Dataset/metadata';
|
411
|
|
412
|
update doi_registration
|
413
|
set resourceType = 'Dataset/data'
|
414
|
where objectFormat = 'BIN';
|
415
|
|
416
|
-- update the data file titles
|
417
|
update doi_registration doi
|
418
|
set title = 'Data file - ' || doc.docname
|
419
|
from identifier id, xml_documents doc
|
420
|
where doi.guid = id.guid
|
421
|
and id.docid = doc.docid
|
422
|
and id.rev = doc.rev
|
423
|
and doi.resourceType = 'Dataset/data';
|
424
|
|
425
|
-- update the data file titles from revisions
|
426
|
update doi_registration doi
|
427
|
set title = 'Data file - ' || doc.docname
|
428
|
from identifier id, xml_revisions doc
|
429
|
where doi.guid = id.guid
|
430
|
and id.docid = doc.docid
|
431
|
and id.rev = doc.rev
|
432
|
and doi.resourceType = 'Dataset/data';
|
433
|
|
434
|
-- update the objectFormat from SM table (will be subset)
|
435
|
update doi_registration doi
|
436
|
set objectFormat = sm.object_format
|
437
|
from systemMetadata sm
|
438
|
where doi.guid = sm.guid;
|
439
|
--16938
|
440
|
|
441
|
--update revision history from SM
|
442
|
update doi_registration doi
|
443
|
set obsoletes = sm.obsoletes,
|
444
|
obsoletedBy = sm.obsoleted_by
|
445
|
from systemMetadata sm
|
446
|
where doi.guid = sm.guid;
|
447
|
|
448
|
/** use plain old revision history **/
|
449
|
-- update obsoletedby
|
450
|
update doi_registration doi
|
451
|
set obsoletedBy = newer.guid
|
452
|
from identifier id, identifier newer
|
453
|
where doi.guid = id.guid
|
454
|
and id.docid = newer.docid
|
455
|
and newer.rev = (select min(next.rev) from identifier next where next.docid = id.docid and next.rev > id.rev);
|
456
|
|
457
|
-- update the obsolets
|
458
|
update doi_registration doi
|
459
|
set obsoletes = older.guid
|
460
|
from identifier id, identifier older
|
461
|
where doi.guid = id.guid
|
462
|
and id.docid = older.docid
|
463
|
and older.rev = (select max(prev.rev) from identifier prev where prev.docid = id.docid and prev.rev < id.rev);
|
464
|
|
465
|
/**
|
466
|
select doi.guid, older.guid as obsoletes
|
467
|
from doi_registration doi, identifier id, identifier older
|
468
|
where doi.guid = id.guid
|
469
|
and id.docid = older.docid
|
470
|
and older.rev = (select max(prev.rev) from identifier prev where prev.docid = id.docid and prev.rev < id.rev)
|
471
|
and doi.guid like 'doi:10.5063/AA/ABS.4%';
|
472
|
**/
|
473
|
|
474
|
|
475
|
--update the resourcemapid for described data
|
476
|
CREATE TABLE ecogrid_docids AS
|
477
|
select distinct id.docid, id.rev, id.guid, substring(xpi.nodedata from 'ecogrid://knb/(.*)$') as data_docid
|
478
|
from xml_path_index xpi, identifier id, xml_documents xmld
|
479
|
where xpi.docid = xmld.docid
|
480
|
and xmld.docid = id.docid
|
481
|
and xmld.rev = id.rev
|
482
|
and xpi.path like 'dataset/%/physical/distribution/online/url'
|
483
|
and xpi.nodedata like 'ecogrid%'
|
484
|
and id.guid in (select guid from doi_registration);
|
485
|
|
486
|
-- include revisions for described data
|
487
|
INSERT INTO ecogrid_docids
|
488
|
select distinct id.docid, id.rev, id.guid, substring(child.nodedata from 'ecogrid://knb/(.*)$') as data_docid
|
489
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
490
|
where id.docid = docs.docid
|
491
|
and id.rev = docs.rev
|
492
|
and docs.rootnodeid = nodes.rootnodeid
|
493
|
and nodes.nodeid = child.parentnodeid
|
494
|
and nodes.parentnodeid = parent.nodeid
|
495
|
and nodes.nodename = 'url'
|
496
|
and parent.nodename = 'online'
|
497
|
and child.nodedata like 'ecogrid%'
|
498
|
and child.nodetype = 'TEXT'
|
499
|
and id.guid in (select guid from doi_registration);
|
500
|
|
501
|
-- Set the resource map for the data files (NOTE: some of thee maps might not actually exist on the system depending on how successful the ORE generation was)
|
502
|
update doi_registration doi
|
503
|
set resourceMapId = 'resourceMap_'||eco.docid||'.'||eco.rev
|
504
|
from identifier id, ecogrid_docids eco
|
505
|
where doi.guid = id.guid
|
506
|
and id.docid||'.'||id.rev = eco.data_docid;
|
507
|
|
508
|
-- set the resource map id for the metadata file that did most of the packaging work!
|
509
|
update doi_registration doi
|
510
|
set resourceMapId = 'resourceMap_'||eco.docid||'.'||eco.rev
|
511
|
from ecogrid_docids eco
|
512
|
where doi.guid = eco.guid;
|
513
|
|
514
|
select count(*)
|
515
|
from doi_registration
|
516
|
where resourceMapId is null;
|
517
|
|
518
|
--update the resource map location
|
519
|
update doi_registration
|
520
|
set resourceMapLocation = 'https://cn.dataone.org/cn/v1/resolve/' || resourceMapId
|
521
|
where resourceMapId is not null;
|
522
|
|
523
|
-- fix null values (should not have to have this now, but it will not hurt)
|
524
|
update doi_registration doi
|
525
|
set creator = user_owner
|
526
|
from identifier id, xml_documents docs
|
527
|
where doi.guid = id.guid
|
528
|
and id.docid = docs.docid
|
529
|
and id.rev= docs.rev
|
530
|
and (creator is null or trim(creator) = '');
|
531
|
|
532
|
update doi_registration doi
|
533
|
set publisher = user_owner
|
534
|
from identifier id, xml_documents docs
|
535
|
where doi.guid = id.guid
|
536
|
and id.docid = docs.docid
|
537
|
and id.rev= docs.rev
|
538
|
and (publisher is null or trim(publisher) = '');
|
539
|
|
540
|
update doi_registration doi
|
541
|
set creator = user_owner
|
542
|
from identifier id, xml_revisions docs
|
543
|
where doi.guid = id.guid
|
544
|
and id.docid = docs.docid
|
545
|
and id.rev= docs.rev
|
546
|
and (creator is null or trim(creator) = '');
|
547
|
|
548
|
update doi_registration doi
|
549
|
set publisher = user_owner
|
550
|
from identifier id, xml_revisions docs
|
551
|
where doi.guid = id.guid
|
552
|
and id.docid = docs.docid
|
553
|
and id.rev= docs.rev
|
554
|
and (publisher is null or trim(publisher) = '');
|
555
|
|
556
|
-- fix a previous mistake
|
557
|
update doi_registration
|
558
|
set publisher = null
|
559
|
where publisher = 'document';
|
560
|
|
561
|
--update creator usinig LDAP lookup
|
562
|
CREATE TABLE ecoinfo_dn (dn text, givenName text, sn text);
|
563
|
--TRUNCATE TABLE ecoinfo_dn;
|
564
|
COPY ecoinfo_dn FROM '/tmp/ecoinfo_dn.csv' WITH CSV HEADER;
|
565
|
|
566
|
update doi_registration
|
567
|
set creator =
|
568
|
case when givenName is null then
|
569
|
sn
|
570
|
else
|
571
|
sn || ', ' || givenName
|
572
|
end
|
573
|
from ecoinfo_dn
|
574
|
where trim(both from lower(creator)) = lower(dn);
|
575
|
|
576
|
update doi_registration
|
577
|
set publisher =
|
578
|
case when givenName is null then
|
579
|
sn
|
580
|
else
|
581
|
sn || ', ' || givenName
|
582
|
end
|
583
|
from ecoinfo_dn
|
584
|
where trim(both from lower(publisher)) = lower(dn);
|
585
|
|
586
|
select creator, count(*) as cnt
|
587
|
from doi_registration
|
588
|
where creator like 'uid=%'
|
589
|
group by creator
|
590
|
order by cnt desc;
|
591
|
|
592
|
-- update the data files with owner/publisher from EML entry
|
593
|
update doi_registration doi
|
594
|
set creator = meta.creator,
|
595
|
publisher = meta.publisher
|
596
|
from doi_registration meta, identifier id, ecogrid_docids eco
|
597
|
where doi.guid = id.guid
|
598
|
and id.docid||'.'||id.rev = eco.data_docid
|
599
|
and eco.guid = meta.guid;
|
600
|
|
601
|
-- clean up KBS entries that have empty user_owner
|
602
|
update doi_registration
|
603
|
set creator = 'Kellogg Biological Station'
|
604
|
where (creator is null or trim(creator) = '')
|
605
|
and guid like '%knb-lter-kbs%';
|
606
|
|
607
|
update doi_registration
|
608
|
set publisher = 'Kellogg Biological Station'
|
609
|
where (publisher is null or trim(publisher) = '')
|
610
|
and guid like '%knb-lter-kbs%';
|
611
|
|
612
|
-- more clean up
|
613
|
update doi_registration
|
614
|
set creator = trim(creator),
|
615
|
publisher = trim(publisher);
|
616
|
|
617
|
-- set the publisher to the source system if it is the same as the creator
|
618
|
update doi_registration
|
619
|
set publisher =
|
620
|
case
|
621
|
when guid like 'doi:10.5063%' then
|
622
|
'Knowledge Network for Biocomplexity (KNB)'
|
623
|
when guid like 'doi:10.6085%' then
|
624
|
'Partnership for Interdisciplinary Studies of Coastal Oceans (PISCO)'
|
625
|
when guid like 'doi:10.6073%' then
|
626
|
'Long Term Ecological Research Network (LTER)'
|
627
|
else
|
628
|
publisher
|
629
|
end
|
630
|
where publisher = creator;
|
631
|
|
632
|
-- entity references
|
633
|
update doi_registration
|
634
|
set publisher = regexp_replace(publisher, '&', '&', 'g');
|
635
|
update doi_registration
|
636
|
set publisher = regexp_replace(publisher, ''', E'\'', 'g');
|
637
|
--accented i (í)
|
638
|
update doi_registration
|
639
|
set publisher = regexp_replace(publisher, 'í', 'í', 'g')
|
640
|
where publisher like '%í%';
|
641
|
update doi_registration
|
642
|
set creator = regexp_replace(creator, '&', '&', 'g');
|
643
|
update doi_registration
|
644
|
set creator = regexp_replace(creator, ''', E'\'', 'g');
|
645
|
|
646
|
--titles
|
647
|
update doi_registration
|
648
|
set title = regexp_replace(title, '&', '&', 'g');
|
649
|
update doi_registration
|
650
|
set title = regexp_replace(title, '∓', '&', 'g')
|
651
|
where title like '%∓%';
|
652
|
update doi_registration
|
653
|
set title = regexp_replace(title, ''', E'\'', 'g');
|
654
|
update doi_registration
|
655
|
set title = regexp_replace(title, '"', E'\'', 'g');
|
656
|
update doi_registration
|
657
|
set title = regexp_replace(title, '>', '>', 'g');
|
658
|
update doi_registration
|
659
|
set title = regexp_replace(title, '<', '<', 'g');
|
660
|
update doi_registration
|
661
|
set title = regexp_replace(title, 'í', 'í', 'g')
|
662
|
where title like '%í%';
|
663
|
update doi_registration
|
664
|
set title = regexp_replace(title, 'í', 'í', 'g')
|
665
|
where title like '%í%';
|
666
|
update doi_registration
|
667
|
set title = regexp_replace(title, 'ñ', 'í', 'g')
|
668
|
where title like '%ñ%';
|
669
|
update doi_registration
|
670
|
set title = regexp_replace(title, 'ó', 'ó', 'g')
|
671
|
where title like '%ó%';
|
672
|
update doi_registration
|
673
|
set title = regexp_replace(title, 'ã', 'ã', 'g')
|
674
|
where title like '%ã%';
|
675
|
update doi_registration
|
676
|
set title = regexp_replace(title, 'ç', 'ç', 'g')
|
677
|
where title like '%ç%';
|
678
|
update doi_registration
|
679
|
set title = regexp_replace(title, 'é', 'é', 'g')
|
680
|
where title like '%é%';
|
681
|
update doi_registration
|
682
|
set title = regexp_replace(title, 'ñ', 'ñ', 'g')
|
683
|
where title like '%ñ%';
|
684
|
update doi_registration
|
685
|
set title = regexp_replace(title, '–', '-', 'g')
|
686
|
where title like '%–%';
|
687
|
update doi_registration
|
688
|
set title = regexp_replace(title, '’', E'\'', 'g')
|
689
|
where title like '%’%';
|
690
|
update doi_registration
|
691
|
set title = regexp_replace(title, '–', '/', 'g')
|
692
|
where title like '%–%';
|
693
|
update doi_registration
|
694
|
set title = regexp_replace(title, 'á', 'á', 'g')
|
695
|
where title like '%á%';
|
696
|
update doi_registration
|
697
|
set title = regexp_replace(title, 'ó', 'ó', 'g')
|
698
|
where title like '%ó%';
|
699
|
--格式範例 (格式范例)
|
700
|
update doi_registration
|
701
|
set title = regexp_replace(title, '格式範例', '格式范例', 'g')
|
702
|
where title like '%格式範例%';
|
703
|
|
704
|
--directional quotes
|
705
|
update doi_registration
|
706
|
set title = regexp_replace(title, '”', '"', 'g')
|
707
|
where title like '%”%';
|
708
|
update doi_registration
|
709
|
set title = regexp_replace(title, '“', '"', 'g')
|
710
|
where title like '%“%';
|
711
|
--one off apostrophe
|
712
|
update doi_registration
|
713
|
set title = regexp_replace(title, 'Õ', E'\'', 'g')
|
714
|
where title like '%Õ%';
|
715
|
update doi_registration
|
716
|
set title = regexp_replace(title, '–', '-', 'g')
|
717
|
where title like '%–%';
|
718
|
|
719
|
--after reading the complete metadata, I could interpret these as "greater than"
|
720
|
update doi_registration
|
721
|
set title = regexp_replace(title, '&t;', '>', 'g')
|
722
|
where title like '%&t;%'
|
723
|
and guid in ('doi:10.6073/AA/knb-lter-luq.15.1', 'doi:10.6073/AA/knb-lter-luq.39.1', 'doi:10.6073/AA/knb-lter-luq.37.1', 'doi:10.6073/AA/knb-lter-luq.40.1', 'doi:10.6073/AA/knb-lter-luq.18.1' );
|
724
|
|
725
|
|
726
|
--special characters
|
727
|
update doi_registration
|
728
|
set creator = regexp_replace(creator, 'é', 'é', 'g')
|
729
|
where creator like '%é%';
|
730
|
update doi_registration
|
731
|
set creator = regexp_replace(creator, 'ç', 'ç', 'g')
|
732
|
where creator like '%ç%';
|
733
|
update doi_registration
|
734
|
set creator = regexp_replace(creator, 'ñ', 'ñ', 'g')
|
735
|
where creator like '%ñ%';
|
736
|
-- Gutiérrez, Ralph, J.
|
737
|
-- Liébault, Frédéric
|
738
|
update doi_registration
|
739
|
set creator = regexp_replace(creator, 'é', 'é', 'g')
|
740
|
where creator like '%é%';
|
741
|
-- Ramírez, Alonso
|
742
|
update doi_registration
|
743
|
set creator = regexp_replace(creator, 'í', 'í', 'g')
|
744
|
where creator like '%í%';
|
745
|
-- González, Grizelle
|
746
|
update doi_registration
|
747
|
set creator = regexp_replace(creator, 'á', 'á', 'g')
|
748
|
where creator like '%á%';
|
749
|
--王, (王, 名, 姓)
|
750
|
update doi_registration
|
751
|
set creator = regexp_replace(creator, '王', '王', 'g')
|
752
|
where creator like '%王%';
|
753
|
--名
|
754
|
update doi_registration
|
755
|
set creator = regexp_replace(creator, '名', '名', 'g')
|
756
|
where creator like '%名%';
|
757
|
--姓
|
758
|
update doi_registration
|
759
|
set creator = regexp_replace(creator, '姓', '姓', 'g')
|
760
|
where creator like '%姓%';
|
761
|
-- These are left
|
762
|
-- Helmbrecht, S ’rai
|
763
|
-- lkñlkl, Fgfggf
|
764
|
|
765
|
-- clean up
|
766
|
update doi_registration
|
767
|
set publisher = replace(publisher, E'\n', ' ');
|
768
|
update doi_registration
|
769
|
set publisher = regexp_replace(publisher, E'\\s+', ' ', 'g');
|
770
|
update doi_registration
|
771
|
set publisher = regexp_replace(publisher, E'^\\s+', '', 'g');
|
772
|
|
773
|
-- use only year for pub_date
|
774
|
update doi_registration
|
775
|
set pub_date = to_char(date(pub_date), 'YYYY')
|
776
|
where length(pub_date) > 4
|
777
|
and pub_date like '%-%-%';
|
778
|
|
779
|
--set the rest to upload date, xml_documents
|
780
|
update doi_registration doi
|
781
|
set pub_date = to_char(date_created, 'YYYY')
|
782
|
from xml_documents x, identifier id
|
783
|
where length(pub_date) != 4
|
784
|
and doi.guid = id.guid
|
785
|
and id.docid = x.docid
|
786
|
and id.rev = x.rev;
|
787
|
-- revisions
|
788
|
update doi_registration doi
|
789
|
set pub_date = to_char(date_created, 'YYYY')
|
790
|
from xml_revisions x, identifier id
|
791
|
where length(pub_date) != 4
|
792
|
and doi.guid = id.guid
|
793
|
and id.docid = x.docid
|
794
|
and id.rev = x.rev;
|
795
|
|
796
|
update doi_registration
|
797
|
set creator = replace(creator, E'\n', ' ');
|
798
|
|
799
|
-- some entries got in with blank spaces
|
800
|
update doi_registration
|
801
|
set title = 'unknown'
|
802
|
where length(trim(title)) = 0;
|
803
|
|
804
|
update doi_registration
|
805
|
set creator = 'unknown'
|
806
|
where length(trim(creator)) = 0;
|
807
|
|
808
|
-- BES has very long publisher fields, so use this default for them all
|
809
|
update doi_registration
|
810
|
set publisher = 'Baltimore Ecosystem Study LTER'
|
811
|
where guid like '%knb-lter-bes%';
|
812
|
|
813
|
-- need URL identifiers for DataCite when they are not DOIs
|
814
|
update doi_registration
|
815
|
set obsoletedBy = 'https://cn.dataone.org/cn/v1/resolve/' || obsoletedBy
|
816
|
where obsoletedBy not like 'doi%';
|
817
|
|
818
|
update doi_registration
|
819
|
set obsoletes = 'https://cn.dataone.org/cn/v1/resolve/' || obsoletes
|
820
|
where obsoletes not like 'doi%';
|
821
|
|
822
|
ALTER TABLE doi_registration
|
823
|
ADD COLUMN obsoletedByIdType text,
|
824
|
ADD COLUMN obsoletesIdType text,
|
825
|
ADD COLUMN resourceMapLocationIdType text;
|
826
|
|
827
|
update doi_registration
|
828
|
set obsoletedByIdType = 'DOI'
|
829
|
where obsoletedBy is not null
|
830
|
and obsoletedBy like 'doi%';
|
831
|
|
832
|
update doi_registration
|
833
|
set obsoletedByIdType = 'URL'
|
834
|
where obsoletedBy is not null
|
835
|
and obsoletedBy not like 'doi%';
|
836
|
|
837
|
update doi_registration
|
838
|
set obsoletesIdType = 'DOI'
|
839
|
where obsoletes is not null
|
840
|
and obsoletes like 'doi%';
|
841
|
|
842
|
update doi_registration
|
843
|
set obsoletesIdType = 'URL'
|
844
|
where obsoletes is not null
|
845
|
and obsoletes not like 'doi%';
|
846
|
|
847
|
update doi_registration
|
848
|
set resourceMapLocationIdType = 'URL'
|
849
|
where resourceMapLocation is not null;
|
850
|
|
851
|
-- copy to the external file
|
852
|
COPY
|
853
|
(select ezid_account,
|
854
|
guid as dc_identifier,
|
855
|
url as datacite_url,
|
856
|
title as dc_title,
|
857
|
creator as dc_creator,
|
858
|
publisher as dc_publisher,
|
859
|
pub_date as datacite_publicationYear,
|
860
|
split_part(resourceType, '/', 1) as datacite_resourceTypeGeneral,
|
861
|
split_part(resourceType, '/', 2) as datacite_resourceType,
|
862
|
objectFormat as datacite_format,
|
863
|
obsoletedBy as datacite_relatedIdentifier_isPreviousVersionOf,
|
864
|
obsoletedByIdType as datacite_relatedIdentifier_isPreviousVersionOfType,
|
865
|
obsoletes as datacite_relatedIdentifier_isNewVersionOf,
|
866
|
obsoletesIdType as datacite_relatedIdentifier_isNewVersionOfType,
|
867
|
resourceMapLocation as datacite_relatedIdentifier_isPartOf,
|
868
|
resourceMapLocationIdType as datacite_relatedIdentifier_isPartOfType,
|
869
|
access as d1_read_access
|
870
|
from doi_registration
|
871
|
where access = 'public'
|
872
|
order by dc_identifier)
|
873
|
TO '/tmp/doi_registration.csv'
|
874
|
WITH CSV HEADER;
|
875
|
--164548
|
876
|
|
877
|
--drop table doi_registration;
|
878
|
--drop table ecogrid_docids;
|
879
|
--drop table ecoinfo_dn;
|
880
|
|