1
|
-- use default values initially
|
2
|
CREATE TABLE doi_registration AS
|
3
|
|
4
|
select
|
5
|
text('testing') as ezid_account,
|
6
|
doc.doctype,
|
7
|
id.guid,
|
8
|
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url,
|
9
|
text('Unknown') as title,
|
10
|
text(null) as creator,
|
11
|
text(null) as publisher,
|
12
|
to_char(doc.date_created, 'YYYY') as pub_date
|
13
|
from identifier id, xml_documents doc
|
14
|
where guid like 'doi%'
|
15
|
and id.docid = doc.docid
|
16
|
and id.rev = doc.rev
|
17
|
|
18
|
UNION ALL
|
19
|
|
20
|
select
|
21
|
text('testing') as ezid_account,
|
22
|
doc.doctype,
|
23
|
id.guid,
|
24
|
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url,
|
25
|
text('Unknown') as title,
|
26
|
text(null) as creator,
|
27
|
text(null) as publisher,
|
28
|
to_char(doc.date_created, 'YYYY') as pub_date
|
29
|
from identifier id, xml_revisions doc
|
30
|
where guid like 'doi%'
|
31
|
and id.docid = doc.docid
|
32
|
and id.rev = doc.rev;
|
33
|
|
34
|
--update defaults
|
35
|
update doi_registration
|
36
|
set title = 'Data file'
|
37
|
where doctype = 'BIN';
|
38
|
|
39
|
update doi_registration
|
40
|
set title = 'Legacy EML file'
|
41
|
where doctype like '%eml%2.0.0beta%';
|
42
|
|
43
|
-- update title using node information
|
44
|
update doi_registration doi
|
45
|
set title = child.nodedata
|
46
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
47
|
where doi.guid = id.guid
|
48
|
and id.docid = docs.docid
|
49
|
and id.rev = docs.rev
|
50
|
and docs.rootnodeid = nodes.rootnodeid
|
51
|
and nodes.nodeid = child.parentnodeid
|
52
|
and nodes.parentnodeid = parent.nodeid
|
53
|
and nodes.nodename = 'title'
|
54
|
and parent.nodename = 'dataset';
|
55
|
|
56
|
-- update pubDate using nodes
|
57
|
update doi_registration doi
|
58
|
set pub_date = child.nodedata
|
59
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
60
|
where doi.guid = id.guid
|
61
|
and id.docid = docs.docid
|
62
|
and id.rev = docs.rev
|
63
|
and docs.rootnodeid = nodes.rootnodeid
|
64
|
and nodes.nodeid = child.parentnodeid
|
65
|
and nodes.parentnodeid = parent.nodeid
|
66
|
and nodes.nodename = 'pubDate'
|
67
|
and parent.nodename = 'dataset';
|
68
|
|
69
|
-- update publisher using nodes
|
70
|
update doi_registration doi
|
71
|
set publisher = child.nodedata
|
72
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
73
|
where doi.guid = id.guid
|
74
|
and id.docid = docs.docid
|
75
|
and id.rev = docs.rev
|
76
|
and docs.rootnodeid = nodes.rootnodeid
|
77
|
and nodes.nodeid = child.parentnodeid
|
78
|
and nodes.parentnodeid = parent.nodeid
|
79
|
and nodes.nodename = 'publisher'
|
80
|
and parent.nodename = 'dataset';
|
81
|
|
82
|
-- update organization using nodes
|
83
|
update doi_registration doi
|
84
|
set creator = child.nodedata
|
85
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
86
|
where doi.guid = id.guid
|
87
|
and id.docid = docs.docid
|
88
|
and id.rev = docs.rev
|
89
|
and docs.rootnodeid = nodes.rootnodeid
|
90
|
and nodes.nodeid = child.parentnodeid
|
91
|
and nodes.parentnodeid = parent.nodeid
|
92
|
and nodes.nodename = 'organizationName'
|
93
|
and parent.nodename = 'creator'
|
94
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
95
|
|
96
|
-- update creator using nodes
|
97
|
-- creator/individualName/surName
|
98
|
-- creator/individualName/givenName
|
99
|
update doi_registration doi
|
100
|
set creator = child.nodedata
|
101
|
from identifier id,
|
102
|
xml_documents docs,
|
103
|
xml_nodes nodes,
|
104
|
xml_nodes parent,
|
105
|
xml_nodes child,
|
106
|
xml_nodes grandparent
|
107
|
where doi.guid = id.guid
|
108
|
and id.docid = docs.docid
|
109
|
and id.rev = docs.rev
|
110
|
and docs.rootnodeid = nodes.rootnodeid
|
111
|
and nodes.nodeid = child.parentnodeid
|
112
|
and nodes.parentnodeid = parent.nodeid
|
113
|
and parent.parentnodeid = grandparent.nodeid
|
114
|
and nodes.nodename = 'surName'
|
115
|
and parent.nodename = 'individualName'
|
116
|
and grandparent.nodename = 'creator'
|
117
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
118
|
|
119
|
-- add the first name if we have it
|
120
|
update doi_registration doi
|
121
|
set creator = child.nodedata ||' '|| creator
|
122
|
from identifier id,
|
123
|
xml_documents docs,
|
124
|
xml_nodes nodes,
|
125
|
xml_nodes parent,
|
126
|
xml_nodes child,
|
127
|
xml_nodes grandparent
|
128
|
where doi.guid = id.guid
|
129
|
and id.docid = docs.docid
|
130
|
and id.rev = docs.rev
|
131
|
and docs.rootnodeid = nodes.rootnodeid
|
132
|
and nodes.nodeid = child.parentnodeid
|
133
|
and nodes.parentnodeid = parent.nodeid
|
134
|
and parent.parentnodeid = grandparent.nodeid
|
135
|
and nodes.nodename = 'givenName'
|
136
|
and parent.nodename = 'individualName'
|
137
|
and grandparent.nodename = 'creator'
|
138
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
139
|
|
140
|
-- update title using revisions
|
141
|
update doi_registration doi
|
142
|
set title = child.nodedata
|
143
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
144
|
where doi.guid = id.guid
|
145
|
and id.docid = docs.docid
|
146
|
and id.rev = docs.rev
|
147
|
and docs.rootnodeid = nodes.rootnodeid
|
148
|
and nodes.nodeid = child.parentnodeid
|
149
|
and nodes.parentnodeid = parent.nodeid
|
150
|
and nodes.nodename = 'title'
|
151
|
and parent.nodename = 'dataset';
|
152
|
|
153
|
-- update pubDate using revisions
|
154
|
update doi_registration doi
|
155
|
set pub_date = child.nodedata
|
156
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
157
|
where doi.guid = id.guid
|
158
|
and id.docid = docs.docid
|
159
|
and id.rev = docs.rev
|
160
|
and docs.rootnodeid = nodes.rootnodeid
|
161
|
and nodes.nodeid = child.parentnodeid
|
162
|
and nodes.parentnodeid = parent.nodeid
|
163
|
and nodes.nodename = 'pubDate'
|
164
|
and parent.nodename = 'dataset';
|
165
|
|
166
|
-- update publisher using revisions
|
167
|
update doi_registration doi
|
168
|
set publisher = child.nodedata
|
169
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
170
|
where doi.guid = id.guid
|
171
|
and id.docid = docs.docid
|
172
|
and id.rev = docs.rev
|
173
|
and docs.rootnodeid = nodes.rootnodeid
|
174
|
and nodes.nodeid = child.parentnodeid
|
175
|
and nodes.parentnodeid = parent.nodeid
|
176
|
and nodes.nodename = 'publisher'
|
177
|
and parent.nodename = 'dataset';
|
178
|
|
179
|
-- update organization using nodes
|
180
|
update doi_registration doi
|
181
|
set creator = child.nodedata
|
182
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
183
|
where doi.guid = id.guid
|
184
|
and id.docid = docs.docid
|
185
|
and id.rev = docs.rev
|
186
|
and docs.rootnodeid = nodes.rootnodeid
|
187
|
and nodes.nodeid = child.parentnodeid
|
188
|
and nodes.parentnodeid = parent.nodeid
|
189
|
and nodes.nodename = 'organizationName'
|
190
|
and parent.nodename = 'creator'
|
191
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
192
|
|
193
|
-- update creator using revisions
|
194
|
update doi_registration doi
|
195
|
set creator = child.nodedata
|
196
|
from identifier id,
|
197
|
xml_revisions docs,
|
198
|
xml_nodes_revisions nodes,
|
199
|
xml_nodes_revisions parent,
|
200
|
xml_nodes_revisions child,
|
201
|
xml_nodes_revisions grandparent
|
202
|
where doi.guid = id.guid
|
203
|
and id.docid = docs.docid
|
204
|
and id.rev = docs.rev
|
205
|
and docs.rootnodeid = nodes.rootnodeid
|
206
|
and nodes.nodeid = child.parentnodeid
|
207
|
and nodes.parentnodeid = parent.nodeid
|
208
|
and parent.parentnodeid = grandparent.nodeid
|
209
|
and nodes.nodename = 'surName'
|
210
|
and parent.nodename = 'individualName'
|
211
|
and grandparent.nodename = 'creator'
|
212
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
213
|
|
214
|
-- add the first name if we have it
|
215
|
update doi_registration doi
|
216
|
set creator = child.nodedata ||' '|| creator
|
217
|
from identifier id,
|
218
|
xml_revisions docs,
|
219
|
xml_nodes_revisions nodes,
|
220
|
xml_nodes_revisions parent,
|
221
|
xml_nodes_revisions child,
|
222
|
xml_nodes_revisions grandparent
|
223
|
where doi.guid = id.guid
|
224
|
and id.docid = docs.docid
|
225
|
and id.rev = docs.rev
|
226
|
and docs.rootnodeid = nodes.rootnodeid
|
227
|
and nodes.nodeid = child.parentnodeid
|
228
|
and nodes.parentnodeid = parent.nodeid
|
229
|
and parent.parentnodeid = grandparent.nodeid
|
230
|
and nodes.nodename = 'givenName'
|
231
|
and parent.nodename = 'individualName'
|
232
|
and grandparent.nodename = 'creator'
|
233
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
234
|
|
235
|
-- use xml_documents for defaults that are still missing
|
236
|
update doi_registration doi
|
237
|
set creator = doc.user_owner
|
238
|
from xml_documents doc, identifier id
|
239
|
where doi.guid = id.guid
|
240
|
and id.docid = doc.docid
|
241
|
and id.rev = doc.rev
|
242
|
and doi.creator is null;
|
243
|
|
244
|
update doi_registration doi
|
245
|
set creator = doc.user_owner
|
246
|
from xml_revisions doc, identifier id
|
247
|
where doi.guid = id.guid
|
248
|
and id.docid = doc.docid
|
249
|
and id.rev = doc.rev
|
250
|
and doi.creator is null;
|
251
|
|
252
|
-- set publisher
|
253
|
update doi_registration
|
254
|
set publisher = creator
|
255
|
where publisher is null or publisher = '';
|
256
|
|
257
|
-- clean up
|
258
|
update doi_registration
|
259
|
set title = replace(title, E'\n', ' ');
|
260
|
|
261
|
update doi_registration
|
262
|
set title = regexp_replace(title, E'\\s+', ' ', 'g');
|
263
|
|
264
|
update doi_registration
|
265
|
set title = regexp_replace(title, E'^\\s+', '', 'g');
|
266
|
|
267
|
update doi_registration
|
268
|
set ezid_account = 'KNB'
|
269
|
where guid like 'doi:10.5063%';
|
270
|
--16988
|
271
|
|
272
|
update doi_registration
|
273
|
set ezid_account = 'PISCO'
|
274
|
where guid like 'doi:10.6085%';
|
275
|
--98078
|
276
|
|
277
|
update doi_registration
|
278
|
set ezid_account = 'LTER'
|
279
|
where guid like 'doi:10.6073%';
|
280
|
--49482
|
281
|
|
282
|
/**
|
283
|
* Additional modifications, July 5th, 2012
|
284
|
*/
|
285
|
ALTER TABLE doi_registration
|
286
|
ADD COLUMN resourceType text,
|
287
|
ADD COLUMN objectFormat text,
|
288
|
ADD COLUMN obsoletedBy text,
|
289
|
ADD COLUMN obsoletes text,
|
290
|
ADD COLUMN resourceMapId text,
|
291
|
ADD COLUMN resourceMapLocation text,
|
292
|
ADD COLUMN access text;
|
293
|
|
294
|
-- update access values
|
295
|
update doi_registration
|
296
|
set access = 'protected';
|
297
|
|
298
|
update doi_registration doi
|
299
|
set access = 'public'
|
300
|
from xml_access a
|
301
|
where doi.guid = a.guid
|
302
|
and a.principal_name = 'public'
|
303
|
and a.permission >= '4'
|
304
|
and a.perm_type = 'allow';
|
305
|
|
306
|
-- update the objectFormat from xml_documents
|
307
|
update doi_registration doi
|
308
|
set objectFormat = doc.doctype
|
309
|
from identifier id, xml_documents doc
|
310
|
where doi.guid = id.guid
|
311
|
and id.docid = doc.docid;
|
312
|
|
313
|
-- update the objectFormat from xml_revisions
|
314
|
update doi_registration doi
|
315
|
set objectFormat = rev.doctype
|
316
|
from identifier id, xml_revisions rev
|
317
|
where doi.guid = id.guid
|
318
|
and id.docid = rev.docid;
|
319
|
|
320
|
--update resourceType
|
321
|
update doi_registration
|
322
|
set resourceType = 'Dataset/METADATA';
|
323
|
|
324
|
update doi_registration
|
325
|
set resourceType = 'Dataset/DATA'
|
326
|
where objectFormat = 'BIN';
|
327
|
|
328
|
-- update the objectFormat from SM table (will be subset)
|
329
|
update doi_registration doi
|
330
|
set objectFormat = sm.object_format
|
331
|
from systemMetadata sm
|
332
|
where doi.guid = sm.guid;
|
333
|
--16938
|
334
|
|
335
|
--update revision history from SM
|
336
|
update doi_registration doi
|
337
|
set obsoletes = sm.obsoletes,
|
338
|
obsoletedBy = sm.obsoleted_by
|
339
|
from systemMetadata sm
|
340
|
where doi.guid = sm.guid;
|
341
|
|
342
|
--update resourceMap pointer TBD
|
343
|
-- NOTE: some of these maps might not actually exist
|
344
|
update doi_registration doi
|
345
|
set resourceMapId = 'resourceMap_'||id.docid||'.'||id.rev
|
346
|
from identifier id
|
347
|
where doi.guid = id.guid
|
348
|
and id.docid = rev.docid
|
349
|
and objectFormat like 'eml%';
|
350
|
|
351
|
--update the resourcemapid for described data
|
352
|
CREATE TABLE ecogrid_docids AS
|
353
|
|
354
|
select distinct id.docid, id.rev, id.guid, substring(xpi.nodedata from 'ecogrid://knb/(.*)$') as data_docid
|
355
|
from xml_path_index xpi, identifier id, xml_documents xmld
|
356
|
where xpi.docid = xmld.docid
|
357
|
and xmld.docid = id.docid
|
358
|
and xmld.rev = id.rev
|
359
|
and xpi.path like 'dataset/%/physical/distribution/online/url'
|
360
|
and xpi.nodedata like 'ecogrid%';
|
361
|
|
362
|
-- TODO: include revisions for described data
|
363
|
|
364
|
-- NOTE: some of thee maps might not actually exist
|
365
|
update doi_registration doi
|
366
|
set resourceMapId = 'resourceMap_'||eco.docid||'.'||eco.rev
|
367
|
from identifier id, ecogrid_docids eco
|
368
|
where doi.guid = id.guid
|
369
|
and id.docid||'.'||id.rev = eco.data_docid;
|
370
|
|
371
|
--update the resource map location
|
372
|
update doi_registration
|
373
|
set resourceMapLocation = 'https://cn.dataone.org/cn/v1/resolve/' || resourceMapId
|
374
|
where resourceMapId is not null;
|
375
|
|
376
|
--update creator usinig LDAP lookup
|
377
|
CREATE TABLE ecoinfo_dn (dn text, givenName text, sn text);
|
378
|
COPY ecoinfo_dn FROM '/tmp/ecoinfo_dn.csv' WITH CSV HEADER;
|
379
|
|
380
|
update doi_registration
|
381
|
set creator = givenName|| ' ' || sn
|
382
|
from ecoinfo_dn
|
383
|
where creator = dn;
|
384
|
|
385
|
update doi_registration
|
386
|
set publisher = givenName|| ' ' || sn
|
387
|
from ecoinfo_dn
|
388
|
where publisher = dn;
|
389
|
|
390
|
select creator, count(*) as cnt
|
391
|
from doi_registration
|
392
|
where creator like 'uid=%'
|
393
|
group by creator
|
394
|
order by cnt desc;
|
395
|
|
396
|
-- copy to the external file
|
397
|
COPY
|
398
|
(select ezid_account,
|
399
|
guid as dc_identifier,
|
400
|
url as datacite_url,
|
401
|
title as dc_title,
|
402
|
creator as dc_creator,
|
403
|
publisher as dc_publisher,
|
404
|
pub_date as datacite_publicationYear,
|
405
|
resourceType,
|
406
|
objectFormat,
|
407
|
obsoletedBy,
|
408
|
obsoletes,
|
409
|
resourceMapId,
|
410
|
resourceMapLocation,
|
411
|
access
|
412
|
from doi_registration
|
413
|
order by dc_identifier)
|
414
|
TO '/tmp/doi_registration.csv'
|
415
|
WITH CSV HEADER;
|
416
|
--164548
|
417
|
|
418
|
--drop table doi_registration;
|
419
|
--drop table ecogrid_docids;
|
420
|
--drop table ecoinfo_dn;
|
421
|
|