1
|
-- use default values initially
|
2
|
CREATE TABLE doi_registration AS
|
3
|
|
4
|
select
|
5
|
text('testing') as ezid_account,
|
6
|
doc.doctype,
|
7
|
id.guid,
|
8
|
'https://cn-stage.dataone.org/cn/v1/resolve/' || id.guid as url,
|
9
|
text('Unknown') as title,
|
10
|
text(null) as creator,
|
11
|
text(null) as publisher,
|
12
|
to_char(doc.date_created, 'YYYY') as pub_date
|
13
|
from identifier id, xml_documents doc
|
14
|
where guid like 'doi%'
|
15
|
and id.docid = doc.docid
|
16
|
and id.rev = doc.rev
|
17
|
|
18
|
UNION ALL
|
19
|
|
20
|
select
|
21
|
text('testing') as ezid_account,
|
22
|
doc.doctype,
|
23
|
id.guid,
|
24
|
'https://cn-stage.dataone.org/cn/v1/resolve/' || id.guid as url,
|
25
|
text('Unknown') as title,
|
26
|
text(null) as creator,
|
27
|
text(null) as publisher,
|
28
|
to_char(doc.date_created, 'YYYY') as pub_date
|
29
|
from identifier id, xml_revisions doc
|
30
|
where guid like 'doi%'
|
31
|
and id.docid = doc.docid
|
32
|
and id.rev = doc.rev;
|
33
|
|
34
|
--update defaults
|
35
|
update doi_registration
|
36
|
set title = 'Data file'
|
37
|
where doctype = 'BIN';
|
38
|
|
39
|
update doi_registration
|
40
|
set title = 'Legacy EML file'
|
41
|
where doctype like '%eml%2.0.0beta%';
|
42
|
|
43
|
-- update title using node information
|
44
|
update doi_registration doi
|
45
|
set title = child.nodedata
|
46
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
47
|
where doi.guid = id.guid
|
48
|
and id.docid = docs.docid
|
49
|
and id.rev = docs.rev
|
50
|
and docs.rootnodeid = nodes.rootnodeid
|
51
|
and nodes.nodeid = child.parentnodeid
|
52
|
and nodes.parentnodeid = parent.nodeid
|
53
|
and nodes.nodename = 'title'
|
54
|
and parent.nodename = 'dataset';
|
55
|
|
56
|
-- update pubDate using nodes
|
57
|
update doi_registration doi
|
58
|
set pub_date = child.nodedata
|
59
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
60
|
where doi.guid = id.guid
|
61
|
and id.docid = docs.docid
|
62
|
and id.rev = docs.rev
|
63
|
and docs.rootnodeid = nodes.rootnodeid
|
64
|
and nodes.nodeid = child.parentnodeid
|
65
|
and nodes.parentnodeid = parent.nodeid
|
66
|
and nodes.nodename = 'pubDate'
|
67
|
and parent.nodename = 'dataset';
|
68
|
|
69
|
-- update publisher using nodes
|
70
|
update doi_registration doi
|
71
|
set publisher = child.nodedata
|
72
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
73
|
where doi.guid = id.guid
|
74
|
and id.docid = docs.docid
|
75
|
and id.rev = docs.rev
|
76
|
and docs.rootnodeid = nodes.rootnodeid
|
77
|
and nodes.nodeid = child.parentnodeid
|
78
|
and nodes.parentnodeid = parent.nodeid
|
79
|
and nodes.nodename = 'publisher'
|
80
|
and parent.nodename = 'dataset';
|
81
|
|
82
|
-- update organization using nodes
|
83
|
update doi_registration doi
|
84
|
set creator = child.nodedata
|
85
|
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
|
86
|
where doi.guid = id.guid
|
87
|
and id.docid = docs.docid
|
88
|
and id.rev = docs.rev
|
89
|
and docs.rootnodeid = nodes.rootnodeid
|
90
|
and nodes.nodeid = child.parentnodeid
|
91
|
and nodes.parentnodeid = parent.nodeid
|
92
|
and nodes.nodename = 'organizationName'
|
93
|
and parent.nodename = 'creator'
|
94
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
95
|
|
96
|
-- update creator using nodes
|
97
|
-- creator/individualName/surName
|
98
|
-- creator/individualName/givenName
|
99
|
update doi_registration doi
|
100
|
set creator = child.nodedata
|
101
|
from identifier id,
|
102
|
xml_documents docs,
|
103
|
xml_nodes nodes,
|
104
|
xml_nodes parent,
|
105
|
xml_nodes child,
|
106
|
xml_nodes grandparent
|
107
|
where doi.guid = id.guid
|
108
|
and id.docid = docs.docid
|
109
|
and id.rev = docs.rev
|
110
|
and docs.rootnodeid = nodes.rootnodeid
|
111
|
and nodes.nodeid = child.parentnodeid
|
112
|
and nodes.parentnodeid = parent.nodeid
|
113
|
and parent.parentnodeid = grandparent.nodeid
|
114
|
and nodes.nodename = 'surName'
|
115
|
and parent.nodename = 'individualName'
|
116
|
and grandparent.nodename = 'creator'
|
117
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
118
|
|
119
|
-- add the first name if we have it
|
120
|
update doi_registration doi
|
121
|
set creator = child.nodedata ||' '|| creator
|
122
|
from identifier id,
|
123
|
xml_documents docs,
|
124
|
xml_nodes nodes,
|
125
|
xml_nodes parent,
|
126
|
xml_nodes child,
|
127
|
xml_nodes grandparent
|
128
|
where doi.guid = id.guid
|
129
|
and id.docid = docs.docid
|
130
|
and id.rev = docs.rev
|
131
|
and docs.rootnodeid = nodes.rootnodeid
|
132
|
and nodes.nodeid = child.parentnodeid
|
133
|
and nodes.parentnodeid = parent.nodeid
|
134
|
and parent.parentnodeid = grandparent.nodeid
|
135
|
and nodes.nodename = 'givenName'
|
136
|
and parent.nodename = 'individualName'
|
137
|
and grandparent.nodename = 'creator'
|
138
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
139
|
|
140
|
-- update title using revisions
|
141
|
update doi_registration doi
|
142
|
set title = child.nodedata
|
143
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
144
|
where doi.guid = id.guid
|
145
|
and id.docid = docs.docid
|
146
|
and id.rev = docs.rev
|
147
|
and docs.rootnodeid = nodes.rootnodeid
|
148
|
and nodes.nodeid = child.parentnodeid
|
149
|
and nodes.parentnodeid = parent.nodeid
|
150
|
and nodes.nodename = 'title'
|
151
|
and parent.nodename = 'dataset';
|
152
|
|
153
|
-- update pubDate using revisions
|
154
|
update doi_registration doi
|
155
|
set pub_date = child.nodedata
|
156
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
157
|
where doi.guid = id.guid
|
158
|
and id.docid = docs.docid
|
159
|
and id.rev = docs.rev
|
160
|
and docs.rootnodeid = nodes.rootnodeid
|
161
|
and nodes.nodeid = child.parentnodeid
|
162
|
and nodes.parentnodeid = parent.nodeid
|
163
|
and nodes.nodename = 'pubDate'
|
164
|
and parent.nodename = 'dataset';
|
165
|
|
166
|
-- update publisher using revisions
|
167
|
update doi_registration doi
|
168
|
set publisher = child.nodedata
|
169
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
170
|
where doi.guid = id.guid
|
171
|
and id.docid = docs.docid
|
172
|
and id.rev = docs.rev
|
173
|
and docs.rootnodeid = nodes.rootnodeid
|
174
|
and nodes.nodeid = child.parentnodeid
|
175
|
and nodes.parentnodeid = parent.nodeid
|
176
|
and nodes.nodename = 'publisher'
|
177
|
and parent.nodename = 'dataset';
|
178
|
|
179
|
-- update organization using nodes
|
180
|
update doi_registration doi
|
181
|
set creator = child.nodedata
|
182
|
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
|
183
|
where doi.guid = id.guid
|
184
|
and id.docid = docs.docid
|
185
|
and id.rev = docs.rev
|
186
|
and docs.rootnodeid = nodes.rootnodeid
|
187
|
and nodes.nodeid = child.parentnodeid
|
188
|
and nodes.parentnodeid = parent.nodeid
|
189
|
and nodes.nodename = 'organizationName'
|
190
|
and parent.nodename = 'creator'
|
191
|
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
|
192
|
|
193
|
-- update creator using revisions
|
194
|
update doi_registration doi
|
195
|
set creator = child.nodedata
|
196
|
from identifier id,
|
197
|
xml_revisions docs,
|
198
|
xml_nodes_revisions nodes,
|
199
|
xml_nodes_revisions parent,
|
200
|
xml_nodes_revisions child,
|
201
|
xml_nodes_revisions grandparent
|
202
|
where doi.guid = id.guid
|
203
|
and id.docid = docs.docid
|
204
|
and id.rev = docs.rev
|
205
|
and docs.rootnodeid = nodes.rootnodeid
|
206
|
and nodes.nodeid = child.parentnodeid
|
207
|
and nodes.parentnodeid = parent.nodeid
|
208
|
and parent.parentnodeid = grandparent.nodeid
|
209
|
and nodes.nodename = 'surName'
|
210
|
and parent.nodename = 'individualName'
|
211
|
and grandparent.nodename = 'creator'
|
212
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
213
|
|
214
|
-- add the first name if we have it
|
215
|
update doi_registration doi
|
216
|
set creator = child.nodedata ||' '|| creator
|
217
|
from identifier id,
|
218
|
xml_revisions docs,
|
219
|
xml_nodes_revisions nodes,
|
220
|
xml_nodes_revisions parent,
|
221
|
xml_nodes_revisions child,
|
222
|
xml_nodes_revisions grandparent
|
223
|
where doi.guid = id.guid
|
224
|
and id.docid = docs.docid
|
225
|
and id.rev = docs.rev
|
226
|
and docs.rootnodeid = nodes.rootnodeid
|
227
|
and nodes.nodeid = child.parentnodeid
|
228
|
and nodes.parentnodeid = parent.nodeid
|
229
|
and parent.parentnodeid = grandparent.nodeid
|
230
|
and nodes.nodename = 'givenName'
|
231
|
and parent.nodename = 'individualName'
|
232
|
and grandparent.nodename = 'creator'
|
233
|
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
|
234
|
|
235
|
-- use xml_documents for defaults that are still missing
|
236
|
update doi_registration doi
|
237
|
set creator = doc.user_owner
|
238
|
from xml_documents doc, identifier id
|
239
|
where doi.guid = id.guid
|
240
|
and id.docid = doc.docid
|
241
|
and id.rev = doc.rev
|
242
|
and doi.creator is null;
|
243
|
|
244
|
update doi_registration doi
|
245
|
set creator = doc.user_owner
|
246
|
from xml_revisions doc, identifier id
|
247
|
where doi.guid = id.guid
|
248
|
and id.docid = doc.docid
|
249
|
and id.rev = doc.rev
|
250
|
and doi.creator is null;
|
251
|
|
252
|
-- set publisher
|
253
|
update doi_registration
|
254
|
set publisher = creator
|
255
|
where publisher is null or publisher = '';
|
256
|
|
257
|
-- clean up
|
258
|
update doi_registration
|
259
|
set title = replace(title, E'\n', ' ');
|
260
|
|
261
|
update doi_registration
|
262
|
set title = regexp_replace(title, E'\\s+', ' ');
|
263
|
|
264
|
update doi_registration
|
265
|
set title = regexp_replace(title, E'^\\s+', '');
|
266
|
|
267
|
COPY (select ezid_account, guid as doi, url, title, creator, publisher, pub_date from doi_registration order by doi)
|
268
|
TO '/tmp/doi_registration.csv'
|
269
|
WITH CSV HEADER;
|
270
|
|
271
|
--drop table doi_registration;
|
272
|
|