Project

General

Profile

1
-- use default values initially
2
CREATE TABLE doi_registration AS
3

    
4
select 
5
text('testing') as ezid_account,
6
doc.doctype,
7
id.guid, 
8
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url, 
9
text('Unknown') as title, 
10
text(null) as creator,
11
text(null) as publisher,
12
to_char(doc.date_created, 'YYYY') as pub_date
13
from identifier id, xml_documents doc
14
where guid like 'doi%'
15
and id.docid = doc.docid
16
and id.rev = doc.rev
17

    
18
UNION ALL
19

    
20
select 
21
text('testing') as ezid_account,
22
doc.doctype,
23
id.guid, 
24
'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url, 
25
text('Unknown') as title, 
26
text(null) as creator,
27
text(null) as publisher,
28
to_char(doc.date_created, 'YYYY') as pub_date
29
from identifier id, xml_revisions doc
30
where guid like 'doi%'
31
and id.docid = doc.docid
32
and id.rev = doc.rev;
33

    
34
--update defaults
35
update doi_registration
36
set title = 'Data file'
37
where doctype = 'BIN';
38

    
39
update doi_registration
40
set title = 'Legacy EML file'
41
where doctype like '%eml%2.0.0beta%';
42

    
43
-- update title using node information
44
update doi_registration doi
45
set title = child.nodedata
46
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
47
where doi.guid = id.guid
48
and id.docid = docs.docid
49
and id.rev = docs.rev
50
and docs.rootnodeid = nodes.rootnodeid
51
and nodes.nodeid = child.parentnodeid
52
and nodes.parentnodeid = parent.nodeid
53
and nodes.nodename = 'title'
54
and parent.nodename = 'dataset';
55

    
56
-- update pubDate using nodes
57
update doi_registration doi
58
set pub_date = child.nodedata
59
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
60
where doi.guid = id.guid
61
and id.docid = docs.docid
62
and id.rev = docs.rev
63
and docs.rootnodeid = nodes.rootnodeid
64
and nodes.nodeid = child.parentnodeid
65
and nodes.parentnodeid = parent.nodeid
66
and nodes.nodename = 'pubDate'
67
and parent.nodename = 'dataset';
68

    
69
-- update publisher using nodes
70
update doi_registration doi
71
set publisher = child.nodedata
72
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
73
where doi.guid = id.guid
74
and id.docid = docs.docid
75
and id.rev = docs.rev
76
and docs.rootnodeid = nodes.rootnodeid
77
and nodes.nodeid = child.parentnodeid
78
and nodes.parentnodeid = parent.nodeid
79
and nodes.nodename = 'publisher'
80
and parent.nodename = 'dataset';
81

    
82
-- update organization using nodes
83
update doi_registration doi
84
set creator = child.nodedata
85
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
86
where doi.guid = id.guid
87
and id.docid = docs.docid
88
and id.rev = docs.rev
89
and docs.rootnodeid = nodes.rootnodeid
90
and nodes.nodeid = child.parentnodeid
91
and nodes.parentnodeid = parent.nodeid
92
and nodes.nodename = 'organizationName'
93
and parent.nodename = 'creator'
94
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
95

    
96
-- update creator using nodes
97
-- creator/individualName/surName
98
-- creator/individualName/givenName
99
update doi_registration doi
100
set creator = child.nodedata
101
from identifier id, 
102
xml_documents docs, 
103
xml_nodes nodes, 
104
xml_nodes parent, 
105
xml_nodes child, 
106
xml_nodes grandparent
107
where doi.guid = id.guid
108
and id.docid = docs.docid
109
and id.rev = docs.rev
110
and docs.rootnodeid = nodes.rootnodeid
111
and nodes.nodeid = child.parentnodeid
112
and nodes.parentnodeid = parent.nodeid
113
and parent.parentnodeid = grandparent.nodeid
114
and nodes.nodename = 'surName'
115
and parent.nodename = 'individualName'
116
and grandparent.nodename = 'creator'
117
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
118

    
119
-- add the first name if we have it
120
update doi_registration doi
121
set creator = child.nodedata ||' '|| creator
122
from identifier id, 
123
xml_documents docs, 
124
xml_nodes nodes, 
125
xml_nodes parent, 
126
xml_nodes child, 
127
xml_nodes grandparent
128
where doi.guid = id.guid
129
and id.docid = docs.docid
130
and id.rev = docs.rev
131
and docs.rootnodeid = nodes.rootnodeid
132
and nodes.nodeid = child.parentnodeid
133
and nodes.parentnodeid = parent.nodeid
134
and parent.parentnodeid = grandparent.nodeid
135
and nodes.nodename = 'givenName'
136
and parent.nodename = 'individualName'
137
and grandparent.nodename = 'creator'
138
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
139

    
140
-- update title using revisions
141
update doi_registration doi
142
set title = child.nodedata
143
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
144
where doi.guid = id.guid
145
and id.docid = docs.docid
146
and id.rev = docs.rev
147
and docs.rootnodeid = nodes.rootnodeid
148
and nodes.nodeid = child.parentnodeid
149
and nodes.parentnodeid = parent.nodeid
150
and nodes.nodename = 'title'
151
and parent.nodename = 'dataset';
152

    
153
-- update pubDate using revisions
154
update doi_registration doi
155
set pub_date = child.nodedata
156
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
157
where doi.guid = id.guid
158
and id.docid = docs.docid
159
and id.rev = docs.rev
160
and docs.rootnodeid = nodes.rootnodeid
161
and nodes.nodeid = child.parentnodeid
162
and nodes.parentnodeid = parent.nodeid
163
and nodes.nodename = 'pubDate'
164
and parent.nodename = 'dataset';
165

    
166
-- update publisher using revisions
167
update doi_registration doi
168
set publisher = child.nodedata
169
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
170
where doi.guid = id.guid
171
and id.docid = docs.docid
172
and id.rev = docs.rev
173
and docs.rootnodeid = nodes.rootnodeid
174
and nodes.nodeid = child.parentnodeid
175
and nodes.parentnodeid = parent.nodeid
176
and nodes.nodename = 'publisher'
177
and parent.nodename = 'dataset';
178

    
179
-- update organization using nodes
180
update doi_registration doi
181
set creator = child.nodedata
182
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
183
where doi.guid = id.guid
184
and id.docid = docs.docid
185
and id.rev = docs.rev
186
and docs.rootnodeid = nodes.rootnodeid
187
and nodes.nodeid = child.parentnodeid
188
and nodes.parentnodeid = parent.nodeid
189
and nodes.nodename = 'organizationName'
190
and parent.nodename = 'creator'
191
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
192

    
193
-- update creator using revisions
194
update doi_registration doi
195
set creator = child.nodedata
196
from identifier id, 
197
xml_revisions docs, 
198
xml_nodes_revisions nodes, 
199
xml_nodes_revisions parent, 
200
xml_nodes_revisions child, 
201
xml_nodes_revisions grandparent
202
where doi.guid = id.guid
203
and id.docid = docs.docid
204
and id.rev = docs.rev
205
and docs.rootnodeid = nodes.rootnodeid
206
and nodes.nodeid = child.parentnodeid
207
and nodes.parentnodeid = parent.nodeid
208
and parent.parentnodeid = grandparent.nodeid
209
and nodes.nodename = 'surName'
210
and parent.nodename = 'individualName'
211
and grandparent.nodename = 'creator'
212
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
213

    
214
-- add the first name if we have it
215
update doi_registration doi
216
set creator = child.nodedata ||' '|| creator
217
from identifier id, 
218
xml_revisions docs, 
219
xml_nodes_revisions nodes, 
220
xml_nodes_revisions parent, 
221
xml_nodes_revisions child, 
222
xml_nodes_revisions grandparent
223
where doi.guid = id.guid
224
and id.docid = docs.docid
225
and id.rev = docs.rev
226
and docs.rootnodeid = nodes.rootnodeid
227
and nodes.nodeid = child.parentnodeid
228
and nodes.parentnodeid = parent.nodeid
229
and parent.parentnodeid = grandparent.nodeid
230
and nodes.nodename = 'givenName'
231
and parent.nodename = 'individualName'
232
and grandparent.nodename = 'creator'
233
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
234

    
235
-- use xml_documents for defaults that are still missing 
236
update doi_registration doi
237
set creator = doc.user_owner
238
from xml_documents doc, identifier id
239
where doi.guid = id.guid
240
and id.docid = doc.docid
241
and id.rev = doc.rev
242
and doi.creator is null;
243

    
244
update doi_registration doi
245
set creator = doc.user_owner
246
from xml_revisions doc, identifier id
247
where doi.guid = id.guid
248
and id.docid = doc.docid
249
and id.rev = doc.rev
250
and doi.creator is null;
251

    
252
-- set publisher
253
update doi_registration
254
set publisher = creator
255
where publisher is null or publisher = '';
256

    
257
-- clean up
258
update doi_registration
259
set title = replace(title, E'\n', ' ');
260

    
261
update doi_registration
262
set title = regexp_replace(title, E'\\s+', ' ', 'g');
263

    
264
update doi_registration
265
set title = regexp_replace(title, E'^\\s+', '', 'g');
266

    
267
update doi_registration
268
set ezid_account = 'KNB'
269
where guid like 'doi:10.5063%';
270
--16988
271

    
272
update doi_registration
273
set ezid_account = 'PISCO'
274
where guid like 'doi:10.6085%';
275
--98078
276

    
277
update doi_registration
278
set ezid_account = 'LTER'
279
where guid like 'doi:10.6073%';
280
--49482
281

    
282
COPY (select ezid_account, guid as doi, url, title, creator, publisher, pub_date from doi_registration order by doi) 
283
TO '/tmp/doi_registration.csv'
284
WITH CSV HEADER;
285
--164548
286

    
287
--drop table doi_registration;
288

    
(9-9/60)