Project

General

Profile

« Previous | Next » 

Revision 7191

include revisions table in the initial temp table population.
use the "first" creator listed in the EML (either org or person).
use other reasonable default values as needed to fully populate the spreadsheet columns
https://redmine.dataone.org/issues/2815

View differences:

src/doi_registration.sql
1 1
-- use default values initially
2
CREATE TABLE doi_registration AS
3

  
2 4
select 
3 5
text('testing') as ezid_account,
4 6
doc.doctype,
5 7
id.guid, 
6 8
'https://cn-stage.dataone.org/cn/v1/resolve/' || id.guid as url, 
7 9
text('Unknown') as title, 
8
text(doc.user_owner) as creator,
9
text(doc.user_owner) as publisher,
10
text(null) as creator,
11
text(null) as publisher,
10 12
to_char(doc.date_created, 'YYYY') as pub_date
11
into table doi_registration 
12 13
from identifier id, xml_documents doc
13 14
where guid like 'doi%'
14
and id.docid = doc.docid;
15
and id.docid = doc.docid
16
and id.rev = doc.rev
15 17

  
18
UNION ALL
19

  
20
select 
21
text('testing') as ezid_account,
22
doc.doctype,
23
id.guid, 
24
'https://cn-stage.dataone.org/cn/v1/resolve/' || id.guid as url, 
25
text('Unknown') as title, 
26
text(null) as creator,
27
text(null) as publisher,
28
to_char(doc.date_created, 'YYYY') as pub_date
29
from identifier id, xml_revisions doc
30
where guid like 'doi%'
31
and id.docid = doc.docid
32
and id.rev = doc.rev;
33

  
16 34
--update defaults
17 35
update doi_registration
18 36
set title = 'Data file'
......
35 53
and nodes.nodename = 'title'
36 54
and parent.nodename = 'dataset';
37 55

  
38
-- update pubDate using revisions
56
-- update pubDate using nodes
39 57
update doi_registration doi
40 58
set pub_date = child.nodedata
41 59
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
......
48 66
and nodes.nodename = 'pubDate'
49 67
and parent.nodename = 'dataset';
50 68

  
51
-- update publisher using revisions
69
-- update publisher using nodes
52 70
update doi_registration doi
53 71
set publisher = child.nodedata
54 72
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
......
61 79
and nodes.nodename = 'publisher'
62 80
and parent.nodename = 'dataset';
63 81

  
82
-- update organization using nodes
83
update doi_registration doi
84
set creator = child.nodedata
85
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
86
where doi.guid = id.guid
87
and id.docid = docs.docid
88
and id.rev = docs.rev
89
and docs.rootnodeid = nodes.rootnodeid
90
and nodes.nodeid = child.parentnodeid
91
and nodes.parentnodeid = parent.nodeid
92
and nodes.nodename = 'organizationName'
93
and parent.nodename = 'creator'
94
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
95

  
64 96
-- update creator using nodes
65 97
-- creator/individualName/surName
66 98
-- creator/individualName/givenName
67
-- creator/organizationName
68
-- TODO: use only first creator based on nodeid sequence
69 99
update doi_registration doi
70
set creator = siblingchild.nodedata ||' '|| child.nodedata
100
set creator = child.nodedata
71 101
from identifier id, 
72 102
xml_documents docs, 
73 103
xml_nodes nodes, 
74 104
xml_nodes parent, 
75 105
xml_nodes child, 
76
xml_nodes grandparent, 
77
xml_nodes sibling, 
78
xml_nodes siblingchild
106
xml_nodes grandparent
79 107
where doi.guid = id.guid
80 108
and id.docid = docs.docid
81 109
and id.rev = docs.rev
......
86 114
and nodes.nodename = 'surName'
87 115
and parent.nodename = 'individualName'
88 116
and grandparent.nodename = 'creator'
89
and parent.nodeid = sibling.parentnodeid
90
and sibling.nodeid = siblingchild.parentnodeid
91
and sibling.nodename = 'givenName';
117
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
92 118

  
119
-- add the first name if we have it
120
update doi_registration doi
121
set creator = child.nodedata ||' '|| creator
122
from identifier id, 
123
xml_documents docs, 
124
xml_nodes nodes, 
125
xml_nodes parent, 
126
xml_nodes child, 
127
xml_nodes grandparent
128
where doi.guid = id.guid
129
and id.docid = docs.docid
130
and id.rev = docs.rev
131
and docs.rootnodeid = nodes.rootnodeid
132
and nodes.nodeid = child.parentnodeid
133
and nodes.parentnodeid = parent.nodeid
134
and parent.parentnodeid = grandparent.nodeid
135
and nodes.nodename = 'givenName'
136
and parent.nodename = 'individualName'
137
and grandparent.nodename = 'creator'
138
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
93 139

  
94 140
-- update title using revisions
95 141
update doi_registration doi
......
130 176
and nodes.nodename = 'publisher'
131 177
and parent.nodename = 'dataset';
132 178

  
179
-- update organization using nodes
180
update doi_registration doi
181
set creator = child.nodedata
182
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
183
where doi.guid = id.guid
184
and id.docid = docs.docid
185
and id.rev = docs.rev
186
and docs.rootnodeid = nodes.rootnodeid
187
and nodes.nodeid = child.parentnodeid
188
and nodes.parentnodeid = parent.nodeid
189
and nodes.nodename = 'organizationName'
190
and parent.nodename = 'creator'
191
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT');
133 192

  
134 193
-- update creator using revisions
135 194
update doi_registration doi
136
set creator = siblingchild.nodedata ||' '|| child.nodedata
195
set creator = child.nodedata
137 196
from identifier id, 
138 197
xml_revisions docs, 
139 198
xml_nodes_revisions nodes, 
140 199
xml_nodes_revisions parent, 
141 200
xml_nodes_revisions child, 
142
xml_nodes_revisions grandparent, 
143
xml_nodes_revisions sibling, 
144
xml_nodes_revisions siblingchild
201
xml_nodes_revisions grandparent
145 202
where doi.guid = id.guid
146 203
and id.docid = docs.docid
147 204
and id.rev = docs.rev
......
152 209
and nodes.nodename = 'surName'
153 210
and parent.nodename = 'individualName'
154 211
and grandparent.nodename = 'creator'
155
and parent.nodeid = sibling.parentnodeid
156
and sibling.nodeid = siblingchild.parentnodeid
157
and sibling.nodename = 'givenName';
212
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
158 213

  
214
-- add the first name if we have it
215
update doi_registration doi
216
set creator = child.nodedata ||' '|| creator
217
from identifier id, 
218
xml_revisions docs, 
219
xml_nodes_revisions nodes, 
220
xml_nodes_revisions parent, 
221
xml_nodes_revisions child, 
222
xml_nodes_revisions grandparent
223
where doi.guid = id.guid
224
and id.docid = docs.docid
225
and id.rev = docs.rev
226
and docs.rootnodeid = nodes.rootnodeid
227
and nodes.nodeid = child.parentnodeid
228
and nodes.parentnodeid = parent.nodeid
229
and parent.parentnodeid = grandparent.nodeid
230
and nodes.nodename = 'givenName'
231
and parent.nodename = 'individualName'
232
and grandparent.nodename = 'creator'
233
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT');
234

  
235
-- use xml_documents for defaults that are still missing 
236
update doi_registration doi
237
set creator = doc.user_owner
238
from xml_documents doc, identifier id
239
where doi.guid = id.guid
240
and id.docid = doc.docid
241
and id.rev = doc.rev
242
and doi.creator is null;
243

  
244
update doi_registration doi
245
set creator = doc.user_owner
246
from xml_revisions doc, identifier id
247
where doi.guid = id.guid
248
and id.docid = doc.docid
249
and id.rev = doc.rev
250
and doi.creator is null;
251

  
252
-- set publisher
253
update doi_registration
254
set publisher = creator
255
where publisher is null or publisher = '';
256

  
159 257
-- clean up
160 258
update doi_registration
161 259
set title = replace(title, E'\n', ' ');
......
171 269
WITH CSV HEADER;
172 270

  
173 271
--drop table doi_registration;
272

  

Also available in: Unified diff