Revision 7191
Added by ben leinfelder over 12 years ago
src/doi_registration.sql | ||
---|---|---|
1 | 1 |
-- use default values initially |
2 |
CREATE TABLE doi_registration AS |
|
3 |
|
|
2 | 4 |
select |
3 | 5 |
text('testing') as ezid_account, |
4 | 6 |
doc.doctype, |
5 | 7 |
id.guid, |
6 | 8 |
'https://cn-stage.dataone.org/cn/v1/resolve/' || id.guid as url, |
7 | 9 |
text('Unknown') as title, |
8 |
text(doc.user_owner) as creator,
|
|
9 |
text(doc.user_owner) as publisher,
|
|
10 |
text(null) as creator,
|
|
11 |
text(null) as publisher,
|
|
10 | 12 |
to_char(doc.date_created, 'YYYY') as pub_date |
11 |
into table doi_registration |
|
12 | 13 |
from identifier id, xml_documents doc |
13 | 14 |
where guid like 'doi%' |
14 |
and id.docid = doc.docid; |
|
15 |
and id.docid = doc.docid |
|
16 |
and id.rev = doc.rev |
|
15 | 17 |
|
18 |
UNION ALL |
|
19 |
|
|
20 |
select |
|
21 |
text('testing') as ezid_account, |
|
22 |
doc.doctype, |
|
23 |
id.guid, |
|
24 |
'https://cn-stage.dataone.org/cn/v1/resolve/' || id.guid as url, |
|
25 |
text('Unknown') as title, |
|
26 |
text(null) as creator, |
|
27 |
text(null) as publisher, |
|
28 |
to_char(doc.date_created, 'YYYY') as pub_date |
|
29 |
from identifier id, xml_revisions doc |
|
30 |
where guid like 'doi%' |
|
31 |
and id.docid = doc.docid |
|
32 |
and id.rev = doc.rev; |
|
33 |
|
|
16 | 34 |
--update defaults |
17 | 35 |
update doi_registration |
18 | 36 |
set title = 'Data file' |
... | ... | |
35 | 53 |
and nodes.nodename = 'title' |
36 | 54 |
and parent.nodename = 'dataset'; |
37 | 55 |
|
38 |
-- update pubDate using revisions
|
|
56 |
-- update pubDate using nodes
|
|
39 | 57 |
update doi_registration doi |
40 | 58 |
set pub_date = child.nodedata |
41 | 59 |
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child |
... | ... | |
48 | 66 |
and nodes.nodename = 'pubDate' |
49 | 67 |
and parent.nodename = 'dataset'; |
50 | 68 |
|
51 |
-- update publisher using revisions
|
|
69 |
-- update publisher using nodes
|
|
52 | 70 |
update doi_registration doi |
53 | 71 |
set publisher = child.nodedata |
54 | 72 |
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child |
... | ... | |
61 | 79 |
and nodes.nodename = 'publisher' |
62 | 80 |
and parent.nodename = 'dataset'; |
63 | 81 |
|
82 |
-- update organization using nodes |
|
83 |
update doi_registration doi |
|
84 |
set creator = child.nodedata |
|
85 |
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child |
|
86 |
where doi.guid = id.guid |
|
87 |
and id.docid = docs.docid |
|
88 |
and id.rev = docs.rev |
|
89 |
and docs.rootnodeid = nodes.rootnodeid |
|
90 |
and nodes.nodeid = child.parentnodeid |
|
91 |
and nodes.parentnodeid = parent.nodeid |
|
92 |
and nodes.nodename = 'organizationName' |
|
93 |
and parent.nodename = 'creator' |
|
94 |
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT'); |
|
95 |
|
|
64 | 96 |
-- update creator using nodes |
65 | 97 |
-- creator/individualName/surName |
66 | 98 |
-- creator/individualName/givenName |
67 |
-- creator/organizationName |
|
68 |
-- TODO: use only first creator based on nodeid sequence |
|
69 | 99 |
update doi_registration doi |
70 |
set creator = siblingchild.nodedata ||' '|| child.nodedata
|
|
100 |
set creator = child.nodedata |
|
71 | 101 |
from identifier id, |
72 | 102 |
xml_documents docs, |
73 | 103 |
xml_nodes nodes, |
74 | 104 |
xml_nodes parent, |
75 | 105 |
xml_nodes child, |
76 |
xml_nodes grandparent, |
|
77 |
xml_nodes sibling, |
|
78 |
xml_nodes siblingchild |
|
106 |
xml_nodes grandparent |
|
79 | 107 |
where doi.guid = id.guid |
80 | 108 |
and id.docid = docs.docid |
81 | 109 |
and id.rev = docs.rev |
... | ... | |
86 | 114 |
and nodes.nodename = 'surName' |
87 | 115 |
and parent.nodename = 'individualName' |
88 | 116 |
and grandparent.nodename = 'creator' |
89 |
and parent.nodeid = sibling.parentnodeid |
|
90 |
and sibling.nodeid = siblingchild.parentnodeid |
|
91 |
and sibling.nodename = 'givenName'; |
|
117 |
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); |
|
92 | 118 |
|
119 |
-- add the first name if we have it |
|
120 |
update doi_registration doi |
|
121 |
set creator = child.nodedata ||' '|| creator |
|
122 |
from identifier id, |
|
123 |
xml_documents docs, |
|
124 |
xml_nodes nodes, |
|
125 |
xml_nodes parent, |
|
126 |
xml_nodes child, |
|
127 |
xml_nodes grandparent |
|
128 |
where doi.guid = id.guid |
|
129 |
and id.docid = docs.docid |
|
130 |
and id.rev = docs.rev |
|
131 |
and docs.rootnodeid = nodes.rootnodeid |
|
132 |
and nodes.nodeid = child.parentnodeid |
|
133 |
and nodes.parentnodeid = parent.nodeid |
|
134 |
and parent.parentnodeid = grandparent.nodeid |
|
135 |
and nodes.nodename = 'givenName' |
|
136 |
and parent.nodename = 'individualName' |
|
137 |
and grandparent.nodename = 'creator' |
|
138 |
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); |
|
93 | 139 |
|
94 | 140 |
-- update title using revisions |
95 | 141 |
update doi_registration doi |
... | ... | |
130 | 176 |
and nodes.nodename = 'publisher' |
131 | 177 |
and parent.nodename = 'dataset'; |
132 | 178 |
|
179 |
-- update organization using nodes |
|
180 |
update doi_registration doi |
|
181 |
set creator = child.nodedata |
|
182 |
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child |
|
183 |
where doi.guid = id.guid |
|
184 |
and id.docid = docs.docid |
|
185 |
and id.rev = docs.rev |
|
186 |
and docs.rootnodeid = nodes.rootnodeid |
|
187 |
and nodes.nodeid = child.parentnodeid |
|
188 |
and nodes.parentnodeid = parent.nodeid |
|
189 |
and nodes.nodename = 'organizationName' |
|
190 |
and parent.nodename = 'creator' |
|
191 |
and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT'); |
|
133 | 192 |
|
134 | 193 |
-- update creator using revisions |
135 | 194 |
update doi_registration doi |
136 |
set creator = siblingchild.nodedata ||' '|| child.nodedata
|
|
195 |
set creator = child.nodedata |
|
137 | 196 |
from identifier id, |
138 | 197 |
xml_revisions docs, |
139 | 198 |
xml_nodes_revisions nodes, |
140 | 199 |
xml_nodes_revisions parent, |
141 | 200 |
xml_nodes_revisions child, |
142 |
xml_nodes_revisions grandparent, |
|
143 |
xml_nodes_revisions sibling, |
|
144 |
xml_nodes_revisions siblingchild |
|
201 |
xml_nodes_revisions grandparent |
|
145 | 202 |
where doi.guid = id.guid |
146 | 203 |
and id.docid = docs.docid |
147 | 204 |
and id.rev = docs.rev |
... | ... | |
152 | 209 |
and nodes.nodename = 'surName' |
153 | 210 |
and parent.nodename = 'individualName' |
154 | 211 |
and grandparent.nodename = 'creator' |
155 |
and parent.nodeid = sibling.parentnodeid |
|
156 |
and sibling.nodeid = siblingchild.parentnodeid |
|
157 |
and sibling.nodename = 'givenName'; |
|
212 |
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); |
|
158 | 213 |
|
214 |
-- add the first name if we have it |
|
215 |
update doi_registration doi |
|
216 |
set creator = child.nodedata ||' '|| creator |
|
217 |
from identifier id, |
|
218 |
xml_revisions docs, |
|
219 |
xml_nodes_revisions nodes, |
|
220 |
xml_nodes_revisions parent, |
|
221 |
xml_nodes_revisions child, |
|
222 |
xml_nodes_revisions grandparent |
|
223 |
where doi.guid = id.guid |
|
224 |
and id.docid = docs.docid |
|
225 |
and id.rev = docs.rev |
|
226 |
and docs.rootnodeid = nodes.rootnodeid |
|
227 |
and nodes.nodeid = child.parentnodeid |
|
228 |
and nodes.parentnodeid = parent.nodeid |
|
229 |
and parent.parentnodeid = grandparent.nodeid |
|
230 |
and nodes.nodename = 'givenName' |
|
231 |
and parent.nodename = 'individualName' |
|
232 |
and grandparent.nodename = 'creator' |
|
233 |
and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); |
|
234 |
|
|
235 |
-- use xml_documents for defaults that are still missing |
|
236 |
update doi_registration doi |
|
237 |
set creator = doc.user_owner |
|
238 |
from xml_documents doc, identifier id |
|
239 |
where doi.guid = id.guid |
|
240 |
and id.docid = doc.docid |
|
241 |
and id.rev = doc.rev |
|
242 |
and doi.creator is null; |
|
243 |
|
|
244 |
update doi_registration doi |
|
245 |
set creator = doc.user_owner |
|
246 |
from xml_revisions doc, identifier id |
|
247 |
where doi.guid = id.guid |
|
248 |
and id.docid = doc.docid |
|
249 |
and id.rev = doc.rev |
|
250 |
and doi.creator is null; |
|
251 |
|
|
252 |
-- set publisher |
|
253 |
update doi_registration |
|
254 |
set publisher = creator |
|
255 |
where publisher is null or publisher = ''; |
|
256 |
|
|
159 | 257 |
-- clean up |
160 | 258 |
update doi_registration |
161 | 259 |
set title = replace(title, E'\n', ' '); |
... | ... | |
171 | 269 |
WITH CSV HEADER; |
172 | 270 |
|
173 | 271 |
--drop table doi_registration; |
272 |
|
Also available in: Unified diff
include revisions table in the initial temp table population.
use the "first" creator listed in the EML (either org or person).
use other reasonable default values as needed to fully populate the spreadsheet columns
https://redmine.dataone.org/issues/2815