Revision 7190
Added by ben leinfelder over 12 years ago
src/doi_registration.sql | ||
---|---|---|
1 |
-- use default values initially |
|
1 | 2 |
select |
2 | 3 |
text('testing') as ezid_account, |
3 | 4 |
doc.doctype, |
4 | 5 |
id.guid, |
5 | 6 |
'https://cn-stage.dataone.org/cn/v1/resolve/' || id.guid as url, |
6 |
text(null) as title, |
|
7 |
text(null) as creator |
|
7 |
text('Unknown') as title, |
|
8 |
text(doc.user_owner) as creator, |
|
9 |
text(doc.user_owner) as publisher, |
|
10 |
to_char(doc.date_created, 'YYYY') as pub_date |
|
8 | 11 |
into table doi_registration |
9 | 12 |
from identifier id, xml_documents doc |
10 | 13 |
where guid like 'doi%' |
11 | 14 |
and id.docid = doc.docid; |
12 | 15 |
|
13 |
-- update title using nodes |
|
16 |
--update defaults |
|
17 |
update doi_registration |
|
18 |
set title = 'Data file' |
|
19 |
where doctype = 'BIN'; |
|
20 |
|
|
21 |
update doi_registration |
|
22 |
set title = 'Legacy EML file' |
|
23 |
where doctype like '%eml%2.0.0beta%'; |
|
24 |
|
|
25 |
-- update title using node information |
|
14 | 26 |
update doi_registration doi |
15 | 27 |
set title = child.nodedata |
16 | 28 |
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child |
... | ... | |
23 | 35 |
and nodes.nodename = 'title' |
24 | 36 |
and parent.nodename = 'dataset'; |
25 | 37 |
|
38 |
-- update pubDate using revisions |
|
39 |
update doi_registration doi |
|
40 |
set pub_date = child.nodedata |
|
41 |
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child |
|
42 |
where doi.guid = id.guid |
|
43 |
and id.docid = docs.docid |
|
44 |
and id.rev = docs.rev |
|
45 |
and docs.rootnodeid = nodes.rootnodeid |
|
46 |
and nodes.nodeid = child.parentnodeid |
|
47 |
and nodes.parentnodeid = parent.nodeid |
|
48 |
and nodes.nodename = 'pubDate' |
|
49 |
and parent.nodename = 'dataset'; |
|
50 |
|
|
51 |
-- update publisher using revisions |
|
52 |
update doi_registration doi |
|
53 |
set publisher = child.nodedata |
|
54 |
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child |
|
55 |
where doi.guid = id.guid |
|
56 |
and id.docid = docs.docid |
|
57 |
and id.rev = docs.rev |
|
58 |
and docs.rootnodeid = nodes.rootnodeid |
|
59 |
and nodes.nodeid = child.parentnodeid |
|
60 |
and nodes.parentnodeid = parent.nodeid |
|
61 |
and nodes.nodename = 'publisher' |
|
62 |
and parent.nodename = 'dataset'; |
|
63 |
|
|
26 | 64 |
-- update creator using nodes |
27 | 65 |
-- creator/individualName/surName |
28 | 66 |
-- creator/individualName/givenName |
29 |
-- TODO: use only surname since it is required? |
|
67 |
-- creator/organizationName |
|
68 |
-- TODO: use only first creator based on nodeid sequence |
|
30 | 69 |
update doi_registration doi |
31 | 70 |
set creator = siblingchild.nodedata ||' '|| child.nodedata |
32 | 71 |
from identifier id, |
... | ... | |
65 | 104 |
and nodes.nodename = 'title' |
66 | 105 |
and parent.nodename = 'dataset'; |
67 | 106 |
|
107 |
-- update pubDate using revisions |
|
108 |
update doi_registration doi |
|
109 |
set pub_date = child.nodedata |
|
110 |
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child |
|
111 |
where doi.guid = id.guid |
|
112 |
and id.docid = docs.docid |
|
113 |
and id.rev = docs.rev |
|
114 |
and docs.rootnodeid = nodes.rootnodeid |
|
115 |
and nodes.nodeid = child.parentnodeid |
|
116 |
and nodes.parentnodeid = parent.nodeid |
|
117 |
and nodes.nodename = 'pubDate' |
|
118 |
and parent.nodename = 'dataset'; |
|
68 | 119 |
|
120 |
-- update publisher using revisions |
|
121 |
update doi_registration doi |
|
122 |
set publisher = child.nodedata |
|
123 |
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child |
|
124 |
where doi.guid = id.guid |
|
125 |
and id.docid = docs.docid |
|
126 |
and id.rev = docs.rev |
|
127 |
and docs.rootnodeid = nodes.rootnodeid |
|
128 |
and nodes.nodeid = child.parentnodeid |
|
129 |
and nodes.parentnodeid = parent.nodeid |
|
130 |
and nodes.nodename = 'publisher' |
|
131 |
and parent.nodename = 'dataset'; |
|
132 |
|
|
133 |
|
|
69 | 134 |
-- update creator using revisions |
70 | 135 |
update doi_registration doi |
71 | 136 |
set creator = siblingchild.nodedata ||' '|| child.nodedata |
... | ... | |
95 | 160 |
update doi_registration |
96 | 161 |
set title = replace(title, E'\n', ' '); |
97 | 162 |
|
98 |
COPY (select ezid_account, guid as doi, url, title, creator from doi_registration order by doi) |
|
163 |
update doi_registration |
|
164 |
set title = regexp_replace(title, E'\\s+', ' '); |
|
165 |
|
|
166 |
update doi_registration |
|
167 |
set title = regexp_replace(title, E'^\\s+', ''); |
|
168 |
|
|
169 |
COPY (select ezid_account, guid as doi, url, title, creator, publisher, pub_date from doi_registration order by doi) |
|
99 | 170 |
TO '/tmp/doi_registration.csv' |
100 | 171 |
WITH CSV HEADER; |
101 | 172 |
|
Also available in: Unified diff
add columns: publisher and pub_date. include default values for all columns - even data files should have title.
still a few todos but closer.
https://redmine.dataone.org/issues/2815