Project

General

Profile

« Previous | Next » 

Revision 7190

add columns: publisher and pub_date. include default values for all columns - even data files should have title.
still a few todos but closer.
https://redmine.dataone.org/issues/2815

View differences:

src/doi_registration.sql
1
-- use default values initially
1 2
select 
2 3
text('testing') as ezid_account,
3 4
doc.doctype,
4 5
id.guid, 
5 6
'https://cn-stage.dataone.org/cn/v1/resolve/' || id.guid as url, 
6
text(null) as title, 
7
text(null) as creator
7
text('Unknown') as title, 
8
text(doc.user_owner) as creator,
9
text(doc.user_owner) as publisher,
10
to_char(doc.date_created, 'YYYY') as pub_date
8 11
into table doi_registration 
9 12
from identifier id, xml_documents doc
10 13
where guid like 'doi%'
11 14
and id.docid = doc.docid;
12 15

  
13
-- update title using nodes
16
--update defaults
17
update doi_registration
18
set title = 'Data file'
19
where doctype = 'BIN';
20

  
21
update doi_registration
22
set title = 'Legacy EML file'
23
where doctype like '%eml%2.0.0beta%';
24

  
25
-- update title using node information
14 26
update doi_registration doi
15 27
set title = child.nodedata
16 28
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
......
23 35
and nodes.nodename = 'title'
24 36
and parent.nodename = 'dataset';
25 37

  
38
-- update pubDate using revisions
39
update doi_registration doi
40
set pub_date = child.nodedata
41
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
42
where doi.guid = id.guid
43
and id.docid = docs.docid
44
and id.rev = docs.rev
45
and docs.rootnodeid = nodes.rootnodeid
46
and nodes.nodeid = child.parentnodeid
47
and nodes.parentnodeid = parent.nodeid
48
and nodes.nodename = 'pubDate'
49
and parent.nodename = 'dataset';
50

  
51
-- update publisher using revisions
52
update doi_registration doi
53
set publisher = child.nodedata
54
from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child
55
where doi.guid = id.guid
56
and id.docid = docs.docid
57
and id.rev = docs.rev
58
and docs.rootnodeid = nodes.rootnodeid
59
and nodes.nodeid = child.parentnodeid
60
and nodes.parentnodeid = parent.nodeid
61
and nodes.nodename = 'publisher'
62
and parent.nodename = 'dataset';
63

  
26 64
-- update creator using nodes
27 65
-- creator/individualName/surName
28 66
-- creator/individualName/givenName
29
-- TODO: use only surname since it is required?
67
-- creator/organizationName
68
-- TODO: use only first creator based on nodeid sequence
30 69
update doi_registration doi
31 70
set creator = siblingchild.nodedata ||' '|| child.nodedata
32 71
from identifier id, 
......
65 104
and nodes.nodename = 'title'
66 105
and parent.nodename = 'dataset';
67 106

  
107
-- update pubDate using revisions
108
update doi_registration doi
109
set pub_date = child.nodedata
110
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
111
where doi.guid = id.guid
112
and id.docid = docs.docid
113
and id.rev = docs.rev
114
and docs.rootnodeid = nodes.rootnodeid
115
and nodes.nodeid = child.parentnodeid
116
and nodes.parentnodeid = parent.nodeid
117
and nodes.nodename = 'pubDate'
118
and parent.nodename = 'dataset';
68 119

  
120
-- update publisher using revisions
121
update doi_registration doi
122
set publisher = child.nodedata
123
from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child
124
where doi.guid = id.guid
125
and id.docid = docs.docid
126
and id.rev = docs.rev
127
and docs.rootnodeid = nodes.rootnodeid
128
and nodes.nodeid = child.parentnodeid
129
and nodes.parentnodeid = parent.nodeid
130
and nodes.nodename = 'publisher'
131
and parent.nodename = 'dataset';
132

  
133

  
69 134
-- update creator using revisions
70 135
update doi_registration doi
71 136
set creator = siblingchild.nodedata ||' '|| child.nodedata
......
95 160
update doi_registration
96 161
set title = replace(title, E'\n', ' ');
97 162

  
98
COPY (select ezid_account, guid as doi, url, title, creator from doi_registration order by doi) 
163
update doi_registration
164
set title = regexp_replace(title, E'\\s+', ' ');
165

  
166
update doi_registration
167
set title = regexp_replace(title, E'^\\s+', '');
168

  
169
COPY (select ezid_account, guid as doi, url, title, creator, publisher, pub_date from doi_registration order by doi) 
99 170
TO '/tmp/doi_registration.csv'
100 171
WITH CSV HEADER;
101 172

  

Also available in: Unified diff