Revision 7414
Added by ben leinfelder about 12 years ago
src/doi_registration.sql | ||
---|---|---|
629 | 629 |
end |
630 | 630 |
where publisher = creator; |
631 | 631 |
|
632 |
-- entity references |
|
633 |
update doi_registration |
|
634 |
set publisher = regexp_replace(publisher, '&', '&', 'g'); |
|
635 |
update doi_registration |
|
636 |
set publisher = regexp_replace(publisher, ''', E'\'', 'g'); |
|
637 |
--accented i (í) |
|
638 |
update doi_registration |
|
639 |
set publisher = regexp_replace(publisher, 'í', 'í', 'g') |
|
640 |
where publisher like '%í%'; |
|
641 |
update doi_registration |
|
642 |
set creator = regexp_replace(creator, '&', '&', 'g'); |
|
643 |
update doi_registration |
|
644 |
set creator = regexp_replace(creator, ''', E'\'', 'g'); |
|
645 |
|
|
646 |
--titles |
|
647 |
update doi_registration |
|
648 |
set title = regexp_replace(title, '&', '&', 'g'); |
|
649 |
update doi_registration |
|
650 |
set title = regexp_replace(title, '∓', '&', 'g') |
|
651 |
where title like '%∓%'; |
|
652 |
update doi_registration |
|
653 |
set title = regexp_replace(title, ''', E'\'', 'g'); |
|
654 |
update doi_registration |
|
655 |
set title = regexp_replace(title, '"', E'\'', 'g'); |
|
656 |
update doi_registration |
|
657 |
set title = regexp_replace(title, '>', '>', 'g'); |
|
658 |
update doi_registration |
|
659 |
set title = regexp_replace(title, '<', '<', 'g'); |
|
660 |
update doi_registration |
|
661 |
set title = regexp_replace(title, 'í', 'í', 'g') |
|
662 |
where title like '%í%'; |
|
663 |
update doi_registration |
|
664 |
set title = regexp_replace(title, 'í', 'í', 'g') |
|
665 |
where title like '%í%'; |
|
666 |
update doi_registration |
|
667 |
set title = regexp_replace(title, 'ñ', 'í', 'g') |
|
668 |
where title like '%ñ%'; |
|
669 |
update doi_registration |
|
670 |
set title = regexp_replace(title, 'ó', 'ó', 'g') |
|
671 |
where title like '%ó%'; |
|
672 |
update doi_registration |
|
673 |
set title = regexp_replace(title, 'ã', 'ã', 'g') |
|
674 |
where title like '%ã%'; |
|
675 |
update doi_registration |
|
676 |
set title = regexp_replace(title, 'ç', 'ç', 'g') |
|
677 |
where title like '%ç%'; |
|
678 |
update doi_registration |
|
679 |
set title = regexp_replace(title, 'é', 'é', 'g') |
|
680 |
where title like '%é%'; |
|
681 |
update doi_registration |
|
682 |
set title = regexp_replace(title, 'ñ', 'ñ', 'g') |
|
683 |
where title like '%ñ%'; |
|
684 |
update doi_registration |
|
685 |
set title = regexp_replace(title, '–', '-', 'g') |
|
686 |
where title like '%–%'; |
|
687 |
update doi_registration |
|
688 |
set title = regexp_replace(title, '’', E'\'', 'g') |
|
689 |
where title like '%’%'; |
|
690 |
update doi_registration |
|
691 |
set title = regexp_replace(title, '–', '/', 'g') |
|
692 |
where title like '%–%'; |
|
693 |
update doi_registration |
|
694 |
set title = regexp_replace(title, 'á', 'á', 'g') |
|
695 |
where title like '%á%'; |
|
696 |
update doi_registration |
|
697 |
set title = regexp_replace(title, 'ó', 'ó', 'g') |
|
698 |
where title like '%ó%'; |
|
699 |
--格式範例 (格式范例) |
|
700 |
update doi_registration |
|
701 |
set title = regexp_replace(title, '格式範例', '格式范例', 'g') |
|
702 |
where title like '%格式範例%'; |
|
703 |
|
|
704 |
--directional quotes |
|
705 |
update doi_registration |
|
706 |
set title = regexp_replace(title, '”', '"', 'g') |
|
707 |
where title like '%”%'; |
|
708 |
update doi_registration |
|
709 |
set title = regexp_replace(title, '“', '"', 'g') |
|
710 |
where title like '%“%'; |
|
711 |
--one off apostrophe |
|
712 |
update doi_registration |
|
713 |
set title = regexp_replace(title, 'Õ', E'\'', 'g') |
|
714 |
where title like '%Õ%'; |
|
715 |
update doi_registration |
|
716 |
set title = regexp_replace(title, '–', '-', 'g') |
|
717 |
where title like '%–%'; |
|
718 |
|
|
719 |
--after reading the complete metadata, I could interpret these as "greater than" |
|
720 |
update doi_registration |
|
721 |
set title = regexp_replace(title, '&t;', '>', 'g') |
|
722 |
where title like '%&t;%' |
|
723 |
and guid in ('doi:10.6073/AA/knb-lter-luq.15.1', 'doi:10.6073/AA/knb-lter-luq.39.1', 'doi:10.6073/AA/knb-lter-luq.37.1', 'doi:10.6073/AA/knb-lter-luq.40.1', 'doi:10.6073/AA/knb-lter-luq.18.1' ); |
|
724 |
|
|
725 |
|
|
726 |
--special characters |
|
727 |
update doi_registration |
|
728 |
set creator = regexp_replace(creator, 'é', 'é', 'g') |
|
729 |
where creator like '%é%'; |
|
730 |
update doi_registration |
|
731 |
set creator = regexp_replace(creator, 'ç', 'ç', 'g') |
|
732 |
where creator like '%ç%'; |
|
733 |
update doi_registration |
|
734 |
set creator = regexp_replace(creator, 'ñ', 'ñ', 'g') |
|
735 |
where creator like '%ñ%'; |
|
736 |
-- Gutiérrez, Ralph, J. |
|
737 |
-- Liébault, Frédéric |
|
738 |
update doi_registration |
|
739 |
set creator = regexp_replace(creator, 'é', 'é', 'g') |
|
740 |
where creator like '%é%'; |
|
741 |
-- Ramírez, Alonso |
|
742 |
update doi_registration |
|
743 |
set creator = regexp_replace(creator, 'í', 'í', 'g') |
|
744 |
where creator like '%í%'; |
|
745 |
-- González, Grizelle |
|
746 |
update doi_registration |
|
747 |
set creator = regexp_replace(creator, 'á', 'á', 'g') |
|
748 |
where creator like '%á%'; |
|
749 |
--王, (王, 名, 姓) |
|
750 |
update doi_registration |
|
751 |
set creator = regexp_replace(creator, '王', '王', 'g') |
|
752 |
where creator like '%王%'; |
|
753 |
--名 |
|
754 |
update doi_registration |
|
755 |
set creator = regexp_replace(creator, '名', '名', 'g') |
|
756 |
where creator like '%名%'; |
|
757 |
--姓 |
|
758 |
update doi_registration |
|
759 |
set creator = regexp_replace(creator, '姓', '姓', 'g') |
|
760 |
where creator like '%姓%'; |
|
761 |
-- These are left |
|
762 |
-- Helmbrecht, S ’rai |
|
763 |
-- lkñlkl, Fgfggf |
|
764 |
|
|
765 |
-- clean up |
|
766 |
update doi_registration |
|
767 |
set publisher = replace(publisher, E'\n', ' '); |
|
768 |
update doi_registration |
|
769 |
set publisher = regexp_replace(publisher, E'\\s+', ' ', 'g'); |
|
770 |
update doi_registration |
|
771 |
set publisher = regexp_replace(publisher, E'^\\s+', '', 'g'); |
|
772 |
|
|
773 |
-- use only year for pub_date |
|
774 |
update doi_registration |
|
775 |
set pub_date = to_char(date(pub_date), 'YYYY') |
|
776 |
where length(pub_date) > 4 |
|
777 |
and pub_date like '%-%-%'; |
|
778 |
|
|
779 |
--set the rest to upload date, xml_documents |
|
780 |
update doi_registration doi |
|
781 |
set pub_date = to_char(date_created, 'YYYY') |
|
782 |
from xml_documents x, identifier id |
|
783 |
where length(pub_date) > 4 |
|
784 |
and doi.guid = id.guid |
|
785 |
and id.docid = x.docid |
|
786 |
and id.rev = x.rev; |
|
787 |
-- revisions |
|
788 |
update doi_registration doi |
|
789 |
set pub_date = to_char(date_created, 'YYYY') |
|
790 |
from xml_revisions x, identifier id |
|
791 |
where length(pub_date) > 4 |
|
792 |
and doi.guid = id.guid |
|
793 |
and id.docid = x.docid |
|
794 |
and id.rev = x.rev; |
|
795 |
|
|
632 | 796 |
-- copy to the external file |
633 | 797 |
COPY |
634 | 798 |
(select ezid_account, |
Also available in: Unified diff
include John Kunze's latest suggestions for improved metadata -- a lot of clean-up, especially on characters in the file. Note UTF-8 encoding of the script.