EG install scripts: nonprod-28 -> nonprod
[sitka/sitka-tools.git] / reindexing / reindex.sql
CommitLineData
3b257416
JF
1
2DROP TABLE reindex.to_do;
3CREATE TABLE reindex.to_do (id int);
4
5DROP TABLE reindex.failed;
6CREATE TABLE reindex.failed (id int, reason TEXT);
7
8-- INSERT ALL THE BIBS
69800a8e 9INSERT INTO reindex.to_do SELECT id from biblio.record_entry where not deleted;
3b257416
JF
10
11CREATE OR REPLACE FUNCTION reindex.cleanup_marc(TEXT) RETURNS TEXT as $func$
12use MARC::Record;
13use MARC::File::XML (BinaryEncoding => 'UTF-8');
14use MARC::Charset;
15use Unicode::Normalize;
69800a8e 16use Encode qw(decode_utf8);
3b257416
JF
17use strict;
18
19MARC::Charset->assume_unicode(1);
20
21my $xml = shift;
22my $r = MARC::Record->new_from_xml( $xml );
23
24for my $f ( $r->field('901') ) {
25 $r->delete_field($f)
26}
27
28$xml = $r->as_xml_record();
29
30$xml =~ s/\n//sgo;
31$xml =~ s/^<\?xml.+\?\s*>//go;
32$xml =~ s/>\s+</></go;
33$xml =~ s/\p{Cc}//go;
34
35$xml = decode_utf8($xml);
36
37$xml = NFC($xml);
38
39# Convert raw ampersands to entities
40$xml =~ s/&(?!\S+;)/&amp;/gso;
41
42# Convert Unicode characters to entities
43$xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
44
45$xml =~ s/[\x00-\x1f]//go;
46
47
48return $xml;
49$func$ LANGUAGE PLPERLU;
50
51CREATE OR REPLACE FUNCTION reindex.reindex(count INTEGER) RETURNS timestamp AS $$
52DECLARE
53 myid INTEGER := 0;
54 i INTEGER := 0;
55BEGIN
56 FOR myid IN SELECT reindex.to_do.id FROM reindex.to_do LIMIT count LOOP
57 i = i + 1;
58 BEGIN
59 RAISE NOTICE 'Reindexing id: % -- % of % records', myid, i, count;
60 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
61 -- change this or add more functions as needed
62 UPDATE biblio.record_entry set marc = public.entityize(reindex.cleanup_marc(marc)) where biblio.record_entry.id = myid;
63 EXCEPTION WHEN unique_violation THEN
64 INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unique violation');
65 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
66 RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid;
b1f99bb8 67 WHEN OTHERS THEN
3b257416
JF
68 INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unhandled error');
69 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
70 RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid;
71 END;
72 END LOOP;
73 RETURN now();
74
75END;
76$$ LANGUAGE plpgsql;