-- Work queue: ids of biblio.record_entry rows still waiting to be processed by
-- reindex.reindex(), which removes each id as it handles it.
-- IF EXISTS lets this script run on a database where the table has not been
-- created yet (a bare DROP TABLE raises an error in that case).
DROP TABLE IF EXISTS reindex.to_do;
CREATE TABLE reindex.to_do (id int);

-- Failure log: ids that reindex.reindex() could not process, with a short reason.
DROP TABLE IF EXISTS reindex.failed;
CREATE TABLE reindex.failed (id int, reason TEXT);

-- Optional seeding step: uncomment to queue every non-deleted bibliographic record.
--INSERT INTO reindex.to_do SELECT id from biblio.record_entry where not deleted;
-- reindex.cleanup_marc(TEXT) RETURNS TEXT  (PL/PerlU)
--
-- Normalizes a MARCXML document held in $xml. Visible steps, in order:
--   1. Tell MARC::Charset to assume the input is already Unicode.
--   2. Parse $xml into a MARC::Record with new_from_xml().
--   3. Iterate over every 901 field of the record.
--   4. Serialize the record back to XML with as_xml_record().
--   5. Strip a leading <?xml ...?> declaration and remove whitespace that sits
--      between adjacent tags.
--   6. Run the result through decode_utf8().
--   7. Ampersand pass: matches '&' that is not followed by non-space
--      characters and a ';'.
--   8. Replace every character in U+0080..U+FFFD with a hexadecimal numeric
--      character reference (&#xNNNN;).
--   9. Delete all characters in the range \x00-\x1f (this includes tab, LF and CR).
--
-- NOTE(review): the assignment of $xml, the body of the 901 loop and the
--   return statement are not visible in this excerpt -- presumably $xml is the
--   TEXT argument and the cleaned XML is returned; confirm against the full file.
-- NOTE(review): as written, step 7 replaces '&' with '&', which changes
--   nothing. The accompanying comment says raw ampersands should become
--   entities, so the replacement was presumably meant to be the '&' entity
--   ('&' followed by 'amp;') -- confirm and fix.
-- NOTE(review): decode_utf8 normally comes from the Encode module; no
--   'use Encode' is visible here -- verify it is loaded.
11 CREATE OR REPLACE FUNCTION reindex.cleanup_marc(TEXT) RETURNS TEXT as $func$
13 use MARC::File::XML (BinaryEncoding => 'UTF-8');
15 use Unicode::Normalize;
18 MARC::Charset->assume_unicode(1);
21 my $r = MARC::Record->new_from_xml( $xml );
23 for my $f ( $r->field('901') ) {
27 $xml = $r->as_xml_record();
30 $xml =~ s/^<\?xml.+\?\s*>//go;
31 $xml =~ s/>\s+</></go;
34 $xml = decode_utf8($xml);
38 # Convert raw ampersands to entities
39 $xml =~ s/&(?!\S+;)/&/gso;
41 # Convert Unicode characters to entities
42 $xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
44 $xml =~ s/[\x00-\x1f]//go;
48 $func$ LANGUAGE PLPERLU;
50 CREATE OR REPLACE FUNCTION reindex.reindex(count INTEGER) RETURNS timestamp AS $$
55 FOR myid IN SELECT reindex.to_do.id FROM reindex.to_do LIMIT count LOOP
58 RAISE NOTICE 'Reindexing id: % -- % of % records', myid, i, count;
59 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
60 -- change this or add more functions as needed
61 UPDATE biblio.record_entry set marc = public.entityize(reindex.cleanup_marc(marc)) where biblio.record_entry.id = myid;
62 EXCEPTION WHEN unique_violation THEN
63 INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unique violation');
64 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
65 RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid;
66 EXCEPTION WHEN OTHERS THEN
67 INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unhandled error');
68 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
69 RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid;