-- Work-queue setup for the bulk MARC reindex.
-- NOTE(review): assumes the "reindex" schema already exists; its creation is
-- not visible in this part of the script -- confirm.

-- Ids of records still waiting to be processed.
-- IF EXISTS lets the script run cleanly the first time, when there is no
-- table to drop yet.
DROP TABLE IF EXISTS reindex.to_do;
CREATE TABLE reindex.to_do (id int);

-- Ids of records that could not be processed, with a short reason.
DROP TABLE IF EXISTS reindex.failed;
CREATE TABLE reindex.failed (id int, reason TEXT);

-- Queue every bibliographic record that is not flagged as deleted.
INSERT INTO reindex.to_do (id)
SELECT id
FROM biblio.record_entry
WHERE NOT deleted;
-- reindex.cleanup_marc(TEXT) RETURNS TEXT
--
-- Takes a MARCXML document as text, round-trips it through MARC::Record and
-- returns a normalized serialization:
--   * the XML declaration and whitespace between tags are removed,
--   * bare ampersands are turned into the ampersand entity,
--   * characters U+0080..U+FFFD become hexadecimal character references,
--   * characters 0x00-0x1F are removed.
--
-- Parameters: $1 - MARCXML text.
-- Returns:    the cleaned MARCXML text.
-- Errors:     any exception thrown by the MARC modules while parsing
--             propagates to the calling SQL statement.
CREATE OR REPLACE FUNCTION reindex.cleanup_marc(TEXT) RETURNS TEXT AS $func$
    use MARC::Record;
    use MARC::File::XML (BinaryEncoding => 'UTF-8');
    use MARC::Charset;
    use Unicode::Normalize;
    use Encode qw(decode_utf8);

    # NOTE(review): presumably tells MARC::Charset that incoming data is
    # already Unicode so no MARC-8 conversion is attempted -- confirm.
    MARC::Charset->assume_unicode(1);

    # PL/Perl hands the SQL arguments to the function body in @_.
    my $xml = shift;
    my $r = MARC::Record->new_from_xml( $xml );

    # NOTE(review): the body of this loop was not present in the reviewed
    # text; removing every 901 field is the presumed intent -- confirm
    # before running against production data.
    for my $f ( $r->field('901') ) {
        $r->delete_field($f);
    }

    $xml = $r->as_xml_record();

    # Drop the leading XML declaration and any whitespace between tags.
    $xml =~ s/^<\?xml.+\?\s*>//go;
    $xml =~ s/>\s+</></go;

    # Decode the UTF-8 octets into Perl characters so the character-range
    # substitution below operates on code points rather than bytes.
    $xml = decode_utf8($xml);

    # Convert raw ampersands (those not already starting an entity) to the
    # ampersand entity. The replacement is assembled from two pieces so that
    # tooling which HTML-decodes this file cannot collapse it back into a
    # bare '&', which would silently turn this substitution into a no-op.
    my $amp_entity = '&' . 'amp;';
    $xml =~ s/&(?!\S+;)/$amp_entity/gs;

    # Convert Unicode characters to entities
    $xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;

    # Strip the control characters 0x00-0x1F (this includes tabs and newlines).
    $xml =~ s/[\x00-\x1f]//go;

    # Return the document explicitly; otherwise the function's value would be
    # the result of the last substitution instead of the XML.
    return $xml;
$func$ LANGUAGE PLPERLU;
51 CREATE OR REPLACE FUNCTION reindex.reindex(count INTEGER) RETURNS timestamp AS $$
56 FOR myid IN SELECT reindex.to_do.id FROM reindex.to_do LIMIT count LOOP
59 RAISE NOTICE 'Reindexing id: % -- % of % records', myid, i, count;
60 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
61 -- change this or add more functions as needed
62 UPDATE biblio.record_entry set marc = public.entityize(reindex.cleanup_marc(marc)) where biblio.record_entry.id = myid;
63 EXCEPTION WHEN unique_violation THEN
64 INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unique violation');
65 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
66 RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid;
68 INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unhandled error');
69 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
70 RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid;