Commit | Line | Data |
---|---|---|
3b257416 JF |
1 | |
2 | DROP TABLE reindex.to_do; | |
3 | CREATE TABLE reindex.to_do (id int); | |
4 | ||
5 | DROP TABLE reindex.failed; | |
6 | CREATE TABLE reindex.failed (id int, reason TEXT); | |
7 | ||
8 | -- INSERT ALL THE BIBS | |
9 | --INSERT INTO reindex.to_do SELECT id from biblio.record_entry where not deleted; | |
10 | ||
11 | CREATE OR REPLACE FUNCTION reindex.cleanup_marc(TEXT) RETURNS TEXT as $func$ | |
12 | use MARC::Record; | |
13 | use MARC::File::XML (BinaryEncoding => 'UTF-8'); | |
14 | use MARC::Charset; | |
15 | use Unicode::Normalize; | |
16 | use strict; | |
17 | ||
18 | MARC::Charset->assume_unicode(1); | |
19 | ||
20 | my $xml = shift; | |
21 | my $r = MARC::Record->new_from_xml( $xml ); | |
22 | ||
23 | for my $f ( $r->field('901') ) { | |
24 | $r->delete_field($f) | |
25 | } | |
26 | ||
27 | $xml = $r->as_xml_record(); | |
28 | ||
29 | $xml =~ s/\n//sgo; | |
30 | $xml =~ s/^<\?xml.+\?\s*>//go; | |
31 | $xml =~ s/>\s+</></go; | |
32 | $xml =~ s/\p{Cc}//go; | |
33 | ||
34 | $xml = decode_utf8($xml); | |
35 | ||
36 | $xml = NFC($xml); | |
37 | ||
38 | # Convert raw ampersands to entities | |
39 | $xml =~ s/&(?!\S+;)/&/gso; | |
40 | ||
41 | # Convert Unicode characters to entities | |
42 | $xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe; | |
43 | ||
44 | $xml =~ s/[\x00-\x1f]//go; | |
45 | ||
46 | ||
47 | return $xml; | |
48 | $func$ LANGUAGE PLPERLU; | |
49 | ||
50 | CREATE OR REPLACE FUNCTION reindex.reindex(count INTEGER) RETURNS timestamp AS $$ | |
51 | DECLARE | |
52 | myid INTEGER := 0; | |
53 | i INTEGER := 0; | |
54 | BEGIN | |
55 | FOR myid IN SELECT reindex.to_do.id FROM reindex.to_do LIMIT count LOOP | |
56 | i = i + 1; | |
57 | BEGIN | |
58 | RAISE NOTICE 'Reindexing id: % -- % of % records', myid, i, count; | |
59 | DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid; | |
60 | -- change this or add more functions as needed | |
61 | UPDATE biblio.record_entry set marc = public.entityize(reindex.cleanup_marc(marc)) where biblio.record_entry.id = myid; | |
62 | EXCEPTION WHEN unique_violation THEN | |
63 | INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unique violation'); | |
64 | DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid; | |
65 | RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid; | |
66 | EXCEPTION WHEN OTHERS THEN | |
67 | INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unhandled error'); | |
68 | DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid; | |
69 | RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid; | |
70 | END; | |
71 | END LOOP; | |
72 | RETURN now(); | |
73 | ||
74 | END; | |
75 | $$ LANGUAGE plpgsql; |