Fixed hashref error
[sitka/sitka-tools.git] / reindexing / reindex.sql
CommitLineData
3b257416
JF
1
2DROP TABLE reindex.to_do;
3CREATE TABLE reindex.to_do (id int);
4
5DROP TABLE reindex.failed;
6CREATE TABLE reindex.failed (id int, reason TEXT);
7
8-- INSERT ALL THE BIBS
9--INSERT INTO reindex.to_do SELECT id from biblio.record_entry where not deleted;
10
11CREATE OR REPLACE FUNCTION reindex.cleanup_marc(TEXT) RETURNS TEXT as $func$
12use MARC::Record;
13use MARC::File::XML (BinaryEncoding => 'UTF-8');
14use MARC::Charset;
15use Unicode::Normalize;
16use strict;
17
18MARC::Charset->assume_unicode(1);
19
20my $xml = shift;
21my $r = MARC::Record->new_from_xml( $xml );
22
23for my $f ( $r->field('901') ) {
24 $r->delete_field($f)
25}
26
27$xml = $r->as_xml_record();
28
29$xml =~ s/\n//sgo;
30$xml =~ s/^<\?xml.+\?\s*>//go;
31$xml =~ s/>\s+</></go;
32$xml =~ s/\p{Cc}//go;
33
34$xml = decode_utf8($xml);
35
36$xml = NFC($xml);
37
38# Convert raw ampersands to entities
39$xml =~ s/&(?!\S+;)/&amp;/gso;
40
41# Convert Unicode characters to entities
42$xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
43
44$xml =~ s/[\x00-\x1f]//go;
45
46
47return $xml;
48$func$ LANGUAGE PLPERLU;
49
50CREATE OR REPLACE FUNCTION reindex.reindex(count INTEGER) RETURNS timestamp AS $$
51DECLARE
52 myid INTEGER := 0;
53 i INTEGER := 0;
54BEGIN
55 FOR myid IN SELECT reindex.to_do.id FROM reindex.to_do LIMIT count LOOP
56 i = i + 1;
57 BEGIN
58 RAISE NOTICE 'Reindexing id: % -- % of % records', myid, i, count;
59 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
60 -- change this or add more functions as needed
61 UPDATE biblio.record_entry set marc = public.entityize(reindex.cleanup_marc(marc)) where biblio.record_entry.id = myid;
62 EXCEPTION WHEN unique_violation THEN
63 INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unique violation');
64 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
65 RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid;
66 EXCEPTION WHEN OTHERS THEN
67 INSERT INTO reindex.failed (id, reason) VALUES (myid, 'unhandled error');
68 DELETE FROM reindex.to_do WHERE reindex.to_do.id = myid;
69 RAISE NOTICE '% had a unique key violation and could not be reindexed!', myid;
70 END;
71 END LOOP;
72 RETURN now();
73
74END;
75$$ LANGUAGE plpgsql;