From 4856f9b1ff5abaa03bc8b1f43c3f11157cb0aadb Mon Sep 17 00:00:00 2001
From: James Fournie
Date: Thu, 23 Feb 2012 15:19:26 -0800
Subject: [PATCH] Updated overdrive ingest scripts

overdrive-import.pl:
- Bootstrap an OpenSRF client, connect to open-ils.cstore and add the
  --direct_load, --overwritetcn and --verbose options.
- Tag each matching Overdrive 856 with a single base $9 ($baseorg)
  instead of cloning one scoped 856 per target org unit.
- Skip records whose TCN already exists, or update them when
  --overwritetcn is given.
- Use TCN prefix "Overdrive_" and TCN source 'Overdrive'.
- Honour --startid (the old "1 || $startid" always evaluated to 1).
- adjust_leader now sets leader position 9 to 'a' instead of setting
  position 5 to 'm'.

overdrive-ingest-db-func.sql:
- Rename the overdrive_MARCXML_* functions to overdrive_bc_*.
- Replace the "EXECUTE SELECT" / "EXECUTE UPDATE" statements with plain
  SELECT ... INTO / UPDATE, and pass the accumulated marcxml (not rid)
  to overdrive_bc_add_uri_field.
- Drop IMMUTABLE from the PL/pgSQL function that updates
  biblio.record_entry.

---
Review notes (ignored by git am):
- This copy was rebuilt from a whitespace-collapsed version of the patch.
  Indentation and the position of blank context lines are reconstructed to
  agree with the hunk counts; apply with whitespace-tolerant options and
  verify against the target tree.
- The "does not match record" message now reads the TCN from $req (the
  fetched bib record) rather than $rec (the MARC::Record, which has no
  tcn_value method).
- Corrected the copy-pasted comments on the new GetOptions entries and
  marked the empty --overwritetcn mismatch branch with a TODO.

 marc_import_overdrive/overdrive-import.pl          | 119 ++++++++++++---------
 marc_import_overdrive/overdrive-ingest-db-func.sql |  26 ++---
 2 files changed, 81 insertions(+), 64 deletions(-)

diff --git a/marc_import_overdrive/overdrive-import.pl b/marc_import_overdrive/overdrive-import.pl
index 563a193..8759ba2 100755
--- a/marc_import_overdrive/overdrive-import.pl
+++ b/marc_import_overdrive/overdrive-import.pl
@@ -9,6 +9,8 @@ use OpenILS::Utils::Fieldmapper;
 use Digest::MD5 qw/md5_hex/;
 use OpenSRF::Utils::JSON;
 use OpenILS::Application::AppUtils;
+use OpenSRF::AppSession;
+use OpenSRF::System;
 use Data::Dumper;
 use Unicode::Normalize;
 use Encode;
@@ -21,18 +23,18 @@ use MARC::File::XML ( BinaryEncoding => 'utf-8' );
 use MARC::Charset;
 use DBI;
 
+
 #MARC::Charset->ignore_errors(1);
 
 my ($config, $idlfile, $marctype, $enc) =
     ('/srv/openils/conf/opensrf_core.xml', '/srv/openils/conf/fm_IDL.xml', 'USMARC', 'utf8');
 
-my (@files, @trash_fields, @req_fields, $quiet, $startid);
+my (@files, @trash_fields, $quiet, $startid, $verbose, $directload, $overwritetcn);
+
+my ($baseorg,$overdrive_prefix,$tcn_prefix) = ('SITKA','http\:\/\/downloads\.bclibrary\.ca\/ContentDetails\.htm\?ID\=',"Overdrive_");
 
-my @targetorg = ('BPR','BFN','BTE');
+my @req_fields = ('856');
 
-@req_fields = ('856');
-my $overdrive_prefix = 'http\:\/\/downloads\.bclibrary\.ca\/ContentDetails\.htm\?ID\=';
-my $tcn_prefix = "LtG_";
 
 GetOptions(
     'marctype=s' => \$marctype, # format of MARC files being processed defaults to USMARC, often set to XML
@@ -43,9 +45,17 @@ GetOptions(
     'trash=s' => \@trash_fields, # fields to remove from all processed records
     'xml_idl=s' => \$idlfile, # location of XML IDL file, defaults to /openils/conf/fm_IDL.xml
     'startid=i' => \$startid, #starting ID
-    'quiet' => \$quiet # do not output progress count
+    'direct_load' => \$directload, # load records directly through open-ils.cstore instead of printing JSON
+    'overwritetcn' => \$overwritetcn, # update the existing record when the incoming TCN is already present
+    'quiet' => \$quiet, # do not output progress count
+    'verbose' => \$verbose # print per-record import/duplicate/overwrite messages to STDERR
 );
 
+my $U = "OpenILS::Application::AppUtils";
+OpenSRF::System->bootstrap_client( config_file => $config );
+Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
+
 @trash_fields = split(/,/,join(',',@trash_fields));
 @req_fields = split(/,/,join(',',@req_fields));
 
@@ -75,11 +85,10 @@ my $starttime = time;
 
 my $rec;
 my $count = 0;
-my $id = 1 || $startid;
+my $id = $startid || 1;
 
 PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
     next if ($rec == -1);
-    print STDERR "======\n";
     $id++;
     $count++;
 
@@ -96,7 +105,7 @@ PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
     # -----------------
 
     my $tcn_value;
-    my $tcn_source = 'Library To Go';
+    my $tcn_source = 'Overdrive';
     my $caption = 'DOWNLOADABLE AUDIOBOOK';
 
 
@@ -115,20 +124,6 @@ PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
             warn "856 has no URL in rec $id. Skipping";
         }
 
-        # this record has been through Evergreen if a $9 exists somewhere
-        # instead, we can presumably pull the tcn from the 901 and generate our base field that way
-        if($uri->subfield('9')){
-            # if this doesn't match our prefix, ignore it, we only care about our current prefix
-            next unless($url =~ m/($overdrive_prefix)/);
-
-            # if it does:
-            # delete $9 subfield and use this as a base uri field
-            $baseurifield = $uri->clone;
-            $rec->delete_fields($uri);
-            next URIFIELD;
-        }
-
-
         if($uri->subfield('z')){
             $caption = 'EBOOK' if ($uri->subfield('z') =~ /Book/);
             $uri->delete_subfield(code => 'z');
@@ -143,32 +138,15 @@ PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
 
             # make it TCN-ish
             $tcn_value = $tcn_prefix . $overdrivekey;
-            # we have a base for our scoped fields
-            $baseurifield = $uri->clone;
-
-            $rec->delete_fields($uri);
+            # add our base $9
+            $uri->add_subfields('9' => $baseorg);
         }
     }
 
-    if(!$baseurifield){
-        die "the horror!";
-    }
 
     # add some arbitrary stuff as prescribed by our cataloguer overlords
     $rec = adjust_leader($rec);
     $rec = process_custom_fields($rec);
-
-    # add a scoped field for each org unit in our array
-    foreach(@targetorg){
-        if($baseurifield){
-            my $newfield = $baseurifield->clone();
-            $newfield->add_subfields('9' => $_);
-            $rec->insert_fields_ordered($newfield);
-        } else {
-            die;# $rec->as_formatted();
-        }
-    }
-
     # -----------------
     # END Overdrive - specific code
     # -----------------
@@ -186,14 +164,9 @@ PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
 
     $rec->insert_fields_ordered($field901);
 
-    print $rec->as_formatted();
+    #print $rec->as_formatted();
 
-    next PROCESS;
-
-
-    $tcn_value = $rec->subfield('901' => 'a');
-    $tcn_source = $rec->subfield('901' => 'b');
-    $id = $rec->subfield('901' => 'c');
+    #next PROCESS;
 
     (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
     $xml =~ s/^<\?xml.+\?\s*>//go;
@@ -215,17 +188,61 @@ PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
     $bib->tcn_value($tcn_value);
     $bib->last_xact_id('IMPORT-'.$starttime);
 
-    #print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
+    if(my $dupetcn = $U->simplereq("open-ils.search","open-ils.search.biblio.tcn",$tcn_value)){
+        if($dupetcn->{count} > 0){
+            my $newid = $dupetcn->{ids}[0];
+            if($overwritetcn){
+                $bib->id($newid);
+                $cstore->request("open-ils.cstore.transaction.begin")->gather(1);
+                my $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.update",$bib)->gather(1);
+                $cstore->request("open-ils.cstore.transaction.commit")->gather(1);
+                print STDERR "overwriting TCN: $tcn_value (id:$id/newid:$newid)\n" if $verbose;
+            } else {
+                print STDERR "duplicate TCN: $tcn_value - (id: $id) not imported \n" if $verbose;
+            }
+            next PROCESS;
+        }
+    }
+
+    if($directload){
+        my $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.retrieve",$id)->gather(1);
+        if($req){
+            if($req->tcn_value ne $tcn_value){
+                print STDERR "incoming record (id:$id/tcn:$tcn_value) does not match record (id:".$req->id."/tcn:".$req->tcn_value.")!\n" if $verbose;
+                if($overwritetcn){
+                    # TODO: overwriting a record whose TCN does not match is not implemented
+                }
+            } elsif (($req->tcn_value eq $tcn_value) && $overwritetcn){
+                print STDERR "matching record overlaied with incoming record (id:$id/tcn:$tcn_value)\n" if $verbose;
+
+                $cstore->request("open-ils.cstore.transaction.begin")->gather(1);
+                $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.update",$bib)->gather(1);
+                $cstore->request("open-ils.cstore.transaction.commit")->gather(1);
+            }
+        } else {
+            print STDERR "importing new record (id:$id/tcn:$tcn_value)\n" if $verbose;
+
+            $cstore->request("open-ils.cstore.transaction.begin")->gather(1);
+            $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.create",$bib)->gather(1);
+            $cstore->request("open-ils.cstore.transaction.commit")->gather(1);
+        }
+        print Dumper($req);
+    } else {
+        print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
+    }
 
     if (!$quiet){# && !($count % 50)) {
         print STDERR "\r$count\t". $count / (time - $starttime);
     }
 }
 
+$cstore->disconnect();
+
 sub adjust_leader {
     my $rec = shift;
     my $leader = $rec->leader();
-    $leader = substr($leader,0,5) . 'm' . substr($leader,6,length($leader));
+    #$leader = substr($leader,0,5) . 'm' . substr($leader,6,length($leader));
+    $leader = substr($leader,0,9) . 'a' . substr($leader,10,length($leader));
     $rec->leader($leader);
     return $rec;
 }
diff --git a/marc_import_overdrive/overdrive-ingest-db-func.sql b/marc_import_overdrive/overdrive-ingest-db-func.sql
index 51cadeb..c891615 100755
--- a/marc_import_overdrive/overdrive-ingest-db-func.sql
+++ b/marc_import_overdrive/overdrive-ingest-db-func.sql
@@ -1,4 +1,4 @@
-CREATE OR REPLACE FUNCTION sitka.overdrive_MARCXML_delete_all_uri_fields( TEXT ) RETURNS TEXT AS $func$
+CREATE OR REPLACE FUNCTION sitka.overdrive_bc_delete_all_uri_fields( TEXT ) RETURNS TEXT AS $func$
     use MARC::Record;
     use MARC::File::XML (BinaryEncoding => 'UTF-8');
 
@@ -13,7 +13,7 @@ CREATE OR REPLACE FUNCTION sitka.overdrive_MARCXML_delete_all_uri_fields( TEXT )
         next URIFIELD if ($uri->subfield('3'));
 
         # otherwise, delete away
-        $r->delete_fields($uri);
+        $r->delete_field($uri);
     }
 
     $xml = $r->as_xml_record();
@@ -23,7 +23,7 @@ CREATE OR REPLACE FUNCTION sitka.overdrive_MARCXML_delete_all_uri_fields( TEXT )
     return $xml;
 $func$ LANGUAGE PLPERLU;
 
-CREATE OR REPLACE FUNCTION sitka.overdrive_MARCXML_add_uri_field( TEXT, TEXT, TEXT, TEXT, TEXT ) RETURNS TEXT AS $func$
+CREATE OR REPLACE FUNCTION sitka.overdrive_bc_add_uri_field( TEXT, TEXT, TEXT, TEXT, TEXT ) RETURNS TEXT AS $func$
     use MARC::Record;
     use MARC::File::XML (BinaryEncoding => 'UTF-8');
 
@@ -73,7 +73,7 @@ DECLARE
     urlprefix TEXT := 'http://downloads.bclibrary.ca/ContentDetails.htm?ID=';
     urlcaption TEXT := 'Click to access online (library card required)';
 BEGIN
-    EXECUTE SELECT sitka.overdrive_MARCXML_delete_all_uri_fields(marc) FROM biblio.record_entry where id = rid INTO marcxml;
+    SELECT sitka.overdrive_bc_delete_all_uri_fields(marc) FROM biblio.record_entry where id = rid INTO marcxml;
 
     -- clean out asset.call_number and asset.uri and asset.uri_call_number_map
 
@@ -82,15 +82,15 @@ BEGIN
     -- from actor.org_unit_setting b join actor.org_unit a on (a.id = b.org_unit) where b.name = 'sitka.overdrive_setting' and b.value = true
     -- something like that
 
-    EXECUTE SELECT sitka.overdrive_bc_add_uri_field (rid, tcnprefix, urlprefix, urlcaption, 'BBGVL') INTO marcxml;
-    EXECUTE SELECT sitka.overdrive_bc_add_uri_field (rid, tcnprefix, urlprefix, urlcaption, 'BBNCLF') INTO marcxml;
-    EXECUTE SELECT sitka.overdrive_bc_add_uri_field (rid, tcnprefix, urlprefix, urlcaption, 'BCK') INTO marcxml;
-    EXECUTE SELECT sitka.overdrive_bc_add_uri_field (rid, tcnprefix, urlprefix, urlcaption, 'BNCLF') INTO marcxml;
-    EXECUTE SELECT sitka.overdrive_bc_add_uri_field (rid, tcnprefix, urlprefix, urlcaption, 'BNELF') INTO marcxml;
-    EXECUTE SELECT sitka.overdrive_bc_add_uri_field (rid, tcnprefix, urlprefix, urlcaption, 'ISLANDLINK') INTO marcxml;
-    EXECUTE SELECT sitka.overdrive_bc_add_uri_field (rid, tcnprefix, urlprefix, urlcaption, 'UNFEDERATED') INTO marcxml;
+    SELECT sitka.overdrive_bc_add_uri_field(marcxml::TEXT, tcnprefix::TEXT, urlprefix::TEXT, urlcaption::TEXT, 'BBGVL'::TEXT) INTO marcxml;
+    SELECT sitka.overdrive_bc_add_uri_field(marcxml::TEXT, tcnprefix::TEXT, urlprefix::TEXT, urlcaption::TEXT, 'BBNCLF'::TEXT) INTO marcxml;
+    SELECT sitka.overdrive_bc_add_uri_field(marcxml::TEXT, tcnprefix::TEXT, urlprefix::TEXT, urlcaption::TEXT, 'BCK'::TEXT) INTO marcxml;
+    SELECT sitka.overdrive_bc_add_uri_field(marcxml::TEXT, tcnprefix::TEXT, urlprefix::TEXT, urlcaption::TEXT, 'BNCLF'::TEXT) INTO marcxml;
+    SELECT sitka.overdrive_bc_add_uri_field(marcxml::TEXT, tcnprefix::TEXT, urlprefix::TEXT, urlcaption::TEXT, 'BNELF'::TEXT) INTO marcxml;
+    SELECT sitka.overdrive_bc_add_uri_field(marcxml::TEXT, tcnprefix::TEXT, urlprefix::TEXT, urlcaption::TEXT, 'ISLANDLINK'::TEXT) INTO marcxml;
+    SELECT sitka.overdrive_bc_add_uri_field(marcxml::TEXT, tcnprefix::TEXT, urlprefix::TEXT, urlcaption::TEXT, 'UNFEDERATED'::TEXT) INTO marcxml;
 
-    EXECUTE UPDATE biblio.record_entry set marc = marcxml WHERE id = rid;
+    UPDATE biblio.record_entry set marc = marcxml WHERE id = rid;
     return true;
 END;
-$$ LANGUAGE PLPGSQL IMMUTABLE;
+$$ LANGUAGE PLPGSQL;
-- 
2.3.6