#!/usr/bin/perl
# vim:et:sw=4:ts=4:
#
# Export MARC bibliographic / authority records (and optionally serial MFHD
# records and item holdings) from an Evergreen database.
#
# Record IDs are read one per line from STDIN / files named on the command
# line, or every ID from 1 up to the highest non-deleted ID is tried when
# --all is given.  Each record is fetched through open-ils.cstore, optionally
# decorated with 852 holdings fields and a 901 field, serialised in the
# requested format and written to the output handle.
#
# NOTE(review): the copy of this file that was reviewed had lost text that
# looked like markup (the XML header heredoc, the closing collection tag, the
# final summary heredoc and the opening lines of export_record).  Those spots
# are reconstructed below and marked with NOTE(review); confirm them against
# the upstream script before deploying.
use strict;
use warnings;
use bytes;

use OpenSRF::System;
use OpenSRF::EX qw/:try/;
use OpenSRF::AppSession;
use OpenSRF::Utils::JSON;
use OpenSRF::Utils::SettingsClient;
use OpenILS::Application::AppUtils;
use OpenILS::Utils::Fieldmapper;
use OpenILS::Utils::CStoreEditor;

use MARC::Record;
use MARC::File::XML;
use UNIVERSAL::require;

use Time::HiRes qw/time/;
use Getopt::Long;
use Config::Simple;

use Data::Dumper;

# Output formats accepted by --format.
my @formats = qw/USMARC UNIMARC XML BRE ARE/;

# Command-line options and their defaults.
my $config      = '/openils/conf/opensrf_core.xml';
my $format      = 'USMARC';
my $encoding    = 'MARC8';
my $location    = '';
my $dollarsign  = '$';
my $idl         = 0;
my $help;
my $holdings;
my $timeout     = 0;
my $export_mfhd;
my $type        = 'biblio';
my $all_records;
my $quiet       = 0;

my ($exclusion_ini, $collapse_to_depth, $output_file);
my $cfg;            # Config::Simple object built from --exclusion_ini, if any
my $force901;
my $onlyholdings;

GetOptions(
    'help'                => \$help,
    'items'               => \$holdings,
    'mfhd'                => \$export_mfhd,
    'all'                 => \$all_records,
    'location=s'          => \$location,
    'money=s'             => \$dollarsign,
    'config=s'            => \$config,
    'format=s'            => \$format,
    'type=s'              => \$type,
    'xml-idl=s'           => \$idl,
    'encoding=s'          => \$encoding,
    'timeout=i'           => \$timeout,
    'force901'            => \$force901,
    'exclusion_ini=s'     => \$exclusion_ini,
    'collapse_to_depth=i' => \$collapse_to_depth,
    'onlyholdings'        => \$onlyholdings,
    'output-file=s'       => \$output_file,
    'quiet'               => \$quiet,
);

if ($exclusion_ini) {
    die "exclusion ini file does not exist"
        unless (-r $exclusion_ini and -s $exclusion_ini);

    # A parse failure used to leave $cfg undefined, which silently disabled
    # every exclusion rule; fail loudly instead.
    $cfg = Config::Simple->new($exclusion_ini)
        or die "Can't parse exclusion ini file $exclusion_ini: "
             . Config::Simple->error() . "\n";
}

if ($help) {
    # NOTE(review): line breaks and column alignment inside this help text
    # were lost in the reviewed copy and have been re-flowed; the wording is
    # unchanged apart from the two added entries for --output-file and
    # --quiet, which the script accepts but did not document.
    print <<"HELP";
This script exports MARC authority, bibliographic, and serial holdings
records from an Evergreen database.

Input to this script can consist of a list of record IDs, with one record ID
per line, corresponding to the record ID in the Evergreen database table of
your requested record type.

Alternately, passing the --all option will attempt to export all records of
the specified type from the Evergreen database. The --all option starts at
record ID 1 and increments the ID by 1 until the largest ID in the database
is retrieved. This may not be very efficient for databases with large gaps
in their ID sequences.

Usage: $0 [options]
 --help or -h       This screen.
 --config or -c     Configuration file [/openils/conf/opensrf_core.xml]
 --format or -f     Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
 --encoding or -e   Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
 --xml-idl or -x    Location of the IDL XML
 --timeout          Timeout for exporting a single record; increase if you
                    are using --holdings and are exporting records that
                    have a lot of items attached to them.
 --type or -t       Record type (BIBLIO, AUTHORITY) [BIBLIO]
 --all or -a        Export all records; ignores input list
 --output-file      Write records to this file instead of standard output
 --quiet            Suppress progress and summary messages

 Additional options for type = 'BIBLIO':
 --items or -i      Include items (holdings) in the output
 --money            Currency symbol to use in item price field [\$]
 --mfhd             Export serial MFHD records for associated bib records
                    Not compatible with --format=BRE
 --location or -l   MARC Location Code for holdings from
                    http://www.loc.gov/marc/organizations/orgshome.html

 Options added by Sitka:
 --force901                 Force-add 901 fields
 --exclusion_ini FILENAME   Config::Simple based INI file for excluding holdings from the export
 --collapse_to_depth 2      Depth to collapse holdings. Any holdings at a depth below will be collapsed up to the parent org unit at the set depth
 --onlyholdings             Clean out 852s before adding new ones, and only export items that successfully recieved an 852 field

Examples:

To export a set of USMARC records in a file named "output_file" based on the
IDs contained in a file named "list_of_ids":

  cat list_of_ids | $0 > output_file

To export a set of MARC21XML authority records in a file named "output.xml"
for all authority records in the database:

  $0 --format XML --type AUTHORITY --all > output.xml

HELP
    exit;
}

$type     = lc($type);
$format   = uc($format);
$encoding = uc($encoding);

# Records go to --output-file when given, otherwise to a dup of STDOUT.
my $outfh;
my $real_stdout;
open($real_stdout, '>&', \*STDOUT) or die "Can't dup STDOUT: $!";
if ($output_file) {
    open($outfh, '>', $output_file)
        or die "Can't open file for output $output_file: $!";
} else {
    $outfh = $real_stdout;
}

binmode($outfh, ':raw')  if ($encoding ne 'UTF-8');
binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');

if (!grep { $format eq $_ } @formats) {
    die "Please select a supported format.  ".
        "Right now that means one of [".
        join('|',@formats). "]\n";
}

if ($format ne 'XML') {
    # Best-effort load of MARC::File::<FORMAT>; the result is deliberately
    # not checked, as in the original (there it shadowed $type with a local).
    my $marc_file_class = 'MARC::File::' . $format;
    $marc_file_class->require;
}

if ($timeout <= 0) {
    # set default timeout and/or correct silly user who
    # supplied a negative timeout; default timeout of
    # 300 seconds if exporting items determined empirically.
    $timeout = $holdings ? 300 : 1;
}

OpenSRF::System->bootstrap_client( config_file => $config );

if (!$idl) {
    $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
}

Fieldmapper->import(IDL => $idl);

my $ses = OpenSRF::AppSession->create('open-ils.cstore');
OpenILS::Utils::CStoreEditor::init();
my $editor = OpenILS::Utils::CStoreEditor->new();

# NOTE(review): the body of this heredoc (and possibly its condition) was
# missing from the reviewed copy; reconstructed as the standard MARC21 slim
# collection wrapper to pair with the closing print further down - confirm.
print {$outfh} <<"HEADER" if ($format eq 'XML');
<?xml version="1.0" encoding="$encoding"?>
<collection xmlns='http://www.loc.gov/MARC21/slim'>
HEADER

# Lookup tables keyed by database ID; filled by get_bib_locations().
my %orgs;
my %shelves;
my %statuses;
my %outypes;

# Flesh spec passed to the record retrieve call; replaced by
# get_bib_locations() so call numbers and copies are fetched with the bib.
my $flesh = {};
if ($holdings) {
    get_bib_locations();
}

my $start = time;
my $last_time = time;

# Progress counters.  All keys read by stats() are initialised so the
# progress line never interpolates undef.
my %count = (
    bib       => 0,       # records attempted
    did       => 0,       # records written
    cn        => 0,       # call numbers seen
    cp        => 0,       # copies seen
    did_last  => 0,       # 'did' at the previous stats() call
    time_last => $start,  # time of the previous stats() call
);
my $speed = 0;

if ($all_records) {
    my $top_record = 0;
    my $newest;
    if ($type eq 'biblio') {
        $newest = $editor->search_biblio_record_entry([
            {deleted => 'f'},
            {order_by => { 'bre' => 'id DESC' }, limit => 1}
        ]);
    } elsif ($type eq 'authority') {
        $newest = $editor->search_authority_record_entry([
            {deleted => 'f'},
            {order_by => { 'are' => 'id DESC' }, limit => 1}
        ]);
    }
    # Guard against an empty table / failed search instead of dying on
    # ->[0]->id.
    $top_record = $newest->[0]->id if ($newest && @$newest);

    for my $i (1 .. $top_record) {
        export_record($i);
    }
} else {
    while ( my $i = <> ) {
        # Strip the line ending (and any stray whitespace) so the ID sent
        # to cstore and used in the MFHD search is clean; skip blank lines.
        $i =~ s/^\s+|\s+$//g;
        next unless length $i;
        export_record($i);
    }
}

# NOTE(review): the closing tag was missing from the reviewed copy (only
# "\n" was left); reconstructed to match the header above - confirm.
print {$outfh} "</collection>\n" if ($format eq 'XML');

# Surface buffered write errors (e.g. a full disk) instead of losing them.
close($outfh) or die "Can't close output: $!";

$speed = $count{did} / (time - $start);
my $time = time - $start;

# NOTE(review): this summary heredoc was missing from the reviewed copy;
# its layout and the 'unless $quiet' guard are reconstructed from the
# variables computed just above - confirm.
print STDERR <<"DONE" unless $quiet;

Exports Attempted : $count{bib}
Exports Completed : $count{did}
Overall Speed     : $speed
Total Time Elapsed: $time seconds

DONE

# Fetch one record by ID and write it to the output handle.
#
# Param:   $id - ID of the biblio/authority record to export.
# Effects: updates %count, prints the record (and, with --mfhd, its serial
#          MFHD records) to $outfh; problems are reported with warn and the
#          record is skipped.
#
# NOTE(review): the first lines of this sub (through '$ses->') were missing
# from the reviewed copy and are reconstructed from how $id, $bib and $r are
# used below - confirm.
sub export_record {
    my $id = shift;

    my $bib;

    my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );

    my $s = $r->recv(timeout => $timeout);
    if (!$s) {
        warn "\n!!!!! Failed trying to read record $id\n";
        return;
    }
    if ($r->failed) {
        warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
        return;
    }
    if ($r->timed_out) {
        warn "\n!!!!!! Timed out trying to read record $id\n";
        return;
    }
    $bib = $s->content;
    $r->finish;

    $count{bib}++;
    return unless $bib;
    # Return if the bib is deleted
    return if ( $bib->deleted eq 't' );

    if ($format eq 'ARE' or $format eq 'BRE') {
        # Raw Fieldmapper object as JSON; no MARC processing.
        print {$outfh} OpenSRF::Utils::JSON->perl2JSON($bib);
        stats() unless $quiet;
        $count{did}++;
        return;
    }

    try {

        my $marc = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );

        if ($type eq 'biblio') {
            if ($onlyholdings) {
                # Remove old 852 fields
                my @f = $marc->field('852');
                $marc->delete_fields(@f) if @f;

                # Add new 852 fields
                add_bib_holdings($bib, $marc);

                # Check that at least one 852 was added.  If not, this record
                # is not written.  'return' here leaves only the try block,
                # so the MFHD export below still runs.
                @f = $marc->field('852');
                return unless @f;
            } else {
                add_bib_holdings($bib, $marc);
            }
        }

        _force_901($marc, $bib) if $force901;

        my $recordstr = undef;
        if ($format eq 'XML') {
            my $xml = $marc->as_xml_record;
            $xml =~ s/^<\?.+?\?>$//mo;     # drop the per-record XML declaration
            $recordstr = $xml;
        } elsif ($format eq 'UNIMARC') {
            $recordstr = $marc->as_usmarc;
        } elsif ($format eq 'USMARC') {
            $recordstr = $marc->as_usmarc;
        }

        # Sanity check: the serialised record must parse back.  (A stricter
        # byte-for-byte comparison was commented out in the original.)
        my $reparsed = eval {
            $format eq 'XML'
                ? MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC')
                : MARC::File::USMARC->decode($recordstr);
        };
        # The parse error lives in $@, not $! as originally reported.
        throw Error::Simple("Failed to parse MARC record back: $@")
            unless $reparsed;

        print {$outfh} $recordstr;

        $count{did}++;

    } otherwise {
        my $e = shift;
        my $errorid = $id;
        chomp($errorid);
        chomp($e);
        warn "\nERROR ON RECORD $errorid: $e\n";
        MARC::File::XML->import; # reset SAX parser so that one bad record doesn't kill the entire export
    };

    if ($export_mfhd and $type eq 'biblio') {
        my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'}) || [];
        foreach my $mfhd (@$mfhds) {
            try {
                my $marc = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );

                # The 901 on an MFHD record carries the parent bib's values.
                _force_901($marc, $bib) if $force901;

                if ($format eq 'XML') {
                    my $xml = $marc->as_xml_record;
                    $xml =~ s/^<\?.+?\?>$//mo;
                    print {$outfh} $xml;
                } elsif ($format eq 'UNIMARC') {
                    print {$outfh} $marc->as_usmarc;
                } elsif ($format eq 'USMARC') {
                    print {$outfh} $marc->as_usmarc;
                }
            } otherwise {
                my $e = shift;
                # chomp() returns the number of characters removed, so the
                # original 'my $errorid = chomp($id)' logged 0 or 1 instead
                # of the record ID.
                my $errorid = $id;
                chomp($errorid);
                chomp($e);
                warn "\nERROR ON MFHD RECORD $errorid: $e\n";
                MARC::File::XML->import; # reset SAX parser so that one bad record doesn't kill the entire export
            };
        }
    }

    stats() if (!$quiet && ! ($count{bib} % 50 ));
}

# Replace any existing 901 fields on $marc with one built from $bib's
# tcn_value, tcn_source and id.
sub _force_901 {
    my ($marc, $bib) = @_;

    $marc->delete_field( $marc->field('901') );
    $marc->append_fields(
        MARC::Field->new(
            '901', ' ', ' ',
            a => $bib->tcn_value,
            b => $bib->tcn_source,
            c => $bib->id
        )
    );
    return;
}

# Print a one-line progress report to STDERR and remember the counters for
# the next "rate since last report" calculation.  Errors while computing
# or printing (e.g. a zero time delta) are ignored on purpose.
sub stats {
    try {
        no warnings;

        $speed = $count{did} / (time - $start);

        my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
        my $cn_speed = $count{cn} / (time - $start);
        my $cp_speed = $count{cp} / (time - $start);

        printf STDERR "\r  $count{did} of $count{bib} @  \%0.4f/s ttl / \%0.4f/s rt ".
                "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
                $speed,
                $speed_now,
                $cn_speed,
                $cp_speed;
    } otherwise {};

    $count{did_last} = $count{did};
    $count{time_last} = time;
}

# Load the lookup tables needed to build 852 fields (org units, copy
# statuses, org unit types, shelving locations) and switch $flesh so bib
# records are retrieved with their call numbers and copies.
sub get_bib_locations {
    _load_lookup('Org Units',          'open-ils.cstore.direct.actor.org_unit.search',       \%orgs);
    _load_lookup('Copy statuses',      'open-ils.cstore.direct.config.copy_status.search',   \%statuses);
    _load_lookup('OU types',           'open-ils.cstore.direct.actor.org_unit_type.search',  \%outypes);
    _load_lookup('Shelving locations', 'open-ils.cstore.direct.asset.copy_location.search',  \%shelves);

    $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
}

# Fetch every row (id != NULL) from the given cstore search method and store
# each object in %$store under its id.  Dies if the request fails.
#
# Params: $label  - text for the "Retrieving ..." progress message
#         $method - cstore method name
#         $store  - hash ref to fill
sub _load_lookup {
    my ($label, $method, $store) = @_;

    print STDERR "Retrieving $label ... " unless $quiet;

    my $req = $ses->request( $method, { id => { '!=' => undef } } );
    while (my $resp = $req->recv) {
        die $req->failed->stringify if ($req->failed);
        my $obj = $resp->content;
        last unless ($obj);
        $store->{$obj->id} = $obj;
    }
    $req->finish;

    # The original printed "OK" even under --quiet.
    print STDERR "OK\n" unless $quiet;
    return;
}

# Append one 852 field to the MARC record for every non-deleted copy on
# every non-deleted call number of the bib, honouring the exclusion rules
# and org-unit collapsing from --exclusion_ini / --collapse_to_depth.
#
# Params: $bib - fleshed bib record object (call_numbers -> copies)
#         $r   - MARC::Record to append the 852 fields to
sub add_bib_holdings {
    my $bib = shift;
    my $r = shift;

    my $cn_list = $bib->call_numbers;
    return unless ($cn_list && @$cn_list);

    $count{cn} += @$cn_list;

    # Group all copies by the call number they reference.
    my %copies_for;
    for my $cn (@$cn_list) {
        push @{ $copies_for{ $_->call_number } }, $_ for @{ $cn->copies || [] };
    }
    return unless %copies_for;

    CALLNUMMAP: for my $cn ( @$cn_list ) {
        # Ignore deleted call numbers
        next CALLNUMMAP if ( $cn->deleted eq 't' );

        COPYMAP: for my $cp ( @{ $copies_for{ $cn->id } || [] } ) {
            $count{cp}++;

            # Ignore deleted copies
            next COPYMAP if ( $cp->deleted eq 't' );

            my $circlib  = $cp->circ_lib;
            my $printlib = $circlib;    # org whose shortname goes into 852$b

            # Collapsing and exclusions only apply when an exclusion ini
            # was loaded, as in the original.
            if ($cfg) {
                # A depth of 0 is falsy and therefore disables collapsing.
                $printlib = _collapsed_org_id($circlib) if $collapse_to_depth;

                my $reason = _copy_exclusion_reason($circlib, $cn, $cp);
                if (defined $reason) {
                    skipnote($bib->id, $reason);
                    next COPYMAP;
                }
            }

            $r->append_fields(
                MARC::Field->new(
                    852, '4', '',
                    a => $location,
                    b => $orgs{$printlib}->shortname,
                    #b => $orgs{$owninglib}->shortname,
                    #b => $orgs{$circlib}->shortname,
                    c => $shelves{$cp->location}->name,
                    j => $cn->label,
                    ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
                    p => $cp->barcode,
                    ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
                    ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
                    ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
                    ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
                    ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
                    ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
                    z => $statuses{$cp->status}->name,
                )
            );

            stats() if (!$quiet && ! ($count{cp} % 100 ));
        } # COPYMAP: for my $cp
    } # CALLNUMMAP: for my $cn
    return;
}

# Walk from the given org unit towards the root until an org at or above
# --collapse_to_depth is reached, and return that org's id.  An org whose
# ini block sets DontCollapse stops the walk.
sub _collapsed_org_id {
    my $org_id = shift;

    my $org      = $orgs{$org_id};
    my $print_id = $org_id;

    while ( $outypes{ $org->ou_type }->depth > $collapse_to_depth ) {
        my $localcfg = $cfg->param(-block => $org->shortname);
        last if $localcfg->{'DontCollapse'};

        # Without a parent there is nowhere left to collapse to; the
        # original looped forever in this case.
        last unless $org->parent_ou;

        $org      = $orgs{ $org->parent_ou };
        $print_id = $org->id;
    }
    return $print_id;
}

# Check the copy against the ini block of its circulating library and of
# every ancestor org unit.  Returns the skip note of the first rule that
# excludes the copy, or undef when the copy should be exported.
sub _copy_exclusion_reason {
    my ($circlib, $cn, $cp) = @_;

    my $org = $orgs{$circlib};
    while ($org) {
        # load the local config from the .ini file for exclusions
        my $localcfg = $cfg->param(-block => $org->shortname);

        my $reason = _exclusion_reason($localcfg, $cn, $cp);
        return $reason if defined $reason;

        $org = $org->parent_ou ? $orgs{ $org->parent_ou } : undef;
    }
    return;
}

# Return the values configured under $key as a flat list, whether
# Config::Simple stored a single scalar or an array ref.
sub _cfg_list {
    my ($localcfg, $key) = @_;
    my $value = $localcfg->{$key};
    return (ref($value) eq "ARRAY") ? @$value : ($value);
}

# Apply one org unit's exclusion rules to a copy, in the original order.
#
# Params:  $localcfg - hash ref of that org's ini block
#          $cn, $cp  - call number and copy objects
# Returns: the note to pass to skipnote() (possibly the empty string) when
#          the copy is excluded, undef otherwise.
sub _exclusion_reason {
    my ($localcfg, $cn, $cp) = @_;

    # if we see this setting, just skip that org
    return 'ExcludeEntireOrg' if $localcfg->{'ExcludeEntireOrg'};

    # Excluded Flags
    if ($localcfg->{'Flags'}) {
        my %flagged = map { ($_ => 1) } _cfg_list($localcfg, 'Flags');
        return "Flags: reference"  if ($flagged{'reference'}  && ($cp->ref eq 't'));
        return "Flags: unholdable" if ($flagged{'unholdable'} && ($cp->holdable eq 'f'));
        return "Flags: circulate"  if ($flagged{'circulate'}  && ($cp->circulate eq 'f'));
        return "Flags: hidden"     if ($flagged{'hidden'}     && ($cp->opac_visible eq 'f'));
    }

    # Excluded Circ Modifiers
    if ($localcfg->{'CircMods'}) {
        my $circmod = $cp->circ_modifier || "";
        return 'CircMods'
            if grep { $_ eq $circmod } _cfg_list($localcfg, 'CircMods');
    }

    # Inverse rule -- only include specified Circ Mods
    if ($localcfg->{'OnlyIncludeCircMods'}) {
        my $circmod = $cp->circ_modifier || "";
        return 'OnlyIncludeCircMods'
            unless grep { $_ and $_ eq $circmod } _cfg_list($localcfg, 'OnlyIncludeCircMods');
    }

    # Excluded Copy Statuses
    if ($localcfg->{'Statuses'}) {
        my $status_name = $statuses{$cp->status}->name;
        return 'Statuses'
            if grep { $_ eq $status_name } _cfg_list($localcfg, 'Statuses');
    }

    # Excluded Locations
    if ($localcfg->{'Locations'}) {
        my $loc_name = $shelves{$cp->location}->name;
        return 'Locations'
            if grep { $_ eq $loc_name } _cfg_list($localcfg, 'Locations');
    }

    # Inverse rule - Only use the specified locations.  The original read
    # the 'Locations' key here when the value was a single scalar.
    if ($localcfg->{'OnlyIncludeLocations'}) {
        my $loc_name = $shelves{$cp->location}->name;
        return 'OnlyIncludeLocations'
            unless grep { $_ eq $loc_name } _cfg_list($localcfg, 'OnlyIncludeLocations');
    }

    # The four regex rules below match each configured pattern in turn.  The
    # original matched against the raw config value, which is an array ref
    # (stringified as "ARRAY(0x...)") when several patterns are configured.

    # exclude based on a regex match to location names
    if ($localcfg->{'LocationRegex'}) {
        my $loc_name = $shelves{$cp->location}->name;
        return 'LocationRegex'
            if grep { $loc_name =~ m/(?:$_)/ } _cfg_list($localcfg, 'LocationRegex');
    }

    # include based on a regex match to location names
    if ($localcfg->{'OnlyIncludeLocationRegex'}) {
        my $loc_name = $shelves{$cp->location}->name;
        return 'OnlyIncludeLocationRegex'
            unless grep { $loc_name =~ m/(?:$_)/ } _cfg_list($localcfg, 'OnlyIncludeLocationRegex');
    }

    # Exclude based on a callno regex
    if ($localcfg->{'CallNoRegex'}) {
        my $label = $cn->label;
        return 'CallNoRegex'
            if grep { $label =~ m/(?:$_)/ } _cfg_list($localcfg, 'CallNoRegex');
    }

    # Include based on a callno regex
    if ($localcfg->{'OnlyIncludeCallNoRegex'}) {
        my $label = $cn->label;
        return 'OnlyIncludeCallNoRegex'
            unless grep { $label =~ m/(?:$_)/ } _cfg_list($localcfg, 'OnlyIncludeCallNoRegex');
    }

    # Trim call number to a float and exclude based on Dewey Range
    if ($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}) {
        my $gt = $localcfg->{'DeweyGT'};
        my $lt = $localcfg->{'DeweyLT'};

        # FIXME if either config has an array just ditch for now
        return "" if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY");

        $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
        $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish

        my $callno = $cn->label;
        $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
        # (A leftover debug print of $callno to STDERR was removed here.)

        # note that we are making big assumptions about the call numbers in
        # the db; a label with no digits compares as 0, without a warning.
        no warnings 'numeric';

        if ($lt && $gt) {
            # we have a range, exclude what's inbetween
            return "Dewey LTGT" if ($callno > $gt and $callno < $lt);
        } elsif ($lt) {
            # we only have a top threshold, exclude everything below it
            return "Dewey LT" if ($callno < $lt);
        } elsif ($gt) {
            # we only have a bottom threshold, exclude everything above it
            return "Dewey GT" if ($callno > $gt);
        }
    }

    return;
}

# Report that a copy on bib $id was skipped because of config rule $note.
# The message goes to STDOUT when records are written to --output-file,
# otherwise to STDERR; nothing is printed under --quiet.
sub skipnote {
    my $id = shift;
    my $note = shift;
    my $outf = *STDERR;
    $outf = *STDOUT if ($output_file);
    printf {$outf} "Skipped %s due to config: %s\n", $id, $note unless $quiet;
}