8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
29 my ($exclusion_ini,$collapse_to_depth, $output_file);
35 'items' => \$holdings,
36 'mfhd' => \$export_mfhd,
37 'all' => \$all_records,
38 'location=s' => \$location,
39 'money=s' => \$dollarsign,
40 'config=s' => \$config,
41 'format=s' => \$format,
44 'encoding=s' => \$encoding,
45 'timeout=i' => \$timeout,
46 'force901' => \$force901,
47 'exclusion_ini=s' => \$exclusion_ini,
48 'collapse_to_depth=i' => \$collapse_to_depth,
49 'output-file=s' => \$output_file,
53 die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
54 $cfg = new Config::Simple($exclusion_ini)
59 This script exports MARC authority, bibliographic, and serial holdings
60 records from an Evergreen database.
62 Input to this script can consist of a list of record IDs, with one record ID
63 per line, corresponding to the record ID in the Evergreen database table of
64 your requested record type.
66 Alternately, passing the --all option will attempt to export all records of
67 the specified type from the Evergreen database. The --all option starts at
68 record ID 1 and increments the ID by 1 until the largest ID in the database
69 is retrieved. This may not be very efficient for databases with large gaps
70 in their ID sequences.
73 --help or -h This screen.
74 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
75 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
76 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
77 --xml-idl or -x Location of the IDL XML
78 --timeout Timeout for exporting a single record; increase if you
79 are using --holdings and are exporting records that
80 have a lot of items attached to them.
81 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
82 --all or -a Export all records; ignores input list
84 Additional options for type = 'BIBLIO':
85 --items or -i Include items (holdings) in the output
86 --money Currency symbol to use in item price field [\$]
87 --mfhd Export serial MFHD records for associated bib records
88 Not compatible with --format=BRE
89 --location or -l MARC Location Code for holdings from
90 http://www.loc.gov/marc/organizations/orgshome.html
94 To export a set of USMARC records in a file named "output_file" based on the
95 IDs contained in a file named "list_of_ids":
96 cat list_of_ids | $0 > output_file
98 To export a set of MARC21XML authority records in a file named "output.xml"
99 for all authority records in the database:
100 $0 --format XML --type AUTHORITY --all > output.xml
107 $format = uc($format);
108 $encoding = uc($encoding);
112 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
114 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
116 $outfh = $real_stdout;
119 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
120 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
122 if (!grep { $format eq $_ } @formats) {
123 die "Please select a supported format. ".
124 "Right now that means one of [".
125 join('|',@formats). "]\n";
128 if ($format ne 'XML') {
129 my $type = 'MARC::File::' . $format;
134 # set default timeout and/or correct silly user who
135 # supplied a negative timeout; default timeout of
136 # 300 seconds if exporting items determined empirically.
137 $timeout = $holdings ? 300 : 1;
140 OpenSRF::System->bootstrap_client( config_file => $config );
143 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
146 Fieldmapper->import(IDL => $idl);
148 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
149 OpenILS::Utils::CStoreEditor::init();
150 my $editor = OpenILS::Utils::CStoreEditor->new();
152 print $outfh <<HEADER if ($format eq 'XML');
153 <?xml version="1.0" encoding="$encoding"?>
154 <collection xmlns='http://www.loc.gov/MARC21/slim'>
169 my $last_time = time;
170 my %count = ('bib' => 0, 'did' => 0);
175 if ($type eq 'biblio') {
176 $top_record = $editor->search_biblio_record_entry([
178 {order_by => { 'bre' => 'id DESC' }, limit => 1}
180 } elsif ($type eq 'authority') {
181 $top_record = $editor->search_authority_record_entry([
183 {order_by => { 'are' => 'id DESC' }, limit => 1}
186 for (my $i = 0; $i++ < $top_record;) {
190 while ( my $i = <> ) {
195 print $outfh "</collection>\n" if ($format eq 'XML');
197 $speed = $count{did} / (time - $start);
198 my $time = time - $start;
201 Exports Attempted : $count{bib}
202 Exports Completed : $count{did}
203 Overall Speed : $speed
204 Total Time Elapsed: $time seconds
213 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
214 my $s = $r->recv(timeout => $timeout);
216 warn "\n!!!!! Failed trying to read record $id\n";
220 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
224 warn "\n!!!!!! Timed out trying to read record $id\n";
233 if ($format eq 'ARE' or $format eq 'BRE') {
234 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
242 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
243 if ($type eq 'biblio') {
244 # Remove old 852 fields
245 my @f = $r->field('852');
246 $r->delete_fields(@f) if defined @f;
249 add_bib_holdings($bib, $r);
250 # Check that at least one 852 was added
251 my @f = $r->field('852');
252 # If not, we should NOT add this item to the export
253 return unless defined @f;
257 $r->delete_field( $r->field('901') );
261 a => $bib->tcn_value,
262 b => $bib->tcn_source,
268 my $recordstr = undef;
270 if ($format eq 'XML') {
271 my $xml = $r->as_xml_record;
272 $xml =~ s/^<\?.+?\?>$//mo;
274 } elsif ($format eq 'UNIMARC') {
275 $recordstr = $r->as_usmarc;
276 } elsif ($format eq 'USMARC') {
277 $recordstr = $r->as_usmarc;
280 if($format eq 'UNIMARC' or $format eq 'USMARC') {
281 my $rec = MARC::File::USMARC->decode($recordstr);
282 #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
283 } elsif($format eq 'XML') {
284 my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
285 #my $tmp = $rec->as_xml_record;
286 #$tmp =~ s/^<\?.+?\?>$//mo;
287 #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
289 } or throw Error::Simple("Failed to parse MARC record back: $!");
290 print $outfh $recordstr;
299 warn "\nERROR ON RECORD $errorid: $e\n";
300 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
303 if ($export_mfhd and $type eq 'biblio') {
304 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
305 foreach my $mfhd (@$mfhds) {
307 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
310 $r->delete_field( $r->field('901') );
314 a => $bib->tcn_value,
315 b => $bib->tcn_source,
321 if ($format eq 'XML') {
322 my $xml = $r->as_xml_record;
323 $xml =~ s/^<\?.+?\?>$//mo;
325 } elsif ($format eq 'UNIMARC') {
326 print $outfh $r->as_usmarc;
327 } elsif ($format eq 'USMARC') {
328 print $outfh $r->as_usmarc;
332 my $errorid = chomp($id);
334 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
335 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
340 stats() if (! ($count{bib} % 50 ));
347 $speed = $count{did} / (time - $start);
349 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
350 my $cn_speed = $count{cn} / (time - $start);
351 my $cp_speed = $count{cp} / (time - $start);
353 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
354 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
360 $count{did_last} = $count{did};
361 $count{time_last} = time;
364 sub get_bib_locations {
365 print STDERR "Retrieving Org Units ... ";
366 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
368 while (my $o = $r->recv) {
369 die $r->failed->stringify if ($r->failed);
377 print STDERR "Retrieving Copy statuses ... ";
378 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
380 while (my $sta = $r->recv) {
381 die $r->failed->stringify if ($r->failed);
382 $sta = $sta->content;
384 $statuses{$sta->id} = $sta;
389 print STDERR "Retrieving OU types ... ";
390 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
392 while (my $outy = $r->recv) {
393 die $r->failed->stringify if ($r->failed);
394 $outy = $outy->content;
396 $outypes{$outy->id} = $outy;
401 print STDERR "Retrieving Shelving locations ... ";
402 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
404 while (my $s = $r->recv) {
405 die $r->failed->stringify if ($r->failed);
408 $shelves{$s->id} = $s;
413 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
416 sub add_bib_holdings {
420 my $cn_list = $bib->call_numbers;
421 if ($cn_list && @$cn_list) {
423 $count{cn} += @$cn_list;
425 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
426 if ($cp_list && @$cp_list) {
429 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
431 for my $cn ( @$cn_list ) {
432 my $cn_map_list = $cn_map{$cn->id};
434 COPYMAP: for my $cp ( @$cn_map_list ) {
438 my $owninglib = $cn->owning_lib;
439 my $circlib = $cp->circ_lib;
440 my $printlib = $cp->circ_lib;
443 my $thisorg = $orgs{$circlib};
445 if($collapse_to_depth){
446 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
447 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
448 if( $localcfg->{'DontCollapse'} ){
451 if($thisorg->parent_ou){
452 $thisorg = $orgs{$thisorg->parent_ou};
453 $printlib = $thisorg->id;
458 $thisorg = $orgs{$circlib};
462 # load the local config from the .ini file for exclusions
463 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
466 # if we see this setting, just skip that org
468 $cfgparam = 'ExcludeEntireOrg';
469 if( $localcfg->{$cfgparam} )
470 { skipnote($bib->id, $cfgparam); next COPYMAP; }
472 # what follows are exclusion rules
476 if($localcfg->{$cfgparam}){
477 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
478 my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
479 if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't')
480 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
481 elsif( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f')
482 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
483 elsif( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f')
484 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
485 elsif( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f')
486 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
489 # Excluded Circ Modifiers
490 $cfgparam = 'CircMods';
491 if($localcfg->{$cfgparam}){
492 my $circmod = $cp->circ_modifier || "";
493 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
494 if( grep { $_ eq $circmod } @circmods && @circmods)
495 { skipnote($bib->id,$cfgparam); next COPYMAP; }
497 # Inverse rule -- only include specified Circ Mods
498 $cfgparam = 'OnlyIncludeCircMods';
499 if($localcfg->{$cfgparam}){
500 my $circmod = $cp->circ_modifier || "";
501 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
502 unless( grep { $_ and $_ eq $circmod } @circmods && @circmods)
503 { skipnote($bib->id,$cfgparam); next COPYMAP; }
505 # Excluded Copy Statuses
506 $cfgparam = 'Statuses';
507 if($localcfg->{$cfgparam}){
508 my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
509 if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses)
510 { skipnote($bib->id,$cfgparam); next COPYMAP; }
513 $cfgparam = 'Locations';
514 if($localcfg->{$cfgparam}){
515 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
516 if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
517 { skipnote($bib->id,$cfgparam); next COPYMAP; }
519 # Inverse rule - Only use the specified locations
520 $cfgparam = 'OnlyIncludeLocations';
521 if($localcfg->{$cfgparam}){
522 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
523 unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
524 { skipnote($bib->id,$cfgparam); next COPYMAP; }
526 # exclude based on a regex match to location names
527 $cfgparam = 'LocationRegex';
528 if($localcfg->{$cfgparam}){
529 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
530 my $reg = $localcfg->{$cfgparam};
531 if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
532 { skipnote($bib->id,$cfgparam); next COPYMAP; }
534 # include based on a regex match to location names
535 $cfgparam = 'OnlyIncludeLocationRegex';
536 if($localcfg->{$cfgparam}){
537 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
538 my $reg = $localcfg->{$cfgparam};
539 unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
540 { skipnote($bib->id,$cfgparam); next COPYMAP; }
542 # Exclude based on a callno regex
543 $cfgparam = 'CallNoRegex';
544 if($localcfg->{$cfgparam}){
545 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
546 my $reg = $localcfg->{$cfgparam};
547 if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
548 { skipnote($bib->id,$cfgparam); next COPYMAP; }
550 # Include based on a callno regex
551 $cfgparam = 'OnlyIncludeCallNoRegex';
552 if($localcfg->{$cfgparam}){
553 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
554 my $reg = $localcfg->{$cfgparam};
555 unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
556 { skipnote($bib->id,$cfgparam); next COPYMAP; }
559 # Trim call number to a float and exclude based on Dewey Range
560 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
561 my $gt = $localcfg->{'DeweyGT'};
562 my $lt = $localcfg->{'DeweyLT'};
564 # FIXME if either config has an array just ditch for now
565 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
566 { skipnote($bib->id,""); next COPYMAP; }
567 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
568 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
570 my $callno = $cn->label;
571 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
572 print STDERR $callno;
573 #note that we are making big assumptions about the call numbers in the db
575 # we have a range, exclude what's inbetween
577 if($callno > $gt and $callno < $lt)
578 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
579 # we only have a top threshold, exclude everything below it
582 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
583 # we only have a bottom threshold, exclude everything above it
586 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
590 if($thisorg->parent_ou){
591 $thisorg = $orgs{$thisorg->parent_ou}
603 b => $orgs{$printlib}->shortname,
604 #b => $orgs{$owninglib}->shortname,
605 #b => $orgs{$circlib}->shortname,
606 c => $shelves{$cp->location}->name,
608 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
610 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
611 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
612 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
613 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
614 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
615 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
616 z => $statuses{$cp->status}->name,
622 stats() if (! ($count{cp} % 100 ));
623 } # COPYMAP: for my $cp ( @$cn_map_list )
624 } # for my $cn ( @$cn_list )
625 } # if ($cp_list && @$cp_list)
626 } # if ($cn_list && @$cn_list)
633 $outf = *STDOUT if($output_file) ;
634 printf($outf "Skipped %s due to config: %s\n",$id,$note);