8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::MultiSession;
11 use OpenSRF::Utils::JSON;
12 use OpenSRF::Utils::SettingsClient;
13 use OpenILS::Application::AppUtils;
14 use OpenILS::Utils::Fieldmapper;
15 use OpenILS::Utils::CStoreEditor;
19 use UNIVERSAL::require;
21 use Time::HiRes qw/time/;
27 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
29 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records,$quiet) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef,0);
30 my ($exclusion_ini,$collapse_to_depth, $output_file);
39 'items' => \$holdings,
40 'mfhd' => \$export_mfhd,
41 'all' => \$all_records,
42 'location=s' => \$location,
43 'money=s' => \$dollarsign,
44 'config=s' => \$config,
45 'format=s' => \$format,
48 'encoding=s' => \$encoding,
49 'timeout=i' => \$timeout,
50 'force901' => \$force901,
51 'exclusion_ini=s' => \$exclusion_ini,
52 'collapse_to_depth=i' => \$collapse_to_depth,
53 'onlyholdings' => \$onlyholdings,
54 'output-file=s' => \$output_file,
55 'verbose' => \$verbose,
61 die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
62 $cfg = new Config::Simple($exclusion_ini)
67 This script exports MARC authority, bibliographic, and serial holdings
68 records from an Evergreen database.
70 Input to this script can consist of a list of record IDs, with one record ID
71 per line, corresponding to the record ID in the Evergreen database table of
72 your requested record type.
74 Alternately, passing the --all option will attempt to export all records of
75 the specified type from the Evergreen database. The --all option starts at
76 record ID 1 and increments the ID by 1 until the largest ID in the database
77 is retrieved. This may not be very efficient for databases with large gaps
78 in their ID sequences.
81 --help or -h This screen.
82 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
83 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
84 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
85 --xml-idl or -x Location of the IDL XML
86 --timeout Timeout for exporting a single record; increase if you
87 are using --holdings and are exporting records that
88 have a lot of items attached to them.
89 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
90 --all or -a Export all records; ignores input list
92 Additional options for type = 'BIBLIO':
93 --items or -i Include items (holdings) in the output
94 --money Currency symbol to use in item price field [\$]
95 --mfhd Export serial MFHD records for associated bib records
96 Not compatible with --format=BRE
97 --location or -l MARC Location Code for holdings from
98 http://www.loc.gov/marc/organizations/orgshome.html
100 Options added by Sitka:
101 --force901 Force-add 901 fields
102 --exclusion_ini FILENAME Config::Simple based INI file for excluding holdings from the export
103 --collapse_to_depth 2 Depth to collapse holdings. Any holdings at a depth below
104 will be collapsed up to the parent org unit at the set depth
105 --onlyholdings Clean out 852s before adding new ones, and only export items that
106 successfully recieved an 852 field
110 To export a set of USMARC records in a file named "output_file" based on the
111 IDs contained in a file named "list_of_ids":
112 cat list_of_ids | $0 > output_file
114 To export a set of MARC21XML authority records in a file named "output.xml"
115 for all authority records in the database:
116 $0 --format XML --type AUTHORITY --all > output.xml
123 $format = uc($format);
124 $encoding = uc($encoding);
128 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
130 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
132 $outfh = $real_stdout;
135 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
136 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
138 if (!grep { $format eq $_ } @formats) {
139 die "Please select a supported format. ".
140 "Right now that means one of [".
141 join('|',@formats). "]\n";
144 if ($format ne 'XML') {
145 my $type = 'MARC::File::' . $format;
150 # set default timeout and/or correct silly user who
151 # supplied a negative timeout; default timeout of
152 # 300 seconds if exporting items determined empirically.
153 $timeout = $holdings ? 300 : 1;
156 OpenSRF::System->bootstrap_client( config_file => $config );
159 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
162 Fieldmapper->import(IDL => $idl);
165 my $bibses = OpenSRF::MultiSession->new(
166 app => 'open-ils.cstore',
168 success_handler => sub {
171 my $bre = $req->{response}[0]->{content}; #->i{content};
172 print STDERR $req->{meth} . " record: " . $req->{params}->[0] . " -- " . OpenSRF::Utils::JSON->perl2JSON($bre) . "ok\n" if $verbose;
173 process_bib($req->{params}->[0], $bre);
175 failure_handler => sub {
178 warn "record $req->{params}->[0] failed: " . OpenSRF::Utils::JSON->perl2JSON($req->{response});
184 OpenILS::Utils::CStoreEditor::init();
185 my $editor = OpenILS::Utils::CStoreEditor->new();
187 print $outfh <<HEADER if ($format eq 'XML');
188 <?xml version="1.0" encoding="$encoding"?>
189 <collection xmlns='http://www.loc.gov/MARC21/slim'>
204 my $last_time = time;
205 my %count = ('bib' => 0, 'did' => 0);
210 if ($type eq 'biblio') {
211 $top_record = $editor->search_biblio_record_entry([
213 {order_by => { 'bre' => 'id DESC' }, limit => 1}
215 } elsif ($type eq 'authority') {
216 $top_record = $editor->search_authority_record_entry([
218 {order_by => { 'are' => 'id DESC' }, limit => 1}
221 for (my $i = 0; $i++ < $top_record;) {
225 while ( my $i = <> ) {
230 print $outfh "</collection>\n" if ($format eq 'XML');
232 $speed = $count{did} / (time - $start);
233 my $time = time - $start;
236 Exports Attempted : $count{bib}
237 Exports Completed : $count{did}
238 Overall Speed : $speed
239 Total Time Elapsed: $time seconds
248 my $r = $bibses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
250 my $s = $r->recv(timeout => $timeout);
252 warn "\n!!!!! Failed trying to read record $id\n";
256 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
260 warn "\n!!!!!! Timed out trying to read record $id\n";
274 # Return if the bib is deleted
275 return if ( $bib->deleted eq 't' );
277 if ($format eq 'ARE' or $format eq 'BRE') {
278 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
279 stats() unless $quiet;
286 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
287 if ($type eq 'biblio') {
290 # Remove old 852 fields
291 my @f = $r->field('852');
292 $r->delete_fields(@f) if @f;
294 add_bib_holdings($bib, $r);
295 # Check that at least one 852 was added
296 @f = $r->field('852');
297 # If not, we should NOT add this item to the export
300 add_bib_holdings($bib, $r);
305 $r->delete_field( $r->field('901') );
309 a => $bib->tcn_value,
310 b => $bib->tcn_source,
316 my $recordstr = undef;
318 if ($format eq 'XML') {
319 my $xml = $r->as_xml_record;
320 $xml =~ s/^<\?.+?\?>$//mo;
322 } elsif ($format eq 'UNIMARC') {
323 $recordstr = $r->as_usmarc;
324 } elsif ($format eq 'USMARC') {
325 $recordstr = $r->as_usmarc;
328 if($format eq 'UNIMARC' or $format eq 'USMARC') {
329 my $rec = MARC::File::USMARC->decode($recordstr);
330 #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
331 } elsif($format eq 'XML') {
332 my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
333 #my $tmp = $rec->as_xml_record;
334 #$tmp =~ s/^<\?.+?\?>$//mo;
335 #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
337 } or throw Error::Simple("Failed to parse MARC record back: $!");
338 print $outfh $recordstr;
347 warn "\nERROR ON RECORD $errorid: $e\n";
348 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
351 if ($export_mfhd and $type eq 'biblio') {
352 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
353 foreach my $mfhd (@$mfhds) {
355 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
358 $r->delete_field( $r->field('901') );
362 a => $bib->tcn_value,
363 b => $bib->tcn_source,
369 if ($format eq 'XML') {
370 my $xml = $r->as_xml_record;
371 $xml =~ s/^<\?.+?\?>$//mo;
373 } elsif ($format eq 'UNIMARC') {
374 print $outfh $r->as_usmarc;
375 } elsif ($format eq 'USMARC') {
376 print $outfh $r->as_usmarc;
380 my $errorid = chomp($id);
382 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
383 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
388 stats() if (!$quiet && ! ($count{bib} % 50 ));
395 $speed = $count{did} / (time - $start);
397 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
398 my $cn_speed = $count{cn} / (time - $start);
399 my $cp_speed = $count{cp} / (time - $start);
401 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
402 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
408 $count{did_last} = $count{did};
409 $count{time_last} = time;
412 sub get_bib_locations {
413 print STDERR "Retrieving Org Units ... " unless $quiet;
414 my $ses = OpenSRF::AppSession->connect('open-ils.cstore');
415 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
417 while (my $o = $r->recv) {
418 die $r->failed->stringify if ($r->failed);
426 print STDERR "Retrieving Copy statuses ... " unless $quiet;
427 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
429 while (my $sta = $r->recv) {
430 die $r->failed->stringify if ($r->failed);
431 $sta = $sta->content;
433 $statuses{$sta->id} = $sta;
438 print STDERR "Retrieving OU types ... " unless $quiet;
439 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
441 while (my $outy = $r->recv) {
442 die $r->failed->stringify if ($r->failed);
443 $outy = $outy->content;
445 $outypes{$outy->id} = $outy;
450 print STDERR "Retrieving Shelving locations ... " unless $quiet;
451 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
453 while (my $s = $r->recv) {
454 die $r->failed->stringify if ($r->failed);
457 $shelves{$s->id} = $s;
462 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
466 sub add_bib_holdings {
470 my $cn_list = $bib->call_numbers;
471 if ($cn_list && @$cn_list) {
473 $count{cn} += @$cn_list;
475 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
476 if ($cp_list && @$cp_list) {
479 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
481 CALLNUMMAP: for my $cn ( @$cn_list ) {
482 my $cn_map_list = $cn_map{$cn->id};
484 # Ignore deleted copies
485 next CALLNUMMAP if ( $cn->deleted eq 't' );
487 COPYMAP: for my $cp ( @$cn_map_list ) {
491 my $owninglib = $cn->owning_lib;
492 my $circlib = $cp->circ_lib;
493 my $printlib = $cp->circ_lib;
495 # Ignore deleted copies
496 next COPYMAP if ( $cp->deleted eq 't');
499 my $thisorg = $orgs{$circlib};
501 if($collapse_to_depth){
502 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
503 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
504 if( $localcfg->{'DontCollapse'} ){
507 if($thisorg->parent_ou){
508 $thisorg = $orgs{$thisorg->parent_ou};
509 $printlib = $thisorg->id;
514 $thisorg = $orgs{$circlib};
518 # load the local config from the .ini file for exclusions
519 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
522 # if we see this setting, just skip that org
524 $cfgparam = 'ExcludeEntireOrg';
525 if( $localcfg->{$cfgparam} )
526 { skipnote($bib->id, $cfgparam); next COPYMAP; }
528 # what follows are exclusion rules
532 if($localcfg->{$cfgparam}){
533 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
534 my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
535 if(grep( { $_ eq 'reference' } @flags) && ($cp->ref eq 't'))
536 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
537 if(grep( { $_ eq 'unholdable' } @flags) && ($cp->holdable eq 'f'))
538 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
539 if(grep( { $_ eq 'circulate' } @flags) && ($cp->circulate eq 'f'))
540 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
541 if(grep( { $_ eq 'hidden' } @flags) && ($cp->opac_visible eq 'f'))
542 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
545 # Excluded Circ Modifiers
546 $cfgparam = 'CircMods';
547 if($localcfg->{$cfgparam}){
548 my $circmod = $cp->circ_modifier || "";
549 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
550 if(grep( { $_ eq $circmod } @circmods) && @circmods)
551 { skipnote($bib->id,$cfgparam); next COPYMAP; }
553 # Inverse rule -- only include specified Circ Mods
554 $cfgparam = 'OnlyIncludeCircMods';
555 if($localcfg->{$cfgparam}){
556 my $circmod = $cp->circ_modifier || "";
557 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
558 unless(grep( { $_ and $_ eq $circmod } @circmods) && @circmods)
559 { skipnote($bib->id,$cfgparam); next COPYMAP; }
561 # Excluded Copy Statuses
562 $cfgparam = 'Statuses';
563 if($localcfg->{$cfgparam}){
564 my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
565 if(grep( { $_ eq $statuses{$cp->status}->name } @statuses) && @statuses)
566 { skipnote($bib->id,$cfgparam); next COPYMAP; }
569 $cfgparam = 'Locations';
570 if($localcfg->{$cfgparam}){
571 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
572 if(grep( { $_ eq $shelves{$cp->location}->name } @locations) && @locations)
573 { skipnote($bib->id,$cfgparam); next COPYMAP; }
575 # Inverse rule - Only use the specified locations
576 $cfgparam = 'OnlyIncludeLocations';
577 if($localcfg->{$cfgparam}){
578 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
579 unless(grep( { $_ eq $shelves{$cp->location}->name } @locations) && @locations)
580 { skipnote($bib->id,$cfgparam); next COPYMAP; }
582 # exclude based on a regex match to location names
583 $cfgparam = 'LocationRegex';
584 if($localcfg->{$cfgparam}){
585 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
586 my $reg = $localcfg->{$cfgparam};
587 if(grep( { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex) && @locregex)
588 { skipnote($bib->id,$cfgparam); next COPYMAP; }
590 # include based on a regex match to location names
591 $cfgparam = 'OnlyIncludeLocationRegex';
592 if($localcfg->{$cfgparam}){
593 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
594 my $reg = $localcfg->{$cfgparam};
595 unless(grep( { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex) && @locregex)
596 { skipnote($bib->id,$cfgparam); next COPYMAP; }
598 # Exclude based on a callno regex
599 $cfgparam = 'CallNoRegex';
600 if($localcfg->{$cfgparam}){
601 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
602 my $reg = $localcfg->{$cfgparam};
603 if(grep( { $cn->label =~ m/($reg)/ } @callnoregex) && @callnoregex)
604 { skipnote($bib->id,$cfgparam); next COPYMAP; }
606 # Include based on a callno regex
607 $cfgparam = 'OnlyIncludeCallNoRegex';
608 if($localcfg->{$cfgparam}){
609 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
610 my $reg = $localcfg->{$cfgparam};
611 unless(grep( { $cn->label =~ m/($reg)/ } @callnoregex) && @callnoregex)
612 { skipnote($bib->id,$cfgparam); next COPYMAP; }
615 # Trim call number to a float and exclude based on Dewey Range
616 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
617 my $gt = $localcfg->{'DeweyGT'};
618 my $lt = $localcfg->{'DeweyLT'};
620 # FIXME if either config has an array just ditch for now
621 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
622 { skipnote($bib->id,""); next COPYMAP; }
623 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
624 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
626 my $callno = $cn->label;
627 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
628 print STDERR $callno;
629 #note that we are making big assumptions about the call numbers in the db
631 # we have a range, exclude what's inbetween
633 if($callno > $gt and $callno < $lt)
634 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
635 # we only have a top threshold, exclude everything below it
638 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
639 # we only have a bottom threshold, exclude everything above it
642 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
646 if($thisorg->parent_ou){
647 $thisorg = $orgs{$thisorg->parent_ou}
659 b => $orgs{$printlib}->shortname,
660 #b => $orgs{$owninglib}->shortname,
661 #b => $orgs{$circlib}->shortname,
662 c => $shelves{$cp->location}->name,
664 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
666 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
667 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
668 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
669 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
670 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
671 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
672 z => $statuses{$cp->status}->name,
678 stats() if (!$quiet && ! ($count{cp} % 100 ));
679 } # COPYMAP: for my $cp ( @$cn_map_list )
680 } # for my $cn ( @$cn_list )
681 } # if ($cp_list && @$cp_list)
682 } # if ($cn_list && @$cn_list)
689 $outf = *STDOUT if($output_file) ;
690 printf($outf "Skipped %s due to config: %s\n",$id,$note) unless $quiet;