8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::MultiSession;
11 use OpenSRF::Utils::JSON;
12 use OpenSRF::Utils::SettingsClient;
13 use OpenILS::Application::AppUtils;
14 use OpenILS::Utils::Fieldmapper;
15 use OpenILS::Utils::CStoreEditor;
19 use UNIVERSAL::require;
21 use Time::HiRes qw/time/;
27 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
29 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
30 my ($exclusion_ini,$collapse_to_depth, $output_file);
39 'items' => \$holdings,
40 'mfhd' => \$export_mfhd,
41 'all' => \$all_records,
42 'location=s' => \$location,
43 'money=s' => \$dollarsign,
44 'config=s' => \$config,
45 'format=s' => \$format,
48 'encoding=s' => \$encoding,
49 'timeout=i' => \$timeout,
50 'force901' => \$force901,
51 'exclusion_ini=s' => \$exclusion_ini,
52 'collapse_to_depth=i' => \$collapse_to_depth,
53 'onlyholdings' => \$onlyholdings,
54 'output-file=s' => \$output_file,
55 'verbose' => \$verbose,
60 die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
61 $cfg = new Config::Simple($exclusion_ini)
66 This script exports MARC authority, bibliographic, and serial holdings
67 records from an Evergreen database.
69 Input to this script can consist of a list of record IDs, with one record ID
70 per line, corresponding to the record ID in the Evergreen database table of
71 your requested record type.
73 Alternately, passing the --all option will attempt to export all records of
74 the specified type from the Evergreen database. The --all option starts at
75 record ID 1 and increments the ID by 1 until the largest ID in the database
76 is retrieved. This may not be very efficient for databases with large gaps
77 in their ID sequences.
80 --help or -h This screen.
81 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
82 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
83 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
84 --xml-idl or -x Location of the IDL XML
85 --timeout Timeout for exporting a single record; increase if you
86 are using --holdings and are exporting records that
87 have a lot of items attached to them.
88 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
89 --all or -a Export all records; ignores input list
91 Additional options for type = 'BIBLIO':
92 --items or -i Include items (holdings) in the output
93 --money Currency symbol to use in item price field [\$]
94 --mfhd Export serial MFHD records for associated bib records
95 Not compatible with --format=BRE
96 --location or -l MARC Location Code for holdings from
97 http://www.loc.gov/marc/organizations/orgshome.html
99 Options added by Sitka:
100 --force901 Force-add 901 fields
101 --exclusion_ini FILENAME Config::Simple based INI file for excluding holdings from the export
102 --collapse_to_depth 2 Depth to collapse holdings. Any holdings at a depth below
103 will be collapsed up to the parent org unit at the set depth
104 --onlyholdings Clean out 852s before adding new ones, and only export items that
105 successfully recieved an 852 field
109 To export a set of USMARC records in a file named "output_file" based on the
110 IDs contained in a file named "list_of_ids":
111 cat list_of_ids | $0 > output_file
113 To export a set of MARC21XML authority records in a file named "output.xml"
114 for all authority records in the database:
115 $0 --format XML --type AUTHORITY --all > output.xml
122 $format = uc($format);
123 $encoding = uc($encoding);
127 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
129 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
131 $outfh = $real_stdout;
134 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
135 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
137 if (!grep { $format eq $_ } @formats) {
138 die "Please select a supported format. ".
139 "Right now that means one of [".
140 join('|',@formats). "]\n";
143 if ($format ne 'XML') {
144 my $type = 'MARC::File::' . $format;
149 # set default timeout and/or correct silly user who
150 # supplied a negative timeout; default timeout of
151 # 300 seconds if exporting items determined empirically.
152 $timeout = $holdings ? 300 : 1;
155 OpenSRF::System->bootstrap_client( config_file => $config );
158 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
161 Fieldmapper->import(IDL => $idl);
164 my $bibses = OpenSRF::MultiSession->new(
165 app => 'open-ils.cstore',
167 success_handler => sub {
170 my $bre = $req->{response}[0]->{content}; #->i{content};
171 print STDERR $req->{meth} . " record: " . $req->{params}->[0] . " -- " . OpenSRF::Utils::JSON->perl2JSON($bre) . "ok\n" if $verbose;
172 process_bib($req->{params}->[0], $bre);
174 failure_handler => sub {
177 warn "record $req->{params}->[0] failed: " . OpenSRF::Utils::JSON->perl2JSON($req->{response});
183 OpenILS::Utils::CStoreEditor::init();
184 my $editor = OpenILS::Utils::CStoreEditor->new();
186 print $outfh <<HEADER if ($format eq 'XML');
187 <?xml version="1.0" encoding="$encoding"?>
188 <collection xmlns='http://www.loc.gov/MARC21/slim'>
203 my $last_time = time;
204 my %count = ('bib' => 0, 'did' => 0);
209 if ($type eq 'biblio') {
210 $top_record = $editor->search_biblio_record_entry([
212 {order_by => { 'bre' => 'id DESC' }, limit => 1}
214 } elsif ($type eq 'authority') {
215 $top_record = $editor->search_authority_record_entry([
217 {order_by => { 'are' => 'id DESC' }, limit => 1}
220 for (my $i = 0; $i++ < $top_record;) {
224 while ( my $i = <> ) {
229 print $outfh "</collection>\n" if ($format eq 'XML');
231 $speed = $count{did} / (time - $start);
232 my $time = time - $start;
235 Exports Attempted : $count{bib}
236 Exports Completed : $count{did}
237 Overall Speed : $speed
238 Total Time Elapsed: $time seconds
247 my $r = $bibses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
249 my $s = $r->recv(timeout => $timeout);
251 warn "\n!!!!! Failed trying to read record $id\n";
255 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
259 warn "\n!!!!!! Timed out trying to read record $id\n";
273 # Return if the bib is deleted
274 return if ( $bib->deleted eq 't' );
276 if ($format eq 'ARE' or $format eq 'BRE') {
277 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
285 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
286 if ($type eq 'biblio') {
289 # Remove old 852 fields
290 my @f = $r->field('852');
291 $r->delete_fields(@f) if @f;
293 add_bib_holdings($bib, $r);
294 # Check that at least one 852 was added
295 @f = $r->field('852');
296 # If not, we should NOT add this item to the export
299 add_bib_holdings($bib, $r);
304 $r->delete_field( $r->field('901') );
308 a => $bib->tcn_value,
309 b => $bib->tcn_source,
315 my $recordstr = undef;
317 if ($format eq 'XML') {
318 my $xml = $r->as_xml_record;
319 $xml =~ s/^<\?.+?\?>$//mo;
321 } elsif ($format eq 'UNIMARC') {
322 $recordstr = $r->as_usmarc;
323 } elsif ($format eq 'USMARC') {
324 $recordstr = $r->as_usmarc;
327 if($format eq 'UNIMARC' or $format eq 'USMARC') {
328 my $rec = MARC::File::USMARC->decode($recordstr);
329 #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
330 } elsif($format eq 'XML') {
331 my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
332 #my $tmp = $rec->as_xml_record;
333 #$tmp =~ s/^<\?.+?\?>$//mo;
334 #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
336 } or throw Error::Simple("Failed to parse MARC record back: $!");
337 print $outfh $recordstr;
346 warn "\nERROR ON RECORD $errorid: $e\n";
347 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
350 if ($export_mfhd and $type eq 'biblio') {
351 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
352 foreach my $mfhd (@$mfhds) {
354 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
357 $r->delete_field( $r->field('901') );
361 a => $bib->tcn_value,
362 b => $bib->tcn_source,
368 if ($format eq 'XML') {
369 my $xml = $r->as_xml_record;
370 $xml =~ s/^<\?.+?\?>$//mo;
372 } elsif ($format eq 'UNIMARC') {
373 print $outfh $r->as_usmarc;
374 } elsif ($format eq 'USMARC') {
375 print $outfh $r->as_usmarc;
379 my $errorid = chomp($id);
381 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
382 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
387 stats() if (! ($count{bib} % 50 ));
394 $speed = $count{did} / (time - $start);
396 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
397 my $cn_speed = $count{cn} / (time - $start);
398 my $cp_speed = $count{cp} / (time - $start);
400 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
401 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
407 $count{did_last} = $count{did};
408 $count{time_last} = time;
411 sub get_bib_locations {
412 print STDERR "Retrieving Org Units ... ";
413 my $ses = OpenSRF::AppSession->connect('open-ils.cstore');
414 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
416 while (my $o = $r->recv) {
417 die $r->failed->stringify if ($r->failed);
425 print STDERR "Retrieving Copy statuses ... ";
426 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
428 while (my $sta = $r->recv) {
429 die $r->failed->stringify if ($r->failed);
430 $sta = $sta->content;
432 $statuses{$sta->id} = $sta;
437 print STDERR "Retrieving OU types ... ";
438 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
440 while (my $outy = $r->recv) {
441 die $r->failed->stringify if ($r->failed);
442 $outy = $outy->content;
444 $outypes{$outy->id} = $outy;
449 print STDERR "Retrieving Shelving locations ... ";
450 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
452 while (my $s = $r->recv) {
453 die $r->failed->stringify if ($r->failed);
456 $shelves{$s->id} = $s;
461 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
465 sub add_bib_holdings {
469 my $cn_list = $bib->call_numbers;
470 if ($cn_list && @$cn_list) {
472 $count{cn} += @$cn_list;
474 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
475 if ($cp_list && @$cp_list) {
478 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
480 CALLNUMMAP: for my $cn ( @$cn_list ) {
481 my $cn_map_list = $cn_map{$cn->id};
483 # Ignore deleted copies
484 next CALLNUMMAP if ( $cn->deleted eq 't' );
486 COPYMAP: for my $cp ( @$cn_map_list ) {
490 my $owninglib = $cn->owning_lib;
491 my $circlib = $cp->circ_lib;
492 my $printlib = $cp->circ_lib;
494 # Ignore deleted copies
495 next COPYMAP if ( $cp->deleted eq 't');
498 my $thisorg = $orgs{$circlib};
500 if($collapse_to_depth){
501 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
502 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
503 if( $localcfg->{'DontCollapse'} ){
506 if($thisorg->parent_ou){
507 $thisorg = $orgs{$thisorg->parent_ou};
508 $printlib = $thisorg->id;
513 $thisorg = $orgs{$circlib};
517 # load the local config from the .ini file for exclusions
518 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
521 # if we see this setting, just skip that org
523 $cfgparam = 'ExcludeEntireOrg';
524 if( $localcfg->{$cfgparam} )
525 { skipnote($bib->id, $cfgparam); next COPYMAP; }
527 # what follows are exclusion rules
531 if($localcfg->{$cfgparam}){
532 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
533 my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
534 if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't')
535 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
536 elsif( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f')
537 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
538 elsif( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f')
539 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
540 elsif( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f')
541 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
544 # Excluded Circ Modifiers
545 $cfgparam = 'CircMods';
546 if($localcfg->{$cfgparam}){
547 my $circmod = $cp->circ_modifier || "";
548 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
549 if( grep { $_ eq $circmod } @circmods && @circmods)
550 { skipnote($bib->id,$cfgparam); next COPYMAP; }
552 # Inverse rule -- only include specified Circ Mods
553 $cfgparam = 'OnlyIncludeCircMods';
554 if($localcfg->{$cfgparam}){
555 my $circmod = $cp->circ_modifier || "";
556 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
557 unless( grep { $_ and $_ eq $circmod } @circmods && @circmods)
558 { skipnote($bib->id,$cfgparam); next COPYMAP; }
560 # Excluded Copy Statuses
561 $cfgparam = 'Statuses';
562 if($localcfg->{$cfgparam}){
563 my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
564 if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses)
565 { skipnote($bib->id,$cfgparam); next COPYMAP; }
568 $cfgparam = 'Locations';
569 if($localcfg->{$cfgparam}){
570 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
571 if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
572 { skipnote($bib->id,$cfgparam); next COPYMAP; }
574 # Inverse rule - Only use the specified locations
575 $cfgparam = 'OnlyIncludeLocations';
576 if($localcfg->{$cfgparam}){
577 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
578 unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
579 { skipnote($bib->id,$cfgparam); next COPYMAP; }
581 # exclude based on a regex match to location names
582 $cfgparam = 'LocationRegex';
583 if($localcfg->{$cfgparam}){
584 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
585 my $reg = $localcfg->{$cfgparam};
586 if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
587 { skipnote($bib->id,$cfgparam); next COPYMAP; }
589 # include based on a regex match to location names
590 $cfgparam = 'OnlyIncludeLocationRegex';
591 if($localcfg->{$cfgparam}){
592 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
593 my $reg = $localcfg->{$cfgparam};
594 unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
595 { skipnote($bib->id,$cfgparam); next COPYMAP; }
597 # Exclude based on a callno regex
598 $cfgparam = 'CallNoRegex';
599 if($localcfg->{$cfgparam}){
600 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
601 my $reg = $localcfg->{$cfgparam};
602 if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
603 { skipnote($bib->id,$cfgparam); next COPYMAP; }
605 # Include based on a callno regex
606 $cfgparam = 'OnlyIncludeCallNoRegex';
607 if($localcfg->{$cfgparam}){
608 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
609 my $reg = $localcfg->{$cfgparam};
610 unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
611 { skipnote($bib->id,$cfgparam); next COPYMAP; }
614 # Trim call number to a float and exclude based on Dewey Range
615 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
616 my $gt = $localcfg->{'DeweyGT'};
617 my $lt = $localcfg->{'DeweyLT'};
619 # FIXME if either config has an array just ditch for now
620 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
621 { skipnote($bib->id,""); next COPYMAP; }
622 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
623 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
625 my $callno = $cn->label;
626 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
627 print STDERR $callno;
628 #note that we are making big assumptions about the call numbers in the db
630 # we have a range, exclude what's inbetween
632 if($callno > $gt and $callno < $lt)
633 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
634 # we only have a top threshold, exclude everything below it
637 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
638 # we only have a bottom threshold, exclude everything above it
641 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
645 if($thisorg->parent_ou){
646 $thisorg = $orgs{$thisorg->parent_ou}
658 b => $orgs{$printlib}->shortname,
659 #b => $orgs{$owninglib}->shortname,
660 #b => $orgs{$circlib}->shortname,
661 c => $shelves{$cp->location}->name,
663 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
665 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
666 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
667 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
668 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
669 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
670 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
671 z => $statuses{$cp->status}->name,
677 stats() if (! ($count{cp} % 100 ));
678 } # COPYMAP: for my $cp ( @$cn_map_list )
679 } # for my $cn ( @$cn_list )
680 } # if ($cp_list && @$cp_list)
681 } # if ($cn_list && @$cn_list)
688 $outf = *STDOUT if($output_file) ;
689 printf($outf "Skipped %s due to config: %s\n",$id,$note);