8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records,$quiet) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef,0);
29 my ($exclusion_ini,$collapse_to_depth, $output_file);
34 my $field852_4b_type = 'collapsed';
38 'items' => \$holdings,
39 'mfhd' => \$export_mfhd,
40 'all' => \$all_records,
41 'location=s' => \$location,
42 'money=s' => \$dollarsign,
43 'config=s' => \$config,
44 'format=s' => \$format,
47 'encoding=s' => \$encoding,
48 'timeout=i' => \$timeout,
49 'force901' => \$force901,
50 'exclusion_ini=s' => \$exclusion_ini,
51 'collapse_to_depth=i' => \$collapse_to_depth,
52 'onlyholdings' => \$onlyholdings,
53 'with-ebooks' => \$with_ebooks,
54 'output-file=s' => \$output_file,
56 'field-852-4b-type=s' => \$field852_4b_type,
60 die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
61 $cfg = new Config::Simple($exclusion_ini)
63 unless($field852_4b_type =~ /^(collapsed?|circ|owning)/) {
64 die sprintf("Invalid argument '%s' to --field-852-4b-type", $field852_4b_type);
69 This script exports MARC authority, bibliographic, and serial holdings
70 records from an Evergreen database.
72 Input to this script can consist of a list of record IDs, with one record ID
73 per line, corresponding to the record ID in the Evergreen database table of
74 your requested record type.
76 Alternately, passing the --all option will attempt to export all records of
77 the specified type from the Evergreen database. The --all option starts at
78 record ID 1 and increments the ID by 1 until the largest ID in the database
79 is retrieved. This may not be very efficient for databases with large gaps
80 in their ID sequences.
83 --help or -h This screen.
84 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
85 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
86 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
87 --xml-idl or -x Location of the IDL XML
88 --timeout Timeout for exporting a single record; increase if you
89 are using --holdings and are exporting records that
90 have a lot of items attached to them.
91 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
92 --all or -a Export all records; ignores input list
94 Additional options for type = 'BIBLIO':
95 --items or -i Include items (holdings) in the output
96 --money Currency symbol to use in item price field [\$]
97 --mfhd Export serial MFHD records for associated bib records
98 Not compatible with --format=BRE
99 --location or -l MARC Location Code for holdings from
100 http://www.loc.gov/marc/organizations/orgshome.html
102 Options added by Sitka:
103 --force901 Force-add 901 fields
104 --exclusion_ini FILENAME Config::Simple based INI file for excluding holdings from the export
105 --collapse_to_depth 2 Depth to collapse holdings. Any holdings at a depth below
106 will be collapsed up to the parent org unit at the set depth
107 --onlyholdings Clean out 852s before adding new ones, and only export items that
108 successfully recieved an 852 field
109 --with-ebooks Include records with 856 fields, even if they have no holdings
110 --field-852-4b-type TYPE TYPE IN 'circ', 'owning', 'collapsed' (default)
111 What library should be displayed in field 852 4\$b?
115 To export a set of USMARC records in a file named "output_file" based on the
116 IDs contained in a file named "list_of_ids":
117 cat list_of_ids | $0 > output_file
119 To export a set of MARC21XML authority records in a file named "output.xml"
120 for all authority records in the database:
121 $0 --format XML --type AUTHORITY --all > output.xml
128 $format = uc($format);
129 $encoding = uc($encoding);
133 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
135 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
137 $outfh = $real_stdout;
140 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
141 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
143 if (!grep { $format eq $_ } @formats) {
144 die "Please select a supported format. ".
145 "Right now that means one of [".
146 join('|',@formats). "]\n";
149 if ($format ne 'XML') {
150 my $type = 'MARC::File::' . $format;
155 # set default timeout and/or correct silly user who
156 # supplied a negative timeout; default timeout of
157 # 300 seconds if exporting items determined empirically.
158 $timeout = $holdings ? 300 : 1;
161 OpenSRF::System->bootstrap_client( config_file => $config );
164 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
167 Fieldmapper->import(IDL => $idl);
169 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
170 OpenILS::Utils::CStoreEditor::init();
171 my $editor = OpenILS::Utils::CStoreEditor->new();
173 print $outfh <<HEADER if ($format eq 'XML');
174 <?xml version="1.0" encoding="$encoding"?>
175 <collection xmlns='http://www.loc.gov/MARC21/slim'>
190 my $last_time = time;
191 my %count = ('bib' => 0, 'did' => 0);
196 if ($type eq 'biblio') {
197 $top_record = $editor->search_biblio_record_entry([
199 {order_by => { 'bre' => 'id DESC' }, limit => 1}
201 } elsif ($type eq 'authority') {
202 $top_record = $editor->search_authority_record_entry([
204 {order_by => { 'are' => 'id DESC' }, limit => 1}
207 for (my $i = 0; $i++ < $top_record;) {
211 while ( my $i = <> ) {
216 print $outfh "</collection>\n" if ($format eq 'XML');
218 $speed = $count{did} / (time - $start);
219 my $time = time - $start;
222 Exports Attempted : $count{bib}
223 Exports Completed : $count{did}
224 Overall Speed : $speed
225 Total Time Elapsed: $time seconds
234 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
235 my $s = $r->recv(timeout => $timeout);
237 warn "\n!!!!! Failed trying to read record $id\n";
241 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
245 warn "\n!!!!!! Timed out trying to read record $id\n";
253 # Return if the bib is deleted
254 return if ( $bib->deleted eq 't' );
256 if ($format eq 'ARE' or $format eq 'BRE') {
257 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
258 stats() unless $quiet;
265 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
266 if ($type eq 'biblio') {
269 # Remove old 852 fields
270 my @f = $r->field('852');
271 $r->delete_fields(@f) if @f;
273 add_bib_holdings($bib, $r);
274 # Check that at least one 852 was added
275 @marc852 = $r->field('852');
276 @marc856 = $r->field('856');
277 # If not, we should NOT add this item to the export
278 # ... but we may still want the record if it has an 856
279 return unless ( @marc852 || ($with_ebooks && @marc856) );
281 add_bib_holdings($bib, $r);
286 $r->delete_field( $r->field('901') );
290 a => $bib->tcn_value,
291 b => $bib->tcn_source,
297 my $recordstr = undef;
299 if ($format eq 'XML') {
300 my $xml = $r->as_xml_record;
301 $xml =~ s/^<\?.+?\?>$//mo;
303 } elsif ($format eq 'UNIMARC') {
304 $recordstr = $r->as_usmarc;
305 } elsif ($format eq 'USMARC') {
306 $recordstr = $r->as_usmarc;
309 if($format eq 'UNIMARC' or $format eq 'USMARC') {
310 my $rec = MARC::File::USMARC->decode($recordstr);
311 #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
312 } elsif($format eq 'XML') {
313 my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
314 #my $tmp = $rec->as_xml_record;
315 #$tmp =~ s/^<\?.+?\?>$//mo;
316 #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
318 } or throw Error::Simple("Failed to parse MARC record back: $!");
319 print $outfh $recordstr;
328 warn "\nERROR ON RECORD $errorid: $e\n";
329 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
332 if ($export_mfhd and $type eq 'biblio') {
333 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
334 foreach my $mfhd (@$mfhds) {
336 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
339 $r->delete_field( $r->field('901') );
343 a => $bib->tcn_value,
344 b => $bib->tcn_source,
350 if ($format eq 'XML') {
351 my $xml = $r->as_xml_record;
352 $xml =~ s/^<\?.+?\?>$//mo;
354 } elsif ($format eq 'UNIMARC') {
355 print $outfh $r->as_usmarc;
356 } elsif ($format eq 'USMARC') {
357 print $outfh $r->as_usmarc;
361 my $errorid = chomp($id);
363 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
364 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
369 stats() if (!$quiet && ! ($count{bib} % 50 ));
376 $speed = $count{did} / (time - $start);
378 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
379 my $cn_speed = $count{cn} / (time - $start);
380 my $cp_speed = $count{cp} / (time - $start);
382 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
383 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
389 $count{did_last} = $count{did};
390 $count{time_last} = time;
393 sub get_bib_locations {
394 print STDERR "Retrieving Org Units ... " unless $quiet;
395 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
397 while (my $o = $r->recv) {
398 die $r->failed->stringify if ($r->failed);
406 print STDERR "Retrieving Copy statuses ... " unless $quiet;
407 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
409 while (my $sta = $r->recv) {
410 die $r->failed->stringify if ($r->failed);
411 $sta = $sta->content;
413 $statuses{$sta->id} = $sta;
418 print STDERR "Retrieving OU types ... " unless $quiet;
419 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
421 while (my $outy = $r->recv) {
422 die $r->failed->stringify if ($r->failed);
423 $outy = $outy->content;
425 $outypes{$outy->id} = $outy;
430 print STDERR "Retrieving Shelving locations ... " unless $quiet;
431 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
433 while (my $s = $r->recv) {
434 die $r->failed->stringify if ($r->failed);
437 $shelves{$s->id} = $s;
442 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
445 sub add_bib_holdings {
449 my $cn_list = $bib->call_numbers;
450 if ($cn_list && @$cn_list) {
452 $count{cn} += @$cn_list;
454 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
455 if ($cp_list && @$cp_list) {
458 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
460 CALLNUMMAP: for my $cn ( @$cn_list ) {
461 my $cn_map_list = $cn_map{$cn->id};
463 # Ignore deleted copies
464 next CALLNUMMAP if ( $cn->deleted eq 't' );
466 COPYMAP: for my $cp ( @$cn_map_list ) {
470 my $owninglib = $cn->owning_lib;
471 my $circlib = $cp->circ_lib;
472 my $printlib = $cp->circ_lib;
474 # Ignore deleted copies
475 next COPYMAP if ( $cp->deleted eq 't');
478 my $thisorg = $orgs{$circlib};
480 if($collapse_to_depth){
481 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
482 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
483 if( $localcfg->{'DontCollapse'} ){
486 if($thisorg->parent_ou){
487 $thisorg = $orgs{$thisorg->parent_ou};
488 $printlib = $thisorg->id;
493 $thisorg = $orgs{$circlib};
497 # load the local config from the .ini file for exclusions
498 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
501 # if we see this setting, just skip that org
503 $cfgparam = 'ExcludeEntireOrg';
504 if( $localcfg->{$cfgparam} )
505 { skipnote($bib->id, $cfgparam); next COPYMAP; }
507 # what follows are exclusion rules
511 if($localcfg->{$cfgparam}){
512 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
513 my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
514 if(grep( { $_ eq 'reference' } @flags) && ($cp->ref eq 't'))
515 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
516 if(grep( { $_ eq 'unholdable' } @flags) && ($cp->holdable eq 'f'))
517 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
518 if(grep( { $_ eq 'circulate' } @flags) && ($cp->circulate eq 'f'))
519 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
520 if(grep( { $_ eq 'hidden' } @flags) && ($cp->opac_visible eq 'f'))
521 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
524 # Excluded Circ Modifiers
525 $cfgparam = 'CircMods';
526 if($localcfg->{$cfgparam}){
527 my $circmod = $cp->circ_modifier || "";
528 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
529 if(grep( { $_ eq $circmod } @circmods) && @circmods)
530 { skipnote($bib->id,$cfgparam); next COPYMAP; }
532 # Inverse rule -- only include specified Circ Mods
533 $cfgparam = 'OnlyIncludeCircMods';
534 if($localcfg->{$cfgparam}){
535 my $circmod = $cp->circ_modifier || "";
536 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
537 unless(grep( { $_ and $_ eq $circmod } @circmods) && @circmods)
538 { skipnote($bib->id,$cfgparam); next COPYMAP; }
540 # Excluded Copy Statuses
541 $cfgparam = 'Statuses';
542 if($localcfg->{$cfgparam}){
543 my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
544 if(grep( { $_ eq $statuses{$cp->status}->name } @statuses) && @statuses)
545 { skipnote($bib->id,$cfgparam); next COPYMAP; }
548 $cfgparam = 'Locations';
549 if($localcfg->{$cfgparam}){
550 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
551 if(grep( { $_ eq $shelves{$cp->location}->name } @locations) && @locations)
552 { skipnote($bib->id,$cfgparam); next COPYMAP; }
554 # Inverse rule - Only use the specified locations
555 $cfgparam = 'OnlyIncludeLocations';
556 if($localcfg->{$cfgparam}){
557 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
558 unless(grep( { $_ eq $shelves{$cp->location}->name } @locations) && @locations)
559 { skipnote($bib->id,$cfgparam); next COPYMAP; }
561 # exclude based on a regex match to location names
562 $cfgparam = 'LocationRegex';
563 if($localcfg->{$cfgparam}){
564 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
565 my $reg = $localcfg->{$cfgparam};
566 if(grep( { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex) && @locregex)
567 { skipnote($bib->id,$cfgparam); next COPYMAP; }
569 # include based on a regex match to location names
570 $cfgparam = 'OnlyIncludeLocationRegex';
571 if($localcfg->{$cfgparam}){
572 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
573 my $reg = $localcfg->{$cfgparam};
574 unless(grep( { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex) && @locregex)
575 { skipnote($bib->id,$cfgparam); next COPYMAP; }
577 # Exclude based on a callno regex
578 $cfgparam = 'CallNoRegex';
579 if($localcfg->{$cfgparam}){
580 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
581 my $reg = $localcfg->{$cfgparam};
582 if(grep( { $cn->label =~ m/($reg)/ } @callnoregex) && @callnoregex)
583 { skipnote($bib->id,$cfgparam); next COPYMAP; }
585 # Include based on a callno regex
586 $cfgparam = 'OnlyIncludeCallNoRegex';
587 if($localcfg->{$cfgparam}){
588 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
589 my $reg = $localcfg->{$cfgparam};
590 unless(grep( { $cn->label =~ m/($reg)/ } @callnoregex) && @callnoregex)
591 { skipnote($bib->id,$cfgparam); next COPYMAP; }
594 # Trim call number to a float and exclude based on Dewey Range
595 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
596 my $gt = $localcfg->{'DeweyGT'};
597 my $lt = $localcfg->{'DeweyLT'};
599 # FIXME if either config has an array just ditch for now
600 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
601 { skipnote($bib->id,""); next COPYMAP; }
602 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
603 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
605 my $callno = $cn->label;
606 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
607 print STDERR $callno;
608 #note that we are making big assumptions about the call numbers in the db
610 # we have a range, exclude what's inbetween
612 if($callno > $gt and $callno < $lt)
613 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
614 # we only have a top threshold, exclude everything below it
617 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
618 # we only have a bottom threshold, exclude everything above it
621 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
625 if($thisorg->parent_ou){
626 $thisorg = $orgs{$thisorg->parent_ou}
634 my $field_852_4b = undef;
635 if($field852_4b_type =~ /collapsed?/) {
636 $field_852_4b = $orgs{$printlib}->shortname;
637 } elsif($field852_4b_type == 'circ') {
638 $field_852_4b = $orgs{$circlib}->shortname;
639 } elsif($field852_4b_type == 'owning') {
640 $field_852_4b = $orgs{$owninglib}->shortname;
642 die "Should not have reached here, invalid \$field852_4b_type=${field852_4b_type}";
650 c => $shelves{$cp->location}->name,
652 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
654 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
655 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
656 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
657 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
658 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
659 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
660 z => $statuses{$cp->status}->name,
666 stats() if (!$quiet && ! ($count{cp} % 100 ));
667 } # COPYMAP: for my $cp ( @$cn_map_list )
668 } # for my $cn ( @$cn_list )
669 } # if ($cp_list && @$cp_list)
670 } # if ($cn_list && @$cn_list)
677 $outf = *STDOUT if($output_file) ;
678 printf($outf "Skipped %s due to config: %s\n",$id,$note) unless $quiet;