8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records,$quiet) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef,0);
29 my ($exclusion_ini,$collapse_to_depth, $output_file);
33 my $field852_4b_type = 'collapsed';
37 'items' => \$holdings,
38 'mfhd' => \$export_mfhd,
39 'all' => \$all_records,
40 'location=s' => \$location,
41 'money=s' => \$dollarsign,
42 'config=s' => \$config,
43 'format=s' => \$format,
46 'encoding=s' => \$encoding,
47 'timeout=i' => \$timeout,
48 'force901' => \$force901,
49 'exclusion_ini=s' => \$exclusion_ini,
50 'collapse_to_depth=i' => \$collapse_to_depth,
51 'onlyholdings' => \$onlyholdings,
52 'output-file=s' => \$output_file,
54 'field-852-4b-type=s' => \$field852_4b_type,
58 die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
59 $cfg = new Config::Simple($exclusion_ini)
61 unless($field852_4b_type =~ /^(collapsed?|circ|owning)/) {
62 die sprintf("Invalid argument '%s' to --field-852-4b-type", $field852_4b_type);
67 This script exports MARC authority, bibliographic, and serial holdings
68 records from an Evergreen database.
70 Input to this script can consist of a list of record IDs, with one record ID
71 per line, corresponding to the record ID in the Evergreen database table of
72 your requested record type.
74 Alternately, passing the --all option will attempt to export all records of
75 the specified type from the Evergreen database. The --all option starts at
76 record ID 1 and increments the ID by 1 until the largest ID in the database
77 is retrieved. This may not be very efficient for databases with large gaps
78 in their ID sequences.
81 --help or -h This screen.
82 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
83 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
84 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
85 --xml-idl or -x Location of the IDL XML
86 --timeout Timeout for exporting a single record; increase if you
87 are using --holdings and are exporting records that
88 have a lot of items attached to them.
89 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
90 --all or -a Export all records; ignores input list
92 Additional options for type = 'BIBLIO':
93 --items or -i Include items (holdings) in the output
94 --money Currency symbol to use in item price field [\$]
95 --mfhd Export serial MFHD records for associated bib records
96 Not compatible with --format=BRE
97 --location or -l MARC Location Code for holdings from
98 http://www.loc.gov/marc/organizations/orgshome.html
100 Options added by Sitka:
101 --force901 Force-add 901 fields
102 --exclusion_ini FILENAME Config::Simple based INI file for excluding holdings from the export
103 --collapse_to_depth 2 Depth to collapse holdings. Any holdings at a depth below
104 will be collapsed up to the parent org unit at the set depth
105 --onlyholdings Clean out 852s before adding new ones, and only export items that
106 successfully recieved an 852 field
107 --field-852-4b-type TYPE TYPE IN 'circ', 'owning', 'collapsed' (default)
108 What library should be displayed in field 852 4\$b?
112 To export a set of USMARC records in a file named "output_file" based on the
113 IDs contained in a file named "list_of_ids":
114 cat list_of_ids | $0 > output_file
116 To export a set of MARC21XML authority records in a file named "output.xml"
117 for all authority records in the database:
118 $0 --format XML --type AUTHORITY --all > output.xml
125 $format = uc($format);
126 $encoding = uc($encoding);
130 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
132 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
134 $outfh = $real_stdout;
137 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
138 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
140 if (!grep { $format eq $_ } @formats) {
141 die "Please select a supported format. ".
142 "Right now that means one of [".
143 join('|',@formats). "]\n";
146 if ($format ne 'XML') {
147 my $type = 'MARC::File::' . $format;
152 # set default timeout and/or correct silly user who
153 # supplied a negative timeout; default timeout of
154 # 300 seconds if exporting items determined empirically.
155 $timeout = $holdings ? 300 : 1;
158 OpenSRF::System->bootstrap_client( config_file => $config );
161 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
164 Fieldmapper->import(IDL => $idl);
166 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
167 OpenILS::Utils::CStoreEditor::init();
168 my $editor = OpenILS::Utils::CStoreEditor->new();
170 print $outfh <<HEADER if ($format eq 'XML');
171 <?xml version="1.0" encoding="$encoding"?>
172 <collection xmlns='http://www.loc.gov/MARC21/slim'>
187 my $last_time = time;
188 my %count = ('bib' => 0, 'did' => 0);
193 if ($type eq 'biblio') {
194 $top_record = $editor->search_biblio_record_entry([
196 {order_by => { 'bre' => 'id DESC' }, limit => 1}
198 } elsif ($type eq 'authority') {
199 $top_record = $editor->search_authority_record_entry([
201 {order_by => { 'are' => 'id DESC' }, limit => 1}
204 for (my $i = 0; $i++ < $top_record;) {
208 while ( my $i = <> ) {
213 print $outfh "</collection>\n" if ($format eq 'XML');
215 $speed = $count{did} / (time - $start);
216 my $time = time - $start;
219 Exports Attempted : $count{bib}
220 Exports Completed : $count{did}
221 Overall Speed : $speed
222 Total Time Elapsed: $time seconds
231 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
232 my $s = $r->recv(timeout => $timeout);
234 warn "\n!!!!! Failed trying to read record $id\n";
238 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
242 warn "\n!!!!!! Timed out trying to read record $id\n";
250 # Return if the bib is deleted
251 return if ( $bib->deleted eq 't' );
253 if ($format eq 'ARE' or $format eq 'BRE') {
254 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
255 stats() unless $quiet;
262 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
263 if ($type eq 'biblio') {
266 # Remove old 852 fields
267 my @f = $r->field('852');
268 $r->delete_fields(@f) if @f;
270 add_bib_holdings($bib, $r);
271 # Check that at least one 852 was added
272 @f = $r->field('852');
273 # If not, we should NOT add this item to the export
276 add_bib_holdings($bib, $r);
281 $r->delete_field( $r->field('901') );
285 a => $bib->tcn_value,
286 b => $bib->tcn_source,
292 my $recordstr = undef;
294 if ($format eq 'XML') {
295 my $xml = $r->as_xml_record;
296 $xml =~ s/^<\?.+?\?>$//mo;
298 } elsif ($format eq 'UNIMARC') {
299 $recordstr = $r->as_usmarc;
300 } elsif ($format eq 'USMARC') {
301 $recordstr = $r->as_usmarc;
304 if($format eq 'UNIMARC' or $format eq 'USMARC') {
305 my $rec = MARC::File::USMARC->decode($recordstr);
306 #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
307 } elsif($format eq 'XML') {
308 my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
309 #my $tmp = $rec->as_xml_record;
310 #$tmp =~ s/^<\?.+?\?>$//mo;
311 #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
313 } or throw Error::Simple("Failed to parse MARC record back: $!");
314 print $outfh $recordstr;
323 warn "\nERROR ON RECORD $errorid: $e\n";
324 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
327 if ($export_mfhd and $type eq 'biblio') {
328 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
329 foreach my $mfhd (@$mfhds) {
331 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
334 $r->delete_field( $r->field('901') );
338 a => $bib->tcn_value,
339 b => $bib->tcn_source,
345 if ($format eq 'XML') {
346 my $xml = $r->as_xml_record;
347 $xml =~ s/^<\?.+?\?>$//mo;
349 } elsif ($format eq 'UNIMARC') {
350 print $outfh $r->as_usmarc;
351 } elsif ($format eq 'USMARC') {
352 print $outfh $r->as_usmarc;
356 my $errorid = chomp($id);
358 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
359 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
364 stats() if (!$quiet && ! ($count{bib} % 50 ));
371 $speed = $count{did} / (time - $start);
373 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
374 my $cn_speed = $count{cn} / (time - $start);
375 my $cp_speed = $count{cp} / (time - $start);
377 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
378 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
384 $count{did_last} = $count{did};
385 $count{time_last} = time;
388 sub get_bib_locations {
389 print STDERR "Retrieving Org Units ... " unless $quiet;
390 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
392 while (my $o = $r->recv) {
393 die $r->failed->stringify if ($r->failed);
401 print STDERR "Retrieving Copy statuses ... " unless $quiet;
402 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
404 while (my $sta = $r->recv) {
405 die $r->failed->stringify if ($r->failed);
406 $sta = $sta->content;
408 $statuses{$sta->id} = $sta;
413 print STDERR "Retrieving OU types ... " unless $quiet;
414 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
416 while (my $outy = $r->recv) {
417 die $r->failed->stringify if ($r->failed);
418 $outy = $outy->content;
420 $outypes{$outy->id} = $outy;
425 print STDERR "Retrieving Shelving locations ... " unless $quiet;
426 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
428 while (my $s = $r->recv) {
429 die $r->failed->stringify if ($r->failed);
432 $shelves{$s->id} = $s;
437 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
440 sub add_bib_holdings {
444 my $cn_list = $bib->call_numbers;
445 if ($cn_list && @$cn_list) {
447 $count{cn} += @$cn_list;
449 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
450 if ($cp_list && @$cp_list) {
453 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
455 CALLNUMMAP: for my $cn ( @$cn_list ) {
456 my $cn_map_list = $cn_map{$cn->id};
458 # Ignore deleted copies
459 next CALLNUMMAP if ( $cn->deleted eq 't' );
461 COPYMAP: for my $cp ( @$cn_map_list ) {
465 my $owninglib = $cn->owning_lib;
466 my $circlib = $cp->circ_lib;
467 my $printlib = $cp->circ_lib;
469 # Ignore deleted copies
470 next COPYMAP if ( $cp->deleted eq 't');
473 my $thisorg = $orgs{$circlib};
475 if($collapse_to_depth){
476 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
477 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
478 if( $localcfg->{'DontCollapse'} ){
481 if($thisorg->parent_ou){
482 $thisorg = $orgs{$thisorg->parent_ou};
483 $printlib = $thisorg->id;
488 $thisorg = $orgs{$circlib};
492 # load the local config from the .ini file for exclusions
493 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
496 # if we see this setting, just skip that org
498 $cfgparam = 'ExcludeEntireOrg';
499 if( $localcfg->{$cfgparam} )
500 { skipnote($bib->id, $cfgparam); next COPYMAP; }
502 # what follows are exclusion rules
506 if($localcfg->{$cfgparam}){
507 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
508 my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
509 if(grep( { $_ eq 'reference' } @flags) && ($cp->ref eq 't'))
510 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
511 if(grep( { $_ eq 'unholdable' } @flags) && ($cp->holdable eq 'f'))
512 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
513 if(grep( { $_ eq 'circulate' } @flags) && ($cp->circulate eq 'f'))
514 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
515 if(grep( { $_ eq 'hidden' } @flags) && ($cp->opac_visible eq 'f'))
516 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
519 # Excluded Circ Modifiers
520 $cfgparam = 'CircMods';
521 if($localcfg->{$cfgparam}){
522 my $circmod = $cp->circ_modifier || "";
523 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
524 if(grep( { $_ eq $circmod } @circmods) && @circmods)
525 { skipnote($bib->id,$cfgparam); next COPYMAP; }
527 # Inverse rule -- only include specified Circ Mods
528 $cfgparam = 'OnlyIncludeCircMods';
529 if($localcfg->{$cfgparam}){
530 my $circmod = $cp->circ_modifier || "";
531 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
532 unless(grep( { $_ and $_ eq $circmod } @circmods) && @circmods)
533 { skipnote($bib->id,$cfgparam); next COPYMAP; }
535 # Excluded Copy Statuses
536 $cfgparam = 'Statuses';
537 if($localcfg->{$cfgparam}){
538 my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
539 if(grep( { $_ eq $statuses{$cp->status}->name } @statuses) && @statuses)
540 { skipnote($bib->id,$cfgparam); next COPYMAP; }
543 $cfgparam = 'Locations';
544 if($localcfg->{$cfgparam}){
545 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
546 if(grep( { $_ eq $shelves{$cp->location}->name } @locations) && @locations)
547 { skipnote($bib->id,$cfgparam); next COPYMAP; }
549 # Inverse rule - Only use the specified locations
550 $cfgparam = 'OnlyIncludeLocations';
551 if($localcfg->{$cfgparam}){
552 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
553 unless(grep( { $_ eq $shelves{$cp->location}->name } @locations) && @locations)
554 { skipnote($bib->id,$cfgparam); next COPYMAP; }
556 # exclude based on a regex match to location names
557 $cfgparam = 'LocationRegex';
558 if($localcfg->{$cfgparam}){
559 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
560 my $reg = $localcfg->{$cfgparam};
561 if(grep( { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex) && @locregex)
562 { skipnote($bib->id,$cfgparam); next COPYMAP; }
564 # include based on a regex match to location names
565 $cfgparam = 'OnlyIncludeLocationRegex';
566 if($localcfg->{$cfgparam}){
567 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
568 my $reg = $localcfg->{$cfgparam};
569 unless(grep( { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex) && @locregex)
570 { skipnote($bib->id,$cfgparam); next COPYMAP; }
572 # Exclude based on a callno regex
573 $cfgparam = 'CallNoRegex';
574 if($localcfg->{$cfgparam}){
575 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
576 my $reg = $localcfg->{$cfgparam};
577 if(grep( { $cn->label =~ m/($reg)/ } @callnoregex) && @callnoregex)
578 { skipnote($bib->id,$cfgparam); next COPYMAP; }
580 # Include based on a callno regex
581 $cfgparam = 'OnlyIncludeCallNoRegex';
582 if($localcfg->{$cfgparam}){
583 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
584 my $reg = $localcfg->{$cfgparam};
585 unless(grep( { $cn->label =~ m/($reg)/ } @callnoregex) && @callnoregex)
586 { skipnote($bib->id,$cfgparam); next COPYMAP; }
589 # Trim call number to a float and exclude based on Dewey Range
590 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
591 my $gt = $localcfg->{'DeweyGT'};
592 my $lt = $localcfg->{'DeweyLT'};
594 # FIXME if either config has an array just ditch for now
595 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
596 { skipnote($bib->id,""); next COPYMAP; }
597 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
598 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
600 my $callno = $cn->label;
601 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
602 print STDERR $callno;
603 #note that we are making big assumptions about the call numbers in the db
605 # we have a range, exclude what's inbetween
607 if($callno > $gt and $callno < $lt)
608 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
609 # we only have a top threshold, exclude everything below it
612 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
613 # we only have a bottom threshold, exclude everything above it
616 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
620 if($thisorg->parent_ou){
621 $thisorg = $orgs{$thisorg->parent_ou}
629 my $field_852_4b = undef;
630 if($field852_4b_type =~ /collapsed?/) {
631 $field_852_4b = $orgs{$printlib}->shortname;
632 } elsif($field852_4b_type == 'circ') {
633 $field_852_4b = $orgs{$circlib}->shortname;
634 } elsif($field852_4b_type == 'owning') {
635 $field_852_4b = $orgs{$owninglib}->shortname;
637 die "Should not have reached here, invalid \$field852_4b_type=${field852_4b_type}";
645 c => $shelves{$cp->location}->name,
647 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
649 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
650 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
651 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
652 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
653 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
654 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
655 z => $statuses{$cp->status}->name,
661 stats() if (!$quiet && ! ($count{cp} % 100 ));
662 } # COPYMAP: for my $cp ( @$cn_map_list )
663 } # for my $cn ( @$cn_list )
664 } # if ($cp_list && @$cp_list)
665 } # if ($cn_list && @$cn_list)
672 $outf = *STDOUT if($output_file) ;
673 printf($outf "Skipped %s due to config: %s\n",$id,$note) unless $quiet;