8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/srv/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
29 my ($exclusion_ini,$collapse_to_depth, $output_file);
39 'items' => \$holdings,
40 'mfhd' => \$export_mfhd,
41 'all' => \$all_records,
42 'location=s' => \$location,
43 'branch=s' => \$branch,
44 'money=s' => \$dollarsign,
45 'config=s' => \$config,
46 'format=s' => \$format,
49 'encoding=s' => \$encoding,
50 'timeout=i' => \$timeout,
51 'force901' => \$force901,
52 'exclusion_ini=s' => \$exclusion_ini,
53 'collapse_to_depth=i' => \$collapse_to_depth,
54 'onlyholdings' => \$onlyholdings,
55 'output-file=s' => \$output_file,
59 die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
60 $cfg = new Config::Simple($exclusion_ini)
65 This script exports MARC authority, bibliographic, and serial holdings
66 records from an Evergreen database.
68 Input to this script can consist of a list of record IDs, with one record ID
69 per line, corresponding to the record ID in the Evergreen database table of
70 your requested record type.
72 Alternately, passing the --all option will attempt to export all records of
73 the specified type from the Evergreen database. The --all option starts at
74 record ID 1 and increments the ID by 1 until the largest ID in the database
75 is retrieved. This may not be very efficient for databases with large gaps
76 in their ID sequences.
79 --help or -h This screen.
80 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
81 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
82 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
83 --xml-idl or -x Location of the IDL XML
84 --timeout Timeout for exporting a single record; increase if you
85 are using --holdings and are exporting records that
86 have a lot of items attached to them.
87 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
88 --all or -a Export all records; ignores input list
90 Additional options for type = 'BIBLIO':
91 --items or -i Include items (holdings) in the output
92 --money Currency symbol to use in item price field [\$]
93 --mfhd Export serial MFHD records for associated bib records
94 Not compatible with --format=BRE
95 --location or -l MARC Location Code for holdings from
96 http://www.loc.gov/marc/organizations/orgshome.html
98 Options added by Sitka:
99 --force901 Force-add 901 fields
100 --exclusion_ini FILENAME Config::Simple based INI file for excluding holdings from the export
101 --collapse_to_depth 2 Depth to collapse holdings. Any holdings at a depth below
102 will be collapsed up to the parent org unit at the set depth
103 --onlyholdings Clean out 852s before adding new ones, and only export items that
104 successfully recieved an 852 field
105 --branch or -b Branch Codes of items to export, ex. 'BW' or 'BLP,BLP-LL'
109 To export a set of USMARC records in a file named "output_file" based on the
110 IDs contained in a file named "list_of_ids":
111 cat list_of_ids | $0 > output_file
113 To export a set of MARC21XML authority records in a file named "output.xml"
114 for all authority records in the database:
115 $0 --format XML --type AUTHORITY --all > output.xml
122 @branches = split(',', $branch);
123 @hashbranches{@branches} = (0 .. $#branches);
126 $format = uc($format);
127 $encoding = uc($encoding);
131 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
133 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
135 $outfh = $real_stdout;
138 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
139 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
141 if (!grep { $format eq $_ } @formats) {
142 die "Please select a supported format. ".
143 "Right now that means one of [".
144 join('|',@formats). "]\n";
147 if ($format ne 'XML') {
148 my $type = 'MARC::File::' . $format;
153 # set default timeout and/or correct silly user who
154 # supplied a negative timeout; default timeout of
155 # 300 seconds if exporting items determined empirically.
156 $timeout = $holdings ? 300 : 1;
159 OpenSRF::System->bootstrap_client( config_file => $config );
162 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
165 Fieldmapper->import(IDL => $idl);
167 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
168 OpenILS::Utils::CStoreEditor::init();
169 my $editor = OpenILS::Utils::CStoreEditor->new();
171 print $outfh <<HEADER if ($format eq 'XML');
172 <?xml version="1.0" encoding="$encoding"?>
173 <collection xmlns='http://www.loc.gov/MARC21/slim'>
188 my $last_time = time;
189 my %count = ('bib' => 0, 'did' => 0);
194 if ($type eq 'biblio') {
195 $top_record = $editor->search_biblio_record_entry([
197 {order_by => { 'bre' => 'id DESC' }, limit => 1}
199 } elsif ($type eq 'authority') {
200 $top_record = $editor->search_authority_record_entry([
202 {order_by => { 'are' => 'id DESC' }, limit => 1}
205 for (my $i = 0; $i++ < $top_record;) {
209 while ( my $i = <> ) {
214 print $outfh "</collection>\n" if ($format eq 'XML');
216 $speed = $count{did} / (time - $start);
217 my $time = time - $start;
220 Exports Attempted : $count{bib}
221 Exports Completed : $count{did}
222 Overall Speed : $speed
223 Total Time Elapsed: $time seconds
232 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
233 my $s = $r->recv(timeout => $timeout);
235 warn "\n!!!!! Failed trying to read record $id\n";
239 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
243 warn "\n!!!!!! Timed out trying to read record $id\n";
251 # Return if the bib is deleted
252 return if ( $bib->deleted eq 't' );
254 if ($format eq 'ARE' or $format eq 'BRE') {
255 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
263 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
264 if ($type eq 'biblio') {
267 # Remove old 852 fields
268 my @f = $r->field('852');
269 #$r->delete_fields(@f) if @f;
271 foreach $field852(@f){
272 $r->delete_field($field852);
275 add_bib_holdings($bib, $r);
276 # Check that at least one 852 was added
277 @f = $r->field('852');
278 # If not, we should NOT add this item to the export
281 add_bib_holdings($bib, $r);
286 $r->delete_field( $r->field('901') );
290 a => $bib->tcn_value,
291 b => $bib->tcn_source,
297 my $recordstr = undef;
299 if ($format eq 'XML') {
300 my $xml = $r->as_xml_record;
301 $xml =~ s/^<\?.+?\?>$//mo;
303 } elsif ($format eq 'UNIMARC') {
304 $recordstr = $r->as_usmarc;
305 } elsif ($format eq 'USMARC') {
306 $recordstr = $r->as_usmarc;
309 if($format eq 'UNIMARC' or $format eq 'USMARC') {
310 my $rec = MARC::File::USMARC->decode($recordstr);
311 #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
312 } elsif($format eq 'XML') {
313 my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
314 #my $tmp = $rec->as_xml_record;
315 #$tmp =~ s/^<\?.+?\?>$//mo;
316 #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
318 } or throw Error::Simple("Failed to parse MARC record back: $!");
319 print $outfh $recordstr;
328 warn "\nERROR ON RECORD $errorid: $e\n";
329 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
332 if ($export_mfhd and $type eq 'biblio') {
333 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
334 foreach my $mfhd (@$mfhds) {
336 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
339 $r->delete_field( $r->field('901') );
343 a => $bib->tcn_value,
344 b => $bib->tcn_source,
350 if ($format eq 'XML') {
351 my $xml = $r->as_xml_record;
352 $xml =~ s/^<\?.+?\?>$//mo;
354 } elsif ($format eq 'UNIMARC') {
355 print $outfh $r->as_usmarc;
356 } elsif ($format eq 'USMARC') {
357 print $outfh $r->as_usmarc;
361 my $errorid = chomp($id);
363 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
364 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
369 stats() if (! ($count{bib} % 50 ));
376 $speed = $count{did} / (time - $start);
378 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
379 my $cn_speed = $count{cn} / (time - $start);
380 my $cp_speed = $count{cp} / (time - $start);
382 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
383 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
389 $count{did_last} = $count{did};
390 $count{time_last} = time;
393 sub get_bib_locations {
394 print STDERR "Retrieving Org Units ... ";
395 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
397 while (my $o = $r->recv) {
398 die $r->failed->stringify if ($r->failed);
406 print STDERR "Retrieving Copy statuses ... ";
407 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
409 while (my $sta = $r->recv) {
410 die $r->failed->stringify if ($r->failed);
411 $sta = $sta->content;
413 $statuses{$sta->id} = $sta;
418 print STDERR "Retrieving OU types ... ";
419 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
421 while (my $outy = $r->recv) {
422 die $r->failed->stringify if ($r->failed);
423 $outy = $outy->content;
425 $outypes{$outy->id} = $outy;
430 print STDERR "Retrieving Shelving locations ... ";
431 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
433 while (my $s = $r->recv) {
434 die $r->failed->stringify if ($r->failed);
437 $shelves{$s->id} = $s;
442 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
445 sub add_bib_holdings {
449 my $cn_list = $bib->call_numbers;
450 if ($cn_list && @$cn_list) {
452 $count{cn} += @$cn_list;
454 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
455 if ($cp_list && @$cp_list) {
458 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
460 CALLNUMMAP: for my $cn ( @$cn_list ) {
461 my $cn_map_list = $cn_map{$cn->id};
463 # Ignore deleted copies
464 next CALLNUMMAP if ( $cn->deleted eq 't' );
466 COPYMAP: for my $cp ( @$cn_map_list ) {
470 if (not exists $hashbranches{$orgs{$cn->owning_lib}->shortname}){
475 my $owninglib = $cn->owning_lib;
476 my $circlib = $cp->circ_lib;
477 my $printlib = $cp->circ_lib;
478 # Ignore deleted copies
479 next COPYMAP if ( $cp->deleted eq 't');
482 my $thisorg = $orgs{$circlib};
483 if($collapse_to_depth){
484 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
485 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
486 if( $localcfg->{'DontCollapse'} ){
489 if($thisorg->parent_ou){
490 $thisorg = $orgs{$thisorg->parent_ou};
491 $printlib = $thisorg->id;
496 $thisorg = $orgs{$circlib};
500 # load the local config from the .ini file for exclusions
501 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
504 # if we see this setting, just skip that org
506 $cfgparam = 'ExcludeEntireOrg';
507 if( $localcfg->{$cfgparam} )
508 { skipnote($bib->id, $cfgparam); next COPYMAP; }
510 # what follows are exclusion rules
514 if($localcfg->{$cfgparam}){
515 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
516 my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
517 if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't')
518 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
519 elsif( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f')
520 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
521 elsif( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f')
522 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
523 elsif( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f')
524 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
527 # Excluded Circ Modifiers
528 $cfgparam = 'CircMods';
529 if($localcfg->{$cfgparam}){
530 my $circmod = $cp->circ_modifier || "";
531 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
532 if( grep { $_ eq $circmod } @circmods && @circmods)
533 { skipnote($bib->id,$cfgparam); next COPYMAP; }
535 # Inverse rule -- only include specified Circ Mods
536 $cfgparam = 'OnlyIncludeCircMods';
537 if($localcfg->{$cfgparam}){
538 my $circmod = $cp->circ_modifier || "";
539 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
540 unless( grep { $_ and $_ eq $circmod } @circmods && @circmods)
541 { skipnote($bib->id,$cfgparam); next COPYMAP; }
543 # Excluded Copy Statuses
544 $cfgparam = 'Statuses';
545 if($localcfg->{$cfgparam}){
546 my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
547 if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses)
548 { skipnote($bib->id,$cfgparam); next COPYMAP; }
551 $cfgparam = 'Locations';
552 if($localcfg->{$cfgparam}){
553 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
554 if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
555 { skipnote($bib->id,$cfgparam); next COPYMAP; }
557 # Inverse rule - Only use the specified locations
558 $cfgparam = 'OnlyIncludeLocations';
559 if($localcfg->{$cfgparam}){
560 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
561 unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
562 { skipnote($bib->id,$cfgparam); next COPYMAP; }
564 # exclude based on a regex match to location names
565 $cfgparam = 'LocationRegex';
566 if($localcfg->{$cfgparam}){
567 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
568 my $reg = $localcfg->{$cfgparam};
569 if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
570 { skipnote($bib->id,$cfgparam); next COPYMAP; }
572 # include based on a regex match to location names
573 $cfgparam = 'OnlyIncludeLocationRegex';
574 if($localcfg->{$cfgparam}){
575 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
576 my $reg = $localcfg->{$cfgparam};
577 unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
578 { skipnote($bib->id,$cfgparam); next COPYMAP; }
580 # Exclude based on a callno regex
581 $cfgparam = 'CallNoRegex';
582 if($localcfg->{$cfgparam}){
583 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
584 my $reg = $localcfg->{$cfgparam};
585 if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
586 { skipnote($bib->id,$cfgparam); next COPYMAP; }
588 # Include based on a callno regex
589 $cfgparam = 'OnlyIncludeCallNoRegex';
590 if($localcfg->{$cfgparam}){
591 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
592 my $reg = $localcfg->{$cfgparam};
593 unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
594 { skipnote($bib->id,$cfgparam); next COPYMAP; }
597 # Trim call number to a float and exclude based on Dewey Range
598 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
599 my $gt = $localcfg->{'DeweyGT'};
600 my $lt = $localcfg->{'DeweyLT'};
602 # FIXME if either config has an array just ditch for now
603 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
604 { skipnote($bib->id,""); next COPYMAP; }
605 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
606 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
608 my $callno = $cn->label;
609 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
610 print STDERR $callno;
611 #note that we are making big assumptions about the call numbers in the db
613 # we have a range, exclude what's inbetween
615 if($callno > $gt and $callno < $lt)
616 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
617 # we only have a top threshold, exclude everything below it
620 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
621 # we only have a bottom threshold, exclude everything above it
624 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
628 if($thisorg->parent_ou){
629 $thisorg = $orgs{$thisorg->parent_ou}
641 #b => $orgs{$printlib}->shortname,
642 #b => $orgs{$owninglib}->shortname,
643 b => $orgs{$circlib}->shortname,
644 c => $shelves{$cp->location}->name,
646 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
648 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
649 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
650 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
651 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
652 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
653 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
654 z => $statuses{$cp->status}->name,
660 stats() if (! ($count{cp} % 100 ));
661 } # COPYMAP: for my $cp ( @$cn_map_list )
662 } # for my $cn ( @$cn_list )
663 } # if ($cp_list && @$cp_list)
664 } # if ($cn_list && @$cn_list)
671 $outf = *STDOUT if($output_file) ;
672 printf($outf "Skipped %s due to config: %s\n",$id,$note);