8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records,$quiet) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef,0);
29 my ($exclusion_ini,$collapse_to_depth, $output_file);
36 'items' => \$holdings,
37 'mfhd' => \$export_mfhd,
38 'all' => \$all_records,
39 'location=s' => \$location,
40 'money=s' => \$dollarsign,
41 'config=s' => \$config,
42 'format=s' => \$format,
45 'encoding=s' => \$encoding,
46 'timeout=i' => \$timeout,
47 'force901' => \$force901,
48 'exclusion_ini=s' => \$exclusion_ini,
49 'collapse_to_depth=i' => \$collapse_to_depth,
50 'onlyholdings' => \$onlyholdings,
51 'output-file=s' => \$output_file,
56 die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
57 $cfg = new Config::Simple($exclusion_ini)
62 This script exports MARC authority, bibliographic, and serial holdings
63 records from an Evergreen database.
65 Input to this script can consist of a list of record IDs, with one record ID
66 per line, corresponding to the record ID in the Evergreen database table of
67 your requested record type.
69 Alternately, passing the --all option will attempt to export all records of
70 the specified type from the Evergreen database. The --all option starts at
71 record ID 1 and increments the ID by 1 until the largest ID in the database
72 is retrieved. This may not be very efficient for databases with large gaps
73 in their ID sequences.
76 --help or -h This screen.
77 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
78 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
79 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
80 --xml-idl or -x Location of the IDL XML
81 --timeout Timeout for exporting a single record; increase if you
82 are using --holdings and are exporting records that
83 have a lot of items attached to them.
84 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
85 --all or -a Export all records; ignores input list
87 Additional options for type = 'BIBLIO':
88 --items or -i Include items (holdings) in the output
89 --money Currency symbol to use in item price field [\$]
90 --mfhd Export serial MFHD records for associated bib records
91 Not compatible with --format=BRE
92 --location or -l MARC Location Code for holdings from
93 http://www.loc.gov/marc/organizations/orgshome.html
95 Options added by Sitka:
96 --force901 Force-add 901 fields
97 --exclusion_ini FILENAME Config::Simple based INI file for excluding holdings from the export
98 --collapse_to_depth 2 Depth to collapse holdings. Any holdings at a depth below
99 will be collapsed up to the parent org unit at the set depth
100 --onlyholdings Clean out 852s before adding new ones, and only export items that
101 successfully recieved an 852 field
105 To export a set of USMARC records in a file named "output_file" based on the
106 IDs contained in a file named "list_of_ids":
107 cat list_of_ids | $0 > output_file
109 To export a set of MARC21XML authority records in a file named "output.xml"
110 for all authority records in the database:
111 $0 --format XML --type AUTHORITY --all > output.xml
118 $format = uc($format);
119 $encoding = uc($encoding);
123 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
125 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
127 $outfh = $real_stdout;
130 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
131 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
133 if (!grep { $format eq $_ } @formats) {
134 die "Please select a supported format. ".
135 "Right now that means one of [".
136 join('|',@formats). "]\n";
139 if ($format ne 'XML') {
140 my $type = 'MARC::File::' . $format;
145 # set default timeout and/or correct silly user who
146 # supplied a negative timeout; default timeout of
147 # 300 seconds if exporting items determined empirically.
148 $timeout = $holdings ? 300 : 1;
151 OpenSRF::System->bootstrap_client( config_file => $config );
154 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
157 Fieldmapper->import(IDL => $idl);
159 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
160 OpenILS::Utils::CStoreEditor::init();
161 my $editor = OpenILS::Utils::CStoreEditor->new();
163 print $outfh <<HEADER if ($format eq 'XML');
164 <?xml version="1.0" encoding="$encoding"?>
165 <collection xmlns='http://www.loc.gov/MARC21/slim'>
180 my $last_time = time;
181 my %count = ('bib' => 0, 'did' => 0);
186 if ($type eq 'biblio') {
187 $top_record = $editor->search_biblio_record_entry([
189 {order_by => { 'bre' => 'id DESC' }, limit => 1}
191 } elsif ($type eq 'authority') {
192 $top_record = $editor->search_authority_record_entry([
194 {order_by => { 'are' => 'id DESC' }, limit => 1}
197 for (my $i = 0; $i++ < $top_record;) {
201 while ( my $i = <> ) {
206 print $outfh "</collection>\n" if ($format eq 'XML');
208 $speed = $count{did} / (time - $start);
209 my $time = time - $start;
212 Exports Attempted : $count{bib}
213 Exports Completed : $count{did}
214 Overall Speed : $speed
215 Total Time Elapsed: $time seconds
224 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
225 my $s = $r->recv(timeout => $timeout);
227 warn "\n!!!!! Failed trying to read record $id\n";
231 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
235 warn "\n!!!!!! Timed out trying to read record $id\n";
243 # Return if the bib is deleted
244 return if ( $bib->deleted eq 't' );
246 if ($format eq 'ARE' or $format eq 'BRE') {
247 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
248 stats() unless $quiet;
255 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
256 if ($type eq 'biblio') {
259 # Remove old 852 fields
260 my @f = $r->field('852');
261 $r->delete_fields(@f) if @f;
263 add_bib_holdings($bib, $r);
264 # Check that at least one 852 was added
265 @f = $r->field('852');
266 # If not, we should NOT add this item to the export
269 add_bib_holdings($bib, $r);
274 $r->delete_field( $r->field('901') );
278 a => $bib->tcn_value,
279 b => $bib->tcn_source,
285 my $recordstr = undef;
287 if ($format eq 'XML') {
288 my $xml = $r->as_xml_record;
289 $xml =~ s/^<\?.+?\?>$//mo;
291 } elsif ($format eq 'UNIMARC') {
292 $recordstr = $r->as_usmarc;
293 } elsif ($format eq 'USMARC') {
294 $recordstr = $r->as_usmarc;
297 if($format eq 'UNIMARC' or $format eq 'USMARC') {
298 my $rec = MARC::File::USMARC->decode($recordstr);
299 #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
300 } elsif($format eq 'XML') {
301 my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
302 #my $tmp = $rec->as_xml_record;
303 #$tmp =~ s/^<\?.+?\?>$//mo;
304 #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
306 } or throw Error::Simple("Failed to parse MARC record back: $!");
307 print $outfh $recordstr;
316 warn "\nERROR ON RECORD $errorid: $e\n";
317 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
320 if ($export_mfhd and $type eq 'biblio') {
321 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
322 foreach my $mfhd (@$mfhds) {
324 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
327 $r->delete_field( $r->field('901') );
331 a => $bib->tcn_value,
332 b => $bib->tcn_source,
338 if ($format eq 'XML') {
339 my $xml = $r->as_xml_record;
340 $xml =~ s/^<\?.+?\?>$//mo;
342 } elsif ($format eq 'UNIMARC') {
343 print $outfh $r->as_usmarc;
344 } elsif ($format eq 'USMARC') {
345 print $outfh $r->as_usmarc;
349 my $errorid = chomp($id);
351 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
352 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
357 stats() if (!$quiet && ! ($count{bib} % 50 ));
364 $speed = $count{did} / (time - $start);
366 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
367 my $cn_speed = $count{cn} / (time - $start);
368 my $cp_speed = $count{cp} / (time - $start);
370 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
371 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
377 $count{did_last} = $count{did};
378 $count{time_last} = time;
381 sub get_bib_locations {
382 print STDERR "Retrieving Org Units ... ";
383 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
385 while (my $o = $r->recv) {
386 die $r->failed->stringify if ($r->failed);
394 print STDERR "Retrieving Copy statuses ... ";
395 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
397 while (my $sta = $r->recv) {
398 die $r->failed->stringify if ($r->failed);
399 $sta = $sta->content;
401 $statuses{$sta->id} = $sta;
406 print STDERR "Retrieving OU types ... ";
407 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
409 while (my $outy = $r->recv) {
410 die $r->failed->stringify if ($r->failed);
411 $outy = $outy->content;
413 $outypes{$outy->id} = $outy;
418 print STDERR "Retrieving Shelving locations ... ";
419 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
421 while (my $s = $r->recv) {
422 die $r->failed->stringify if ($r->failed);
425 $shelves{$s->id} = $s;
430 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
433 sub add_bib_holdings {
437 my $cn_list = $bib->call_numbers;
438 if ($cn_list && @$cn_list) {
440 $count{cn} += @$cn_list;
442 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
443 if ($cp_list && @$cp_list) {
446 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
448 CALLNUMMAP: for my $cn ( @$cn_list ) {
449 my $cn_map_list = $cn_map{$cn->id};
451 # Ignore deleted copies
452 next CALLNUMMAP if ( $cn->deleted eq 't' );
454 COPYMAP: for my $cp ( @$cn_map_list ) {
458 my $owninglib = $cn->owning_lib;
459 my $circlib = $cp->circ_lib;
460 my $printlib = $cp->circ_lib;
462 # Ignore deleted copies
463 next COPYMAP if ( $cp->deleted eq 't');
466 my $thisorg = $orgs{$circlib};
468 if($collapse_to_depth){
469 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
470 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
471 if( $localcfg->{'DontCollapse'} ){
474 if($thisorg->parent_ou){
475 $thisorg = $orgs{$thisorg->parent_ou};
476 $printlib = $thisorg->id;
481 $thisorg = $orgs{$circlib};
485 # load the local config from the .ini file for exclusions
486 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
489 # if we see this setting, just skip that org
491 $cfgparam = 'ExcludeEntireOrg';
492 if( $localcfg->{$cfgparam} )
493 { skipnote($bib->id, $cfgparam); next COPYMAP; }
495 # what follows are exclusion rules
499 if($localcfg->{$cfgparam}){
500 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
501 my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
502 if(grep( { $_ eq 'reference' } @flags) && ($cp->ref eq 't'))
503 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
504 if(grep( { $_ eq 'unholdable' } @flags) && ($cp->holdable eq 'f'))
505 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
506 if(grep( { $_ eq 'circulate' } @flags) && ($cp->circulate eq 'f'))
507 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
508 if(grep( { $_ eq 'hidden' } @flags) && ($cp->opac_visible eq 'f'))
509 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
512 # Excluded Circ Modifiers
513 $cfgparam = 'CircMods';
514 if($localcfg->{$cfgparam}){
515 my $circmod = $cp->circ_modifier || "";
516 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
517 if(grep( { $_ eq $circmod } @circmods) && @circmods)
518 { skipnote($bib->id,$cfgparam); next COPYMAP; }
520 # Inverse rule -- only include specified Circ Mods
521 $cfgparam = 'OnlyIncludeCircMods';
522 if($localcfg->{$cfgparam}){
523 my $circmod = $cp->circ_modifier || "";
524 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
525 unless(grep( { $_ and $_ eq $circmod } @circmods) && @circmods)
526 { skipnote($bib->id,$cfgparam); next COPYMAP; }
528 # Excluded Copy Statuses
529 $cfgparam = 'Statuses';
530 if($localcfg->{$cfgparam}){
531 my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
532 if(grep( { $_ eq $statuses{$cp->status}->name } @statuses) && @statuses)
533 { skipnote($bib->id,$cfgparam); next COPYMAP; }
536 $cfgparam = 'Locations';
537 if($localcfg->{$cfgparam}){
538 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
539 if(grep( { $_ eq $shelves{$cp->location}->name } @locations) && @locations)
540 { skipnote($bib->id,$cfgparam); next COPYMAP; }
542 # Inverse rule - Only use the specified locations
543 $cfgparam = 'OnlyIncludeLocations';
544 if($localcfg->{$cfgparam}){
545 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
546 unless(grep( { $_ eq $shelves{$cp->location}->name } @locations) && @locations)
547 { skipnote($bib->id,$cfgparam); next COPYMAP; }
549 # exclude based on a regex match to location names
550 $cfgparam = 'LocationRegex';
551 if($localcfg->{$cfgparam}){
552 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
553 my $reg = $localcfg->{$cfgparam};
554 if(grep( { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex) && @locregex)
555 { skipnote($bib->id,$cfgparam); next COPYMAP; }
557 # include based on a regex match to location names
558 $cfgparam = 'OnlyIncludeLocationRegex';
559 if($localcfg->{$cfgparam}){
560 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
561 my $reg = $localcfg->{$cfgparam};
562 unless(grep( { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex) && @locregex)
563 { skipnote($bib->id,$cfgparam); next COPYMAP; }
565 # Exclude based on a callno regex
566 $cfgparam = 'CallNoRegex';
567 if($localcfg->{$cfgparam}){
568 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
569 my $reg = $localcfg->{$cfgparam};
570 if(grep( { $cn->label =~ m/($reg)/ } @callnoregex) && @callnoregex)
571 { skipnote($bib->id,$cfgparam); next COPYMAP; }
573 # Include based on a callno regex
574 $cfgparam = 'OnlyIncludeCallNoRegex';
575 if($localcfg->{$cfgparam}){
576 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
577 my $reg = $localcfg->{$cfgparam};
578 unless(grep( { $cn->label =~ m/($reg)/ } @callnoregex) && @callnoregex)
579 { skipnote($bib->id,$cfgparam); next COPYMAP; }
582 # Trim call number to a float and exclude based on Dewey Range
583 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
584 my $gt = $localcfg->{'DeweyGT'};
585 my $lt = $localcfg->{'DeweyLT'};
587 # FIXME if either config has an array just ditch for now
588 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
589 { skipnote($bib->id,""); next COPYMAP; }
590 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
591 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
593 my $callno = $cn->label;
594 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
595 print STDERR $callno;
596 #note that we are making big assumptions about the call numbers in the db
598 # we have a range, exclude what's inbetween
600 if($callno > $gt and $callno < $lt)
601 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
602 # we only have a top threshold, exclude everything below it
605 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
606 # we only have a bottom threshold, exclude everything above it
609 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
613 if($thisorg->parent_ou){
614 $thisorg = $orgs{$thisorg->parent_ou}
626 b => $orgs{$printlib}->shortname,
627 #b => $orgs{$owninglib}->shortname,
628 #b => $orgs{$circlib}->shortname,
629 c => $shelves{$cp->location}->name,
631 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
633 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
634 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
635 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
636 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
637 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
638 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
639 z => $statuses{$cp->status}->name,
645 stats() if (!$quiet && ! ($count{cp} % 100 ));
646 } # COPYMAP: for my $cp ( @$cn_map_list )
647 } # for my $cn ( @$cn_list )
648 } # if ($cp_list && @$cp_list)
649 } # if ($cn_list && @$cn_list)
656 $outf = *STDOUT if($output_file) ;
657 printf($outf "Skipped %s due to config: %s\n",$id,$note) unless $quiet;