8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
29 my ($exclusion_ini,$collapse_to_depth, $output_file);
36 'items' => \$holdings,
37 'mfhd' => \$export_mfhd,
38 'all' => \$all_records,
39 'location=s' => \$location,
40 'money=s' => \$dollarsign,
41 'config=s' => \$config,
42 'format=s' => \$format,
45 'encoding=s' => \$encoding,
46 'timeout=i' => \$timeout,
47 'force901' => \$force901,
48 'exclusion_ini=s' => \$exclusion_ini,
49 'collapse_to_depth=i' => \$collapse_to_depth,
50 'onlyholdings' => \$onlyholdings,
51 'output-file=s' => \$output_file,
55 die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
56 $cfg = new Config::Simple($exclusion_ini)
61 This script exports MARC authority, bibliographic, and serial holdings
62 records from an Evergreen database.
64 Input to this script can consist of a list of record IDs, with one record ID
65 per line, corresponding to the record ID in the Evergreen database table of
66 your requested record type.
68 Alternately, passing the --all option will attempt to export all records of
69 the specified type from the Evergreen database. The --all option starts at
70 record ID 1 and increments the ID by 1 until the largest ID in the database
71 is retrieved. This may not be very efficient for databases with large gaps
72 in their ID sequences.
75 --help or -h This screen.
76 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
77 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
78 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
79 --xml-idl or -x Location of the IDL XML
80 --timeout Timeout for exporting a single record; increase if you
81 are using --holdings and are exporting records that
82 have a lot of items attached to them.
83 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
84 --all or -a Export all records; ignores input list
86 Additional options for type = 'BIBLIO':
87 --items or -i Include items (holdings) in the output
88 --money Currency symbol to use in item price field [\$]
89 --mfhd Export serial MFHD records for associated bib records
90 Not compatible with --format=BRE
91 --location or -l MARC Location Code for holdings from
92 http://www.loc.gov/marc/organizations/orgshome.html
94 Options added by Sitka:
95 --force901 Force-add 901 fields
96 --exclusion_ini FILENAME Config::Simple based INI file for excluding holdings from the export
97 --collapse_to_depth 2 Depth to collapse holdings. Any holdings at a depth below
98 will be collapsed up to the parent org unit at the set depth
99 --onlyholdings Clean out 852s before adding new ones, and only export items that
100 successfully recieved an 852 field
104 To export a set of USMARC records in a file named "output_file" based on the
105 IDs contained in a file named "list_of_ids":
106 cat list_of_ids | $0 > output_file
108 To export a set of MARC21XML authority records in a file named "output.xml"
109 for all authority records in the database:
110 $0 --format XML --type AUTHORITY --all > output.xml
117 $format = uc($format);
118 $encoding = uc($encoding);
122 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
124 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
126 $outfh = $real_stdout;
129 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
130 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
132 if (!grep { $format eq $_ } @formats) {
133 die "Please select a supported format. ".
134 "Right now that means one of [".
135 join('|',@formats). "]\n";
138 if ($format ne 'XML') {
139 my $type = 'MARC::File::' . $format;
144 # set default timeout and/or correct silly user who
145 # supplied a negative timeout; default timeout of
146 # 300 seconds if exporting items determined empirically.
147 $timeout = $holdings ? 300 : 1;
150 OpenSRF::System->bootstrap_client( config_file => $config );
153 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
156 Fieldmapper->import(IDL => $idl);
158 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
159 OpenILS::Utils::CStoreEditor::init();
160 my $editor = OpenILS::Utils::CStoreEditor->new();
162 print $outfh <<HEADER if ($format eq 'XML');
163 <?xml version="1.0" encoding="$encoding"?>
164 <collection xmlns='http://www.loc.gov/MARC21/slim'>
179 my $last_time = time;
180 my %count = ('bib' => 0, 'did' => 0);
185 if ($type eq 'biblio') {
186 $top_record = $editor->search_biblio_record_entry([
188 {order_by => { 'bre' => 'id DESC' }, limit => 1}
190 } elsif ($type eq 'authority') {
191 $top_record = $editor->search_authority_record_entry([
193 {order_by => { 'are' => 'id DESC' }, limit => 1}
196 for (my $i = 0; $i++ < $top_record;) {
200 while ( my $i = <> ) {
205 print $outfh "</collection>\n" if ($format eq 'XML');
207 $speed = $count{did} / (time - $start);
208 my $time = time - $start;
211 Exports Attempted : $count{bib}
212 Exports Completed : $count{did}
213 Overall Speed : $speed
214 Total Time Elapsed: $time seconds
223 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
224 my $s = $r->recv(timeout => $timeout);
226 warn "\n!!!!! Failed trying to read record $id\n";
230 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
234 warn "\n!!!!!! Timed out trying to read record $id\n";
243 if ($format eq 'ARE' or $format eq 'BRE') {
244 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
252 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
253 if ($type eq 'biblio') {
256 # Remove old 852 fields
257 my @f = $r->field('852');
258 $r->delete_fields(@f) if @f;
260 add_bib_holdings($bib, $r);
261 # Check that at least one 852 was added
262 @f = $r->field('852');
263 # If not, we should NOT add this item to the export
266 add_bib_holdings($bib, $r);
271 $r->delete_field( $r->field('901') );
275 a => $bib->tcn_value,
276 b => $bib->tcn_source,
282 my $recordstr = undef;
284 if ($format eq 'XML') {
285 my $xml = $r->as_xml_record;
286 $xml =~ s/^<\?.+?\?>$//mo;
288 } elsif ($format eq 'UNIMARC') {
289 $recordstr = $r->as_usmarc;
290 } elsif ($format eq 'USMARC') {
291 $recordstr = $r->as_usmarc;
294 if($format eq 'UNIMARC' or $format eq 'USMARC') {
295 my $rec = MARC::File::USMARC->decode($recordstr);
296 #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
297 } elsif($format eq 'XML') {
298 my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
299 #my $tmp = $rec->as_xml_record;
300 #$tmp =~ s/^<\?.+?\?>$//mo;
301 #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
303 } or throw Error::Simple("Failed to parse MARC record back: $!");
304 print $outfh $recordstr;
313 warn "\nERROR ON RECORD $errorid: $e\n";
314 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
317 if ($export_mfhd and $type eq 'biblio') {
318 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
319 foreach my $mfhd (@$mfhds) {
321 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
324 $r->delete_field( $r->field('901') );
328 a => $bib->tcn_value,
329 b => $bib->tcn_source,
335 if ($format eq 'XML') {
336 my $xml = $r->as_xml_record;
337 $xml =~ s/^<\?.+?\?>$//mo;
339 } elsif ($format eq 'UNIMARC') {
340 print $outfh $r->as_usmarc;
341 } elsif ($format eq 'USMARC') {
342 print $outfh $r->as_usmarc;
346 my $errorid = chomp($id);
348 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
349 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
354 stats() if (! ($count{bib} % 50 ));
361 $speed = $count{did} / (time - $start);
363 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
364 my $cn_speed = $count{cn} / (time - $start);
365 my $cp_speed = $count{cp} / (time - $start);
367 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
368 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
374 $count{did_last} = $count{did};
375 $count{time_last} = time;
378 sub get_bib_locations {
379 print STDERR "Retrieving Org Units ... ";
380 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
382 while (my $o = $r->recv) {
383 die $r->failed->stringify if ($r->failed);
391 print STDERR "Retrieving Copy statuses ... ";
392 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
394 while (my $sta = $r->recv) {
395 die $r->failed->stringify if ($r->failed);
396 $sta = $sta->content;
398 $statuses{$sta->id} = $sta;
403 print STDERR "Retrieving OU types ... ";
404 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
406 while (my $outy = $r->recv) {
407 die $r->failed->stringify if ($r->failed);
408 $outy = $outy->content;
410 $outypes{$outy->id} = $outy;
415 print STDERR "Retrieving Shelving locations ... ";
416 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
418 while (my $s = $r->recv) {
419 die $r->failed->stringify if ($r->failed);
422 $shelves{$s->id} = $s;
427 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
430 sub add_bib_holdings {
434 my $cn_list = $bib->call_numbers;
435 if ($cn_list && @$cn_list) {
437 $count{cn} += @$cn_list;
439 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
440 if ($cp_list && @$cp_list) {
443 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
445 for my $cn ( @$cn_list ) {
446 my $cn_map_list = $cn_map{$cn->id};
448 COPYMAP: for my $cp ( @$cn_map_list ) {
452 my $owninglib = $cn->owning_lib;
453 my $circlib = $cp->circ_lib;
454 my $printlib = $cp->circ_lib;
456 # Ignore deleted copies
457 next COPYMAP if $cp->deleted;
460 my $thisorg = $orgs{$circlib};
462 if($collapse_to_depth){
463 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
464 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
465 if( $localcfg->{'DontCollapse'} ){
468 if($thisorg->parent_ou){
469 $thisorg = $orgs{$thisorg->parent_ou};
470 $printlib = $thisorg->id;
475 $thisorg = $orgs{$circlib};
479 # load the local config from the .ini file for exclusions
480 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
483 # if we see this setting, just skip that org
485 $cfgparam = 'ExcludeEntireOrg';
486 if( $localcfg->{$cfgparam} )
487 { skipnote($bib->id, $cfgparam); next COPYMAP; }
489 # what follows are exclusion rules
493 if($localcfg->{$cfgparam}){
494 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
495 my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
496 if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't')
497 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
498 elsif( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f')
499 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
500 elsif( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f')
501 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
502 elsif( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f')
503 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
506 # Excluded Circ Modifiers
507 $cfgparam = 'CircMods';
508 if($localcfg->{$cfgparam}){
509 my $circmod = $cp->circ_modifier || "";
510 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
511 if( grep { $_ eq $circmod } @circmods && @circmods)
512 { skipnote($bib->id,$cfgparam); next COPYMAP; }
514 # Inverse rule -- only include specified Circ Mods
515 $cfgparam = 'OnlyIncludeCircMods';
516 if($localcfg->{$cfgparam}){
517 my $circmod = $cp->circ_modifier || "";
518 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
519 unless( grep { $_ and $_ eq $circmod } @circmods && @circmods)
520 { skipnote($bib->id,$cfgparam); next COPYMAP; }
522 # Excluded Copy Statuses
523 $cfgparam = 'Statuses';
524 if($localcfg->{$cfgparam}){
525 my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
526 if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses)
527 { skipnote($bib->id,$cfgparam); next COPYMAP; }
530 $cfgparam = 'Locations';
531 if($localcfg->{$cfgparam}){
532 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
533 if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
534 { skipnote($bib->id,$cfgparam); next COPYMAP; }
536 # Inverse rule - Only use the specified locations
537 $cfgparam = 'OnlyIncludeLocations';
538 if($localcfg->{$cfgparam}){
539 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
540 unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
541 { skipnote($bib->id,$cfgparam); next COPYMAP; }
543 # exclude based on a regex match to location names
544 $cfgparam = 'LocationRegex';
545 if($localcfg->{$cfgparam}){
546 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
547 my $reg = $localcfg->{$cfgparam};
548 if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
549 { skipnote($bib->id,$cfgparam); next COPYMAP; }
551 # include based on a regex match to location names
552 $cfgparam = 'OnlyIncludeLocationRegex';
553 if($localcfg->{$cfgparam}){
554 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
555 my $reg = $localcfg->{$cfgparam};
556 unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
557 { skipnote($bib->id,$cfgparam); next COPYMAP; }
559 # Exclude based on a callno regex
560 $cfgparam = 'CallNoRegex';
561 if($localcfg->{$cfgparam}){
562 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
563 my $reg = $localcfg->{$cfgparam};
564 if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
565 { skipnote($bib->id,$cfgparam); next COPYMAP; }
567 # Include based on a callno regex
568 $cfgparam = 'OnlyIncludeCallNoRegex';
569 if($localcfg->{$cfgparam}){
570 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
571 my $reg = $localcfg->{$cfgparam};
572 unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
573 { skipnote($bib->id,$cfgparam); next COPYMAP; }
576 # Trim call number to a float and exclude based on Dewey Range
577 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
578 my $gt = $localcfg->{'DeweyGT'};
579 my $lt = $localcfg->{'DeweyLT'};
581 # FIXME if either config has an array just ditch for now
582 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
583 { skipnote($bib->id,""); next COPYMAP; }
584 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
585 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
587 my $callno = $cn->label;
588 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
589 print STDERR $callno;
590 #note that we are making big assumptions about the call numbers in the db
592 # we have a range, exclude what's inbetween
594 if($callno > $gt and $callno < $lt)
595 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
596 # we only have a top threshold, exclude everything below it
599 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
600 # we only have a bottom threshold, exclude everything above it
603 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
607 if($thisorg->parent_ou){
608 $thisorg = $orgs{$thisorg->parent_ou}
620 b => $orgs{$printlib}->shortname,
621 #b => $orgs{$owninglib}->shortname,
622 #b => $orgs{$circlib}->shortname,
623 c => $shelves{$cp->location}->name,
625 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
627 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
628 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
629 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
630 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
631 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
632 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
633 z => $statuses{$cp->status}->name,
639 stats() if (! ($count{cp} % 100 ));
640 } # COPYMAP: for my $cp ( @$cn_map_list )
641 } # for my $cn ( @$cn_list )
642 } # if ($cp_list && @$cp_list)
643 } # if ($cn_list && @$cn_list)
650 $outf = *STDOUT if($output_file) ;
651 printf($outf "Skipped %s due to config: %s\n",$id,$note);