8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
29 my ($exclusion_ini,$collapse_to_depth, $output_file);
36 'items' => \$holdings,
37 'mfhd' => \$export_mfhd,
38 'all' => \$all_records,
39 'location=s' => \$location,
40 'money=s' => \$dollarsign,
41 'config=s' => \$config,
42 'format=s' => \$format,
45 'encoding=s' => \$encoding,
46 'timeout=i' => \$timeout,
47 'force901' => \$force901,
48 'exclusion_ini=s' => \$exclusion_ini,
49 'collapse_to_depth=i' => \$collapse_to_depth,
50 'onlyholdings' => \$onlyholdings,
51 'output-file=s' => \$output_file,
55 die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
56 $cfg = new Config::Simple($exclusion_ini)
61 This script exports MARC authority, bibliographic, and serial holdings
62 records from an Evergreen database.
64 Input to this script can consist of a list of record IDs, with one record ID
65 per line, corresponding to the record ID in the Evergreen database table of
66 your requested record type.
68 Alternately, passing the --all option will attempt to export all records of
69 the specified type from the Evergreen database. The --all option starts at
70 record ID 1 and increments the ID by 1 until the largest ID in the database
71 is retrieved. This may not be very efficient for databases with large gaps
72 in their ID sequences.
75 --help or -h This screen.
76 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
77 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
78 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
79 --xml-idl or -x Location of the IDL XML
80 --timeout Timeout for exporting a single record; increase if you
81 are using --holdings and are exporting records that
82 have a lot of items attached to them.
83 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
84 --all or -a Export all records; ignores input list
86 Additional options for type = 'BIBLIO':
87 --items or -i Include items (holdings) in the output
88 --money Currency symbol to use in item price field [\$]
89 --mfhd Export serial MFHD records for associated bib records
90 Not compatible with --format=BRE
91 --location or -l MARC Location Code for holdings from
92 http://www.loc.gov/marc/organizations/orgshome.html
94 Options added by Sitka:
95 --force901 Force-add 901 fields
96 --exclusion_ini FILENAME Config::Simple based INI file for excluding holdings from the export
97 --collapse_to_depth 2 Depth to collapse holdings. Any holdings at a depth below
98 will be collapsed up to the parent org unit at the set depth
99 --onlyholdings Clean out 852s before adding new ones, and only export items that
100 successfully recieved an 852 field
104 To export a set of USMARC records in a file named "output_file" based on the
105 IDs contained in a file named "list_of_ids":
106 cat list_of_ids | $0 > output_file
108 To export a set of MARC21XML authority records in a file named "output.xml"
109 for all authority records in the database:
110 $0 --format XML --type AUTHORITY --all > output.xml
117 $format = uc($format);
118 $encoding = uc($encoding);
122 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
124 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
126 $outfh = $real_stdout;
129 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
130 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
132 if (!grep { $format eq $_ } @formats) {
133 die "Please select a supported format. ".
134 "Right now that means one of [".
135 join('|',@formats). "]\n";
138 if ($format ne 'XML') {
139 my $type = 'MARC::File::' . $format;
144 # set default timeout and/or correct silly user who
145 # supplied a negative timeout; default timeout of
146 # 300 seconds if exporting items determined empirically.
147 $timeout = $holdings ? 300 : 1;
150 OpenSRF::System->bootstrap_client( config_file => $config );
153 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
156 Fieldmapper->import(IDL => $idl);
158 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
159 OpenILS::Utils::CStoreEditor::init();
160 my $editor = OpenILS::Utils::CStoreEditor->new();
162 print $outfh <<HEADER if ($format eq 'XML');
163 <?xml version="1.0" encoding="$encoding"?>
164 <collection xmlns='http://www.loc.gov/MARC21/slim'>
179 my $last_time = time;
180 my %count = ('bib' => 0, 'did' => 0);
185 if ($type eq 'biblio') {
186 $top_record = $editor->search_biblio_record_entry([
188 {order_by => { 'bre' => 'id DESC' }, limit => 1}
190 } elsif ($type eq 'authority') {
191 $top_record = $editor->search_authority_record_entry([
193 {order_by => { 'are' => 'id DESC' }, limit => 1}
196 for (my $i = 0; $i++ < $top_record;) {
200 while ( my $i = <> ) {
205 print $outfh "</collection>\n" if ($format eq 'XML');
207 $speed = $count{did} / (time - $start);
208 my $time = time - $start;
211 Exports Attempted : $count{bib}
212 Exports Completed : $count{did}
213 Overall Speed : $speed
214 Total Time Elapsed: $time seconds
223 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
224 my $s = $r->recv(timeout => $timeout);
226 warn "\n!!!!! Failed trying to read record $id\n";
230 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
234 warn "\n!!!!!! Timed out trying to read record $id\n";
242 # Return if the bib is deleted
243 return if $bib->deleted;
245 if ($format eq 'ARE' or $format eq 'BRE') {
246 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
254 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
255 if ($type eq 'biblio') {
258 # Remove old 852 fields
259 my @f = $r->field('852');
260 $r->delete_fields(@f) if @f;
262 add_bib_holdings($bib, $r);
263 # Check that at least one 852 was added
264 @f = $r->field('852');
265 # If not, we should NOT add this item to the export
268 add_bib_holdings($bib, $r);
273 $r->delete_field( $r->field('901') );
277 a => $bib->tcn_value,
278 b => $bib->tcn_source,
284 my $recordstr = undef;
286 if ($format eq 'XML') {
287 my $xml = $r->as_xml_record;
288 $xml =~ s/^<\?.+?\?>$//mo;
290 } elsif ($format eq 'UNIMARC') {
291 $recordstr = $r->as_usmarc;
292 } elsif ($format eq 'USMARC') {
293 $recordstr = $r->as_usmarc;
296 if($format eq 'UNIMARC' or $format eq 'USMARC') {
297 my $rec = MARC::File::USMARC->decode($recordstr);
298 #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
299 } elsif($format eq 'XML') {
300 my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
301 #my $tmp = $rec->as_xml_record;
302 #$tmp =~ s/^<\?.+?\?>$//mo;
303 #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
305 } or throw Error::Simple("Failed to parse MARC record back: $!");
306 print $outfh $recordstr;
315 warn "\nERROR ON RECORD $errorid: $e\n";
316 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
319 if ($export_mfhd and $type eq 'biblio') {
320 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
321 foreach my $mfhd (@$mfhds) {
323 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
326 $r->delete_field( $r->field('901') );
330 a => $bib->tcn_value,
331 b => $bib->tcn_source,
337 if ($format eq 'XML') {
338 my $xml = $r->as_xml_record;
339 $xml =~ s/^<\?.+?\?>$//mo;
341 } elsif ($format eq 'UNIMARC') {
342 print $outfh $r->as_usmarc;
343 } elsif ($format eq 'USMARC') {
344 print $outfh $r->as_usmarc;
348 my $errorid = chomp($id);
350 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
351 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
356 stats() if (! ($count{bib} % 50 ));
363 $speed = $count{did} / (time - $start);
365 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
366 my $cn_speed = $count{cn} / (time - $start);
367 my $cp_speed = $count{cp} / (time - $start);
369 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
370 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
376 $count{did_last} = $count{did};
377 $count{time_last} = time;
380 sub get_bib_locations {
381 print STDERR "Retrieving Org Units ... ";
382 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
384 while (my $o = $r->recv) {
385 die $r->failed->stringify if ($r->failed);
393 print STDERR "Retrieving Copy statuses ... ";
394 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
396 while (my $sta = $r->recv) {
397 die $r->failed->stringify if ($r->failed);
398 $sta = $sta->content;
400 $statuses{$sta->id} = $sta;
405 print STDERR "Retrieving OU types ... ";
406 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
408 while (my $outy = $r->recv) {
409 die $r->failed->stringify if ($r->failed);
410 $outy = $outy->content;
412 $outypes{$outy->id} = $outy;
417 print STDERR "Retrieving Shelving locations ... ";
418 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
420 while (my $s = $r->recv) {
421 die $r->failed->stringify if ($r->failed);
424 $shelves{$s->id} = $s;
429 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
432 sub add_bib_holdings {
436 my $cn_list = $bib->call_numbers;
437 if ($cn_list && @$cn_list) {
439 $count{cn} += @$cn_list;
441 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
442 if ($cp_list && @$cp_list) {
445 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
447 CALLNUMMAP: for my $cn ( @$cn_list ) {
448 my $cn_map_list = $cn_map{$cn->id};
450 # Ignore deleted copies
451 next CALLNUMMAP if $cn->deleted;
453 COPYMAP: for my $cp ( @$cn_map_list ) {
457 my $owninglib = $cn->owning_lib;
458 my $circlib = $cp->circ_lib;
459 my $printlib = $cp->circ_lib;
461 # Ignore deleted copies
462 next COPYMAP if $cp->deleted;
465 my $thisorg = $orgs{$circlib};
467 if($collapse_to_depth){
468 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
469 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
470 if( $localcfg->{'DontCollapse'} ){
473 if($thisorg->parent_ou){
474 $thisorg = $orgs{$thisorg->parent_ou};
475 $printlib = $thisorg->id;
480 $thisorg = $orgs{$circlib};
484 # load the local config from the .ini file for exclusions
485 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
488 # if we see this setting, just skip that org
490 $cfgparam = 'ExcludeEntireOrg';
491 if( $localcfg->{$cfgparam} )
492 { skipnote($bib->id, $cfgparam); next COPYMAP; }
494 # what follows are exclusion rules
498 if($localcfg->{$cfgparam}){
499 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
500 my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
501 if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't')
502 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
503 elsif( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f')
504 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
505 elsif( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f')
506 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
507 elsif( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f')
508 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
511 # Excluded Circ Modifiers
512 $cfgparam = 'CircMods';
513 if($localcfg->{$cfgparam}){
514 my $circmod = $cp->circ_modifier || "";
515 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
516 if( grep { $_ eq $circmod } @circmods && @circmods)
517 { skipnote($bib->id,$cfgparam); next COPYMAP; }
519 # Inverse rule -- only include specified Circ Mods
520 $cfgparam = 'OnlyIncludeCircMods';
521 if($localcfg->{$cfgparam}){
522 my $circmod = $cp->circ_modifier || "";
523 my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
524 unless( grep { $_ and $_ eq $circmod } @circmods && @circmods)
525 { skipnote($bib->id,$cfgparam); next COPYMAP; }
527 # Excluded Copy Statuses
528 $cfgparam = 'Statuses';
529 if($localcfg->{$cfgparam}){
530 my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
531 if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses)
532 { skipnote($bib->id,$cfgparam); next COPYMAP; }
535 $cfgparam = 'Locations';
536 if($localcfg->{$cfgparam}){
537 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
538 if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
539 { skipnote($bib->id,$cfgparam); next COPYMAP; }
541 # Inverse rule - Only use the specified locations
542 $cfgparam = 'OnlyIncludeLocations';
543 if($localcfg->{$cfgparam}){
544 my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
545 unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
546 { skipnote($bib->id,$cfgparam); next COPYMAP; }
548 # exclude based on a regex match to location names
549 $cfgparam = 'LocationRegex';
550 if($localcfg->{$cfgparam}){
551 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
552 my $reg = $localcfg->{$cfgparam};
553 if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
554 { skipnote($bib->id,$cfgparam); next COPYMAP; }
556 # include based on a regex match to location names
557 $cfgparam = 'OnlyIncludeLocationRegex';
558 if($localcfg->{$cfgparam}){
559 my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
560 my $reg = $localcfg->{$cfgparam};
561 unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
562 { skipnote($bib->id,$cfgparam); next COPYMAP; }
564 # Exclude based on a callno regex
565 $cfgparam = 'CallNoRegex';
566 if($localcfg->{$cfgparam}){
567 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
568 my $reg = $localcfg->{$cfgparam};
569 if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
570 { skipnote($bib->id,$cfgparam); next COPYMAP; }
572 # Include based on a callno regex
573 $cfgparam = 'OnlyIncludeCallNoRegex';
574 if($localcfg->{$cfgparam}){
575 my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
576 my $reg = $localcfg->{$cfgparam};
577 unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
578 { skipnote($bib->id,$cfgparam); next COPYMAP; }
581 # Trim call number to a float and exclude based on Dewey Range
582 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
583 my $gt = $localcfg->{'DeweyGT'};
584 my $lt = $localcfg->{'DeweyLT'};
586 # FIXME if either config has an array just ditch for now
587 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
588 { skipnote($bib->id,""); next COPYMAP; }
589 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
590 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
592 my $callno = $cn->label;
593 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
594 print STDERR $callno;
595 #note that we are making big assumptions about the call numbers in the db
597 # we have a range, exclude what's inbetween
599 if($callno > $gt and $callno < $lt)
600 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
601 # we only have a top threshold, exclude everything below it
604 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
605 # we only have a bottom threshold, exclude everything above it
608 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
612 if($thisorg->parent_ou){
613 $thisorg = $orgs{$thisorg->parent_ou}
625 b => $orgs{$printlib}->shortname,
626 #b => $orgs{$owninglib}->shortname,
627 #b => $orgs{$circlib}->shortname,
628 c => $shelves{$cp->location}->name,
630 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
632 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
633 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
634 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
635 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
636 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
637 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
638 z => $statuses{$cp->status}->name,
644 stats() if (! ($count{cp} % 100 ));
645 } # COPYMAP: for my $cp ( @$cn_map_list )
646 } # for my $cn ( @$cn_list )
647 } # if ($cp_list && @$cp_list)
648 } # if ($cn_list && @$cn_list)
655 $outf = *STDOUT if($output_file) ;
656 printf($outf "Skipped %s due to config: %s\n",$id,$note);