8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
29 my ($exclusion_ini,$collapse_to_depth, $output_file);
35 'items' => \$holdings,
36 'mfhd' => \$export_mfhd,
37 'all' => \$all_records,
38 'location=s' => \$location,
39 'money=s' => \$dollarsign,
40 'config=s' => \$config,
41 'format=s' => \$format,
44 'encoding=s' => \$encoding,
45 'timeout=i' => \$timeout,
46 'force901' => \$force901,
47 'exclusion_ini=s' => \$exclusion_ini,
48 'collapse_to_depth=i' => \$collapse_to_depth,
49 'output-file=s' => \$output_file,
52 $cfg = new Config::Simple($exclusion_ini) if ($exclusion_ini);
56 This script exports MARC authority, bibliographic, and serial holdings
57 records from an Evergreen database.
59 Input to this script can consist of a list of record IDs, with one record ID
60 per line, corresponding to the record ID in the Evergreen database table of
61 your requested record type.
63 Alternately, passing the --all option will attempt to export all records of
64 the specified type from the Evergreen database. The --all option starts at
65 record ID 1 and increments the ID by 1 until the largest ID in the database
66 is retrieved. This may not be very efficient for databases with large gaps
67 in their ID sequences.
70 --help or -h This screen.
71 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
72 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
73 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
74 --xml-idl or -x Location of the IDL XML
75 --timeout Timeout for exporting a single record; increase if you
76 are using --holdings and are exporting records that
77 have a lot of items attached to them.
78 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
79 --all or -a Export all records; ignores input list
81 Additional options for type = 'BIBLIO':
82 --items or -i Include items (holdings) in the output
83 --money Currency symbol to use in item price field [\$]
84 --mfhd Export serial MFHD records for associated bib records
85 Not compatible with --format=BRE
86 --location or -l MARC Location Code for holdings from
87 http://www.loc.gov/marc/organizations/orgshome.html
91 To export a set of USMARC records in a file named "output_file" based on the
92 IDs contained in a file named "list_of_ids":
93 cat list_of_ids | $0 > output_file
95 To export a set of MARC21XML authority records in a file named "output.xml"
96 for all authority records in the database:
97 $0 --format XML --type AUTHORITY --all > output.xml
104 $format = uc($format);
105 $encoding = uc($encoding);
109 open($real_stdout, ">&STDOUT") or die "Can't dup STDOUT: $!";
111 open($outfh, '>', $output_file) or die "Can't open file for output $output_file: $!";
113 $outfh = $real_stdout;
116 binmode($outfh, ':raw') if ($encoding ne 'UTF-8');
117 binmode($outfh, ':utf8') if ($encoding eq 'UTF-8');
119 if (!grep { $format eq $_ } @formats) {
120 die "Please select a supported format. ".
121 "Right now that means one of [".
122 join('|',@formats). "]\n";
125 if ($format ne 'XML') {
126 my $type = 'MARC::File::' . $format;
131 # set default timeout and/or correct silly user who
132 # supplied a negative timeout; default timeout of
133 # 300 seconds if exporting items determined empirically.
134 $timeout = $holdings ? 300 : 1;
137 OpenSRF::System->bootstrap_client( config_file => $config );
140 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
143 Fieldmapper->import(IDL => $idl);
145 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
146 OpenILS::Utils::CStoreEditor::init();
147 my $editor = OpenILS::Utils::CStoreEditor->new();
149 print $outfh <<HEADER if ($format eq 'XML');
150 <?xml version="1.0" encoding="$encoding"?>
151 <collection xmlns='http://www.loc.gov/MARC21/slim'>
166 my $last_time = time;
167 my %count = ('bib' => 0, 'did' => 0);
172 if ($type eq 'biblio') {
173 $top_record = $editor->search_biblio_record_entry([
175 {order_by => { 'bre' => 'id DESC' }, limit => 1}
177 } elsif ($type eq 'authority') {
178 $top_record = $editor->search_authority_record_entry([
180 {order_by => { 'are' => 'id DESC' }, limit => 1}
183 for (my $i = 0; $i++ < $top_record;) {
187 while ( my $i = <> ) {
192 print $outfh "</collection>\n" if ($format eq 'XML');
194 $speed = $count{did} / (time - $start);
195 my $time = time - $start;
198 Exports Attempted : $count{bib}
199 Exports Completed : $count{did}
200 Overall Speed : $speed
201 Total Time Elapsed: $time seconds
210 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
211 my $s = $r->recv(timeout => $timeout);
213 warn "\n!!!!! Failed trying to read record $id\n";
217 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
221 warn "\n!!!!!! Timed out trying to read record $id\n";
230 if ($format eq 'ARE' or $format eq 'BRE') {
231 print $outfh OpenSRF::Utils::JSON->perl2JSON($bib);
239 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
240 if ($type eq 'biblio') {
241 add_bib_holdings($bib, $r);
244 if ($format eq 'XML') {
245 my $xml = $r->as_xml_record;
246 $xml =~ s/^<\?.+?\?>$//mo;
248 } elsif ($format eq 'UNIMARC') {
249 print $outfh $r->as_usmarc;
250 } elsif ($format eq 'USMARC') {
251 print $outfh $r->as_usmarc;
259 $errorid =~ s/\n$//g;
261 warn "\nERROR ON RECORD $errorid: $e\n";
262 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
265 if ($export_mfhd and $type eq 'biblio') {
266 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
267 foreach my $mfhd (@$mfhds) {
269 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
271 if ($format eq 'XML') {
272 my $xml = $r->as_xml_record;
273 $xml =~ s/^<\?.+?\?>$//mo;
275 } elsif ($format eq 'UNIMARC') {
276 print $outfh $r->as_usmarc;
277 } elsif ($format eq 'USMARC') {
278 print $outfh $r->as_usmarc;
282 my $errorid = chomp($id);
284 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
285 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
290 stats() if (! ($count{bib} % 50 ));
297 $speed = $count{did} / (time - $start);
299 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
300 my $cn_speed = $count{cn} / (time - $start);
301 my $cp_speed = $count{cp} / (time - $start);
303 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
304 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
310 $count{did_last} = $count{did};
311 $count{time_last} = time;
314 sub get_bib_locations {
315 print STDERR "Retrieving Org Units ... ";
316 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
318 while (my $o = $r->recv) {
319 die $r->failed->stringify if ($r->failed);
327 print STDERR "Retrieving Copy statuses ... ";
328 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
330 while (my $sta = $r->recv) {
331 die $r->failed->stringify if ($r->failed);
332 $sta = $sta->content;
334 $statuses{$sta->id} = $sta;
339 print STDERR "Retrieving OU types ... ";
340 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
342 while (my $outy = $r->recv) {
343 die $r->failed->stringify if ($r->failed);
344 $outy = $outy->content;
346 $outypes{$outy->id} = $outy;
351 print STDERR "Retrieving Shelving locations ... ";
352 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
354 while (my $s = $r->recv) {
355 die $r->failed->stringify if ($r->failed);
358 $shelves{$s->id} = $s;
363 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
366 sub add_bib_holdings {
370 my $cn_list = $bib->call_numbers;
371 if ($cn_list && @$cn_list) {
373 $count{cn} += @$cn_list;
375 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
376 if ($cp_list && @$cp_list) {
379 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
381 for my $cn ( @$cn_list ) {
382 my $cn_map_list = $cn_map{$cn->id};
384 COPYMAP: for my $cp ( @$cn_map_list ) {
388 my $owninglib = $cn->owning_lib;
389 my $circlib = $cp->circ_lib;
390 my $printlib = $cp->circ_lib;
393 my $thisorg = $orgs{$circlib};
395 if($collapse_to_depth){
396 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
397 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
398 if( $localcfg->{'DontCollapse'} ){
401 if($thisorg->parent_ou){
402 $thisorg = $orgs{$thisorg->parent_ou};
403 $printlib = $thisorg->id;
408 $thisorg = $orgs{$circlib};
412 # load the local config from the .ini file for exclusions
413 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
415 # if we see this setting, just skip that org
417 if( $localcfg->{'ExcludeEntireOrg'} )
418 { skipnote($bib->id,"ExcludeEntireOrg"); next COPYMAP; }
420 # what follows are exclusion rules
423 if($localcfg->{'Flags'}){
424 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
425 my @flags = ( (ref($localcfg->{'Flags'}) eq "ARRAY") ? @{$localcfg->{'Flags'}} : ($localcfg->{'Flags'}));
426 if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't')
427 { skipnote($bib->id,"Flags: reference"); next COPYMAP; }
428 elsif( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f')
429 { skipnote($bib->id,"Flags: unholdable"); next COPYMAP; }
430 elsif( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f')
431 { skipnote($bib->id,"Flags: circulate"); next COPYMAP; }
432 elsif( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f')
433 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; }
435 # Excluded Circ Modifiers
436 if($localcfg->{'CircMods'}){
437 my $circmod = $cp->circ_modifier || "";
438 my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
439 if( grep { $_ eq $circmod } @circmods && @circmods)
440 { skipnote($bib->id,"CircMods"); next COPYMAP; }
442 # Inverse rule -- only include specified Circ Mods
443 if($localcfg->{'OnlyIncludeCircMods'}){
444 my $circmod = $cp->circ_modifier || "";
445 my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
446 unless( grep { $_ eq $circmod } @circmods && @circmods)
447 { skipnote($bib->id,"OnlyIncludeCircMods"); next COPYMAP; }
449 # Excluded Copy Statuses
450 if($localcfg->{'Statuses'}){
451 my @statuses = ( (ref($localcfg->{'Statuses'}) eq "ARRAY") ? @{$localcfg->{'Statuses'}} : ($localcfg->{'Statuses'}) );
452 if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses)
453 { skipnote($bib->id,"Statuses"); next COPYMAP; }
456 if($localcfg->{'Locations'}){
457 my @locations = ( (ref($localcfg->{'Locations'}) eq "ARRAY") ? @{$localcfg->{'Locations'}} : ($localcfg->{'Locations'}) );
458 if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
459 { skipnote($bib->id,"Locations"); next COPYMAP; }
461 # Inverse rule - Only use the specified locations
462 if($localcfg->{'OnlyIncludeLocations'}){
463 my @locations = ( (ref($localcfg->{'OnlyIncludeLocations'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocations'}} : ($localcfg->{'Locations'}) );
464 unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
465 { skipnote($bib->id,"OnlyIncludeLocations"); next COPYMAP; }
467 # exclude based on a regex match to location names
468 if($localcfg->{'LocationRegex'}){
469 my @locregex = ( (ref($localcfg->{'LocationRegex'}) eq "ARRAY") ? @{$localcfg->{'LocationRegex'}} : ($localcfg->{'LocationRegex'}) );
470 my $reg = $localcfg->{'LocationRegex'};
471 if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
472 { skipnote($bib->id,"LocationRegex"); next COPYMAP; }
474 # include based on a regex match to location names
475 if($localcfg->{'OnlyIncludeLocationRegex'}){
476 my @locregex = ( (ref($localcfg->{'OnlyIncludeLocationRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocationRegex'}} : ($localcfg->{'OnlyIncludeLocationRegex'}) );
477 my $reg = $localcfg->{'OnlyIncludeLocationRegex'};
478 unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
479 { skipnote($bib->id,"OnlyIncludeLocationRegex"); next COPYMAP; }
481 # Exclude based on a callno regex
482 if($localcfg->{'CallNoRegex'}){
483 my @callnoregex = ( (ref($localcfg->{'CallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'CallNoRegex'}} : ($localcfg->{'CallNoRegex'}) );
484 my $reg = $localcfg->{'CallNoRegex'};
485 if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
486 { skipnote($bib->id,"CallNoRegex"); next COPYMAP; }
488 # Include based on a callno regex
489 if($localcfg->{'OnlyIncludeCallNoRegex'}){
490 my @callnoregex = ( (ref($localcfg->{'OnlyIncludeCallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeCallNoRegex'}} : ($localcfg->{'OnlyIncludeCallNoRegex'}) );
491 my $reg = $localcfg->{'OnlyIncludeCallNoRegex'};
492 unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
493 { skipnote($bib->id,"OnlyIncludeCallNoRegex"); next COPYMAP; }
496 # Trim call number to a float and exclude based on Dewey Range
497 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
498 my $gt = $localcfg->{'DeweyGT'};
499 my $lt = $localcfg->{'DeweyLT'};
501 # FIXME if either config has an array just ditch for now
502 if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY")
503 { skipnote($bib->id,""); next COPYMAP; }
504 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
505 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
507 my $callno = $cn->label;
508 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
509 print STDERR $callno;
510 #note that we are making big assumptions about the call numbers in the db
512 # we have a range, exclude what's inbetween
514 if($callno > $gt and $callno < $lt)
515 { skipnote($bib->id,"Dewey LTGT"); next COPYMAP; }
516 # we only have a top threshold, exclude everything below it
519 { skipnote($bib->id,"Dewey LT"); next COPYMAP; }
520 # we only have a bottom threshold, exclude everything above it
523 { skipnote($bib->id,"Dewey GT"); next COPYMAP; }
527 if($thisorg->parent_ou){
528 $thisorg = $orgs{$thisorg->parent_ou}
540 b => $orgs{$printlib}->shortname,
541 #b => $orgs{$owninglib}->shortname,
542 #b => $orgs{$circlib}->shortname,
543 c => $shelves{$cp->location}->name,
545 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
547 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
548 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
549 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
550 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
551 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
552 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
553 z => $statuses{$cp->status}->name,
558 $r->delete_field( $r->field('901') );
562 a => $bib->tcn_value,
563 b => $bib->tcn_source,
570 stats() if (! ($count{cp} % 100 ));
578 printf(STDOUT "Skipped %s due to config: %s\n",$1,$2);