8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
29 my ($exclusion_ini,$collapse_to_depth);
35 'items' => \$holdings,
36 'mfhd' => \$export_mfhd,
37 'all' => \$all_records,
38 'location=s' => \$location,
39 'money=s' => \$dollarsign,
40 'config=s' => \$config,
41 'format=s' => \$format,
44 'encoding=s' => \$encoding,
45 'timeout=i' => \$timeout,
46 'force901' => \$force901,
47 'exclusion_ini=s' => \$exclusion_ini,
48 'collapse_to_depth=i' => \$collapse_to_depth,
52 $cfg = new Config::Simple($exclusion_ini) if ($exclusion_ini);
56 This script exports MARC authority, bibliographic, and serial holdings
57 records from an Evergreen database.
59 Input to this script can consist of a list of record IDs, with one record ID
60 per line, corresponding to the record ID in the Evergreen database table of
61 your requested record type.
63 Alternately, passing the --all option will attempt to export all records of
64 the specified type from the Evergreen database. The --all option starts at
65 record ID 1 and increments the ID by 1 until the largest ID in the database
66 is retrieved. This may not be very efficient for databases with large gaps
67 in their ID sequences.
70 --help or -h This screen.
71 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
72 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
73 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
74 --xml-idl or -x Location of the IDL XML
75 --timeout Timeout for exporting a single record; increase if you
76 are using --holdings and are exporting records that
77 have a lot of items attached to them.
78 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
79 --all or -a Export all records; ignores input list
81 Additional options for type = 'BIBLIO':
82 --items or -i Include items (holdings) in the output
83 --money Currency symbol to use in item price field [\$]
84 --mfhd Export serial MFHD records for associated bib records
85 Not compatible with --format=BRE
86 --location or -l MARC Location Code for holdings from
87 http://www.loc.gov/marc/organizations/orgshome.html
91 To export a set of USMARC records in a file named "output_file" based on the
92 IDs contained in a file named "list_of_ids":
93 cat list_of_ids | $0 > output_file
95 To export a set of MARC21XML authority records in a file named "output.xml"
96 for all authority records in the database:
97 $0 --format XML --type AUTHORITY --all > output.xml
104 $format = uc($format);
105 $encoding = uc($encoding);
107 binmode(STDOUT, ':raw') if ($encoding ne 'UTF-8');
108 binmode(STDOUT, ':utf8') if ($encoding eq 'UTF-8');
110 if (!grep { $format eq $_ } @formats) {
111 die "Please select a supported format. ".
112 "Right now that means one of [".
113 join('|',@formats). "]\n";
116 if ($format ne 'XML') {
117 my $type = 'MARC::File::' . $format;
122 # set default timeout and/or correct silly user who
123 # supplied a negative timeout; default timeout of
124 # 300 seconds if exporting items determined empirically.
125 $timeout = $holdings ? 300 : 1;
128 OpenSRF::System->bootstrap_client( config_file => $config );
131 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
134 Fieldmapper->import(IDL => $idl);
136 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
137 OpenILS::Utils::CStoreEditor::init();
138 my $editor = OpenILS::Utils::CStoreEditor->new();
140 print <<HEADER if ($format eq 'XML');
141 <?xml version="1.0" encoding="$encoding"?>
142 <collection xmlns='http://www.loc.gov/MARC21/slim'>
157 my $last_time = time;
158 my %count = ('bib' => 0, 'did' => 0);
163 if ($type eq 'biblio') {
164 $top_record = $editor->search_biblio_record_entry([
166 {order_by => { 'bre' => 'id DESC' }, limit => 1}
168 } elsif ($type eq 'authority') {
169 $top_record = $editor->search_authority_record_entry([
171 {order_by => { 'are' => 'id DESC' }, limit => 1}
174 for (my $i = 0; $i++ < $top_record;) {
178 while ( my $i = <> ) {
183 print "</collection>\n" if ($format eq 'XML');
185 $speed = $count{did} / (time - $start);
186 my $time = time - $start;
189 Exports Attempted : $count{bib}
190 Exports Completed : $count{did}
191 Overall Speed : $speed
192 Total Time Elapsed: $time seconds
201 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
202 my $s = $r->recv(timeout => $timeout);
204 warn "\n!!!!! Failed trying to read record $id\n";
208 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
212 warn "\n!!!!!! Timed out trying to read record $id\n";
221 if ($format eq 'ARE' or $format eq 'BRE') {
222 print OpenSRF::Utils::JSON->perl2JSON($bib);
230 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
231 if ($type eq 'biblio') {
232 add_bib_holdings($bib, $r);
235 if ($format eq 'XML') {
236 my $xml = $r->as_xml_record;
237 $xml =~ s/^<\?.+?\?>$//mo;
239 } elsif ($format eq 'UNIMARC') {
241 } elsif ($format eq 'USMARC') {
250 $errorid =~ s/\n$//g;
252 warn "\nERROR ON RECORD $errorid: $e\n";
253 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
256 if ($export_mfhd and $type eq 'biblio') {
257 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
258 foreach my $mfhd (@$mfhds) {
260 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
262 if ($format eq 'XML') {
263 my $xml = $r->as_xml_record;
264 $xml =~ s/^<\?.+?\?>$//mo;
266 } elsif ($format eq 'UNIMARC') {
268 } elsif ($format eq 'USMARC') {
273 my $errorid = chomp($id);
275 warn "\nERROR ON MFHD RECORD $errorid: $e\n";
276 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
281 stats() if (! ($count{bib} % 50 ));
288 $speed = $count{did} / (time - $start);
290 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
291 my $cn_speed = $count{cn} / (time - $start);
292 my $cp_speed = $count{cp} / (time - $start);
294 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
295 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
301 $count{did_last} = $count{did};
302 $count{time_last} = time;
305 sub get_bib_locations {
306 print STDERR "Retrieving Org Units ... ";
307 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
309 while (my $o = $r->recv) {
310 die $r->failed->stringify if ($r->failed);
318 print STDERR "Retrieving Copy statuses ... ";
319 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
321 while (my $sta = $r->recv) {
322 die $r->failed->stringify if ($r->failed);
323 $sta = $sta->content;
325 $statuses{$sta->id} = $sta;
330 print STDERR "Retrieving OU types ... ";
331 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
333 while (my $outy = $r->recv) {
334 die $r->failed->stringify if ($r->failed);
335 $outy = $outy->content;
337 $outypes{$outy->id} = $outy;
342 print STDERR "Retrieving Shelving locations ... ";
343 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
345 while (my $s = $r->recv) {
346 die $r->failed->stringify if ($r->failed);
349 $shelves{$s->id} = $s;
354 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
357 sub add_bib_holdings {
361 my $cn_list = $bib->call_numbers;
362 if ($cn_list && @$cn_list) {
364 $count{cn} += @$cn_list;
366 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
367 if ($cp_list && @$cp_list) {
370 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
372 for my $cn ( @$cn_list ) {
373 my $cn_map_list = $cn_map{$cn->id};
375 COPYMAP: for my $cp ( @$cn_map_list ) {
379 my $owninglib = $cn->owning_lib;
380 my $circlib = $cp->circ_lib;
381 my $printlib = $cp->circ_lib;
384 my $thisorg = $orgs{$circlib};
386 if($collapse_to_depth){
387 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
388 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
389 if( $localcfg->{'DontCollapse'} ){
392 if($thisorg->parent_ou){
393 $thisorg = $orgs{$thisorg->parent_ou};
394 $printlib = $thisorg->id;
399 $thisorg = $orgs{$circlib};
403 # load the local config from the .ini file for exclusions
404 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
406 # if we see this setting, just skip that org
408 next COPYMAP if( $localcfg->{'ExcludeEntireOrg'} );
410 # what follows are exclusion rules
413 if($localcfg->{'Flags'}){
414 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
415 my @flags = ( (ref($localcfg->{'Flags'}) eq "ARRAY") ? @{$localcfg->{'Flags'}} : ($localcfg->{'Flags'}));
416 next COPYMAP if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't');
417 next COPYMAP if( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f');
418 next COPYMAP if( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f');
419 next COPYMAP if( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f');
421 # Excluded Circ Modifiers
422 if($localcfg->{'CircMods'}){
423 my $circmod = $cp->circ_modifier || "";
424 my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
425 next COPYMAP if( grep { $_ eq $circmod } @circmods && @circmods);
427 # Inverse rule -- only include specified Circ Mods
428 if($localcfg->{'OnlyIncludeCircMods'}){
429 my $circmod = $cp->circ_modifier || "";
430 my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
431 next COPYMAP unless( grep { $_ eq $circmod } @circmods && @circmods);
433 # Excluded Copy Statuses
434 if($localcfg->{'Statuses'}){
435 my @statuses = ( (ref($localcfg->{'Statuses'}) eq "ARRAY") ? @{$localcfg->{'Statuses'}} : ($localcfg->{'Statuses'}) );
436 next COPYMAP if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses);
439 if($localcfg->{'Locations'}){
440 my @locations = ( (ref($localcfg->{'Locations'}) eq "ARRAY") ? @{$localcfg->{'Locations'}} : ($localcfg->{'Locations'}) );
441 next COPYMAP if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations);
443 # Inverse rule - Only use the specified locations
444 if($localcfg->{'OnlyIncludeLocations'}){
445 my @locations = ( (ref($localcfg->{'OnlyIncludeLocations'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocations'}} : ($localcfg->{'Locations'}) );
446 next COPYMAP unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations);
448 # exclude based on a regex match to location names
449 if($localcfg->{'LocationRegex'}){
450 my @locregex = ( (ref($localcfg->{'LocationRegex'}) eq "ARRAY") ? @{$localcfg->{'LocationRegex'}} : ($localcfg->{'LocationRegex'}) );
451 my $reg = $localcfg->{'LocationRegex'};
452 next COPYMAP if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex);
454 # include based on a regex match to location names
455 if($localcfg->{'OnlyIncludeLocationRegex'}){
456 my @locregex = ( (ref($localcfg->{'OnlyIncludeLocationRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocationRegex'}} : ($localcfg->{'OnlyIncludeLocationRegex'}) );
457 my $reg = $localcfg->{'OnlyIncludeLocationRegex'};
458 next COPYMAP unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex);
460 # Exclude based on a callno regex
461 if($localcfg->{'CallNoRegex'}){
462 my @callnoregex = ( (ref($localcfg->{'CallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'CallNoRegex'}} : ($localcfg->{'CallNoRegex'}) );
463 my $reg = $localcfg->{'CallNoRegex'};
464 next COPYMAP if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex);
466 # Include based on a callno regex
467 if($localcfg->{'OnlyIncludeCallNoRegex'}){
468 my @callnoregex = ( (ref($localcfg->{'OnlyIncludeCallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeCallNoRegex'}} : ($localcfg->{'OnlyIncludeCallNoRegex'}) );
469 my $reg = $localcfg->{'OnlyIncludeCallNoRegex'};
470 next COPYMAP unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex);
473 # Trim call number to a float and exclude based on Dewey Range
474 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
475 my $gt = $localcfg->{'DeweyGT'};
476 my $lt = $localcfg->{'DeweyLT'};
478 # FIXME if either config has an array just ditch for now
479 next COPYMAP if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY");
480 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
481 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
483 my $callno = $cn->label;
484 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
485 print STDERR $callno;
486 #note that we are making big assumptions about the call numbers in the db
488 # we have a range, exclude what's inbetween
490 next COPYMAP if $callno > $gt and $callno < $lt;
491 # we only have a top threshold, exclude everything below it
493 next COPYMAP if $callno < $lt;
494 # we only have a bottom threshold, exclude everything above it
496 next COPYMAP if $callno > $gt;
500 if($thisorg->parent_ou){
501 $thisorg = $orgs{$thisorg->parent_ou}
513 b => $orgs{$printlib}->shortname,
514 #b => $orgs{$owninglib}->shortname,
515 #b => $orgs{$circlib}->shortname,
516 c => $shelves{$cp->location}->name,
518 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
520 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
521 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
522 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
523 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
524 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
525 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
526 z => $statuses{$cp->status}->name,
531 $r->delete_field( $r->field('901') );
535 a => $bib->tcn_value,
536 b => $bib->tcn_source,
543 stats() if (! ($count{cp} % 100 ));