8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
29 my ($exclusion_ini,$collapse_to_depth);
35 'items' => \$holdings,
36 'mfhd' => \$export_mfhd,
37 'all' => \$all_records,
38 'location=s' => \$location,
39 'money=s' => \$dollarsign,
40 'config=s' => \$config,
41 'format=s' => \$format,
44 'encoding=s' => \$encoding,
45 'timeout=i' => \$timeout,
46 'force901' => \$force901,
47 'exclusion_ini=s' => \$exclusion_ini,
48 'collapse_to_depth=i' => \$collapse_to_depth,
52 $cfg = new Config::Simple($exclusion_ini) if ($exclusion_ini);
56 This script exports MARC authority, bibliographic, and serial holdings
57 records from an Evergreen database.
59 Input to this script can consist of a list of record IDs, with one record ID
60 per line, corresponding to the record ID in the Evergreen database table of
61 your requested record type.
63 Alternately, passing the --all option will attempt to export all records of
64 the specified type from the Evergreen database. The --all option starts at
65 record ID 1 and increments the ID by 1 until the largest ID in the database
66 is retrieved. This may not be very efficient for databases with large gaps
67 in their ID sequences.
70 --help or -h This screen.
71 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
72 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
73 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
74 --xml-idl or -x Location of the IDL XML
75 --timeout Timeout for exporting a single record; increase if you
76 are using --holdings and are exporting records that
77 have a lot of items attached to them.
78 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
79 --all or -a Export all records; ignores input list
81 Additional options for type = 'BIBLIO':
82 --items or -i Include items (holdings) in the output
83 --money Currency symbol to use in item price field [\$]
84 --mfhd Export serial MFHD records for associated bib records
85 Not compatible with --format=BRE
86 --location or -l MARC Location Code for holdings from
87 http://www.loc.gov/marc/organizations/orgshome.html
91 To export a set of USMARC records in a file named "output_file" based on the
92 IDs contained in a file named "list_of_ids":
93 cat list_of_ids | $0 > output_file
95 To export a set of MARC21XML authority records in a file named "output.xml"
96 for all authority records in the database:
97 $0 --format XML --type AUTHORITY --all > output.xml
104 $format = uc($format);
105 $encoding = uc($encoding);
107 binmode(STDOUT, ':raw') if ($encoding ne 'UTF-8');
108 binmode(STDOUT, ':utf8') if ($encoding eq 'UTF-8');
110 if (!grep { $format eq $_ } @formats) {
111 die "Please select a supported format. ".
112 "Right now that means one of [".
113 join('|',@formats). "]\n";
116 if ($format ne 'XML') {
117 my $type = 'MARC::File::' . $format;
122 # set default timeout and/or correct silly user who
123 # supplied a negative timeout; default timeout of
124 # 300 seconds if exporting items determined empirically.
125 $timeout = $holdings ? 300 : 1;
128 OpenSRF::System->bootstrap_client( config_file => $config );
131 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
134 Fieldmapper->import(IDL => $idl);
136 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
137 OpenILS::Utils::CStoreEditor::init();
138 my $editor = OpenILS::Utils::CStoreEditor->new();
140 print <<HEADER if ($format eq 'XML');
141 <?xml version="1.0" encoding="$encoding"?>
142 <collection xmlns='http://www.loc.gov/MARC21/slim'>
157 my $last_time = time;
158 my %count = ('bib' => 0, 'did' => 0);
163 if ($type eq 'biblio') {
164 $top_record = $editor->search_biblio_record_entry([
166 {order_by => { 'bre' => 'id DESC' }, limit => 1}
168 } elsif ($type eq 'authority') {
169 $top_record = $editor->search_authority_record_entry([
171 {order_by => { 'are' => 'id DESC' }, limit => 1}
174 for (my $i = 0; $i++ < $top_record;) {
178 while ( my $i = <> ) {
183 print "</collection>\n" if ($format eq 'XML');
185 $speed = $count{did} / (time - $start);
186 my $time = time - $start;
189 Exports Attempted : $count{bib}
190 Exports Completed : $count{did}
191 Overall Speed : $speed
192 Total Time Elapsed: $time seconds
201 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
202 my $s = $r->recv(timeout => $timeout);
204 warn "\n!!!!! Failed trying to read record $id\n";
208 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
212 warn "\n!!!!!! Timed out trying to read record $id\n";
221 if ($format eq 'ARE' or $format eq 'BRE') {
222 print OpenSRF::Utils::JSON->perl2JSON($bib);
230 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
231 if ($type eq 'biblio') {
232 add_bib_holdings($bib, $r);
235 if ($format eq 'XML') {
236 my $xml = $r->as_xml_record;
237 $xml =~ s/^<\?.+?\?>$//mo;
239 } elsif ($format eq 'UNIMARC') {
241 } elsif ($format eq 'USMARC') {
250 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
253 if ($export_mfhd and $type eq 'biblio') {
254 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
255 foreach my $mfhd (@$mfhds) {
257 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
259 if ($format eq 'XML') {
260 my $xml = $r->as_xml_record;
261 $xml =~ s/^<\?.+?\?>$//mo;
263 } elsif ($format eq 'UNIMARC') {
265 } elsif ($format eq 'USMARC') {
271 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
276 stats() if (! ($count{bib} % 50 ));
283 $speed = $count{did} / (time - $start);
285 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
286 my $cn_speed = $count{cn} / (time - $start);
287 my $cp_speed = $count{cp} / (time - $start);
289 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
290 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
296 $count{did_last} = $count{did};
297 $count{time_last} = time;
300 sub get_bib_locations {
301 print STDERR "Retrieving Org Units ... ";
302 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
304 while (my $o = $r->recv) {
305 die $r->failed->stringify if ($r->failed);
313 print STDERR "Retrieving Copy statuses ... ";
314 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
316 while (my $sta = $r->recv) {
317 die $r->failed->stringify if ($r->failed);
318 $sta = $sta->content;
320 $statuses{$sta->id} = $sta;
325 print STDERR "Retrieving OU types ... ";
326 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
328 while (my $outy = $r->recv) {
329 die $r->failed->stringify if ($r->failed);
330 $outy = $outy->content;
332 $outypes{$outy->id} = $outy;
337 print STDERR "Retrieving Shelving locations ... ";
338 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
340 while (my $s = $r->recv) {
341 die $r->failed->stringify if ($r->failed);
344 $shelves{$s->id} = $s;
349 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
352 sub add_bib_holdings {
356 my $cn_list = $bib->call_numbers;
357 if ($cn_list && @$cn_list) {
359 $count{cn} += @$cn_list;
361 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
362 if ($cp_list && @$cp_list) {
365 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
367 for my $cn ( @$cn_list ) {
368 my $cn_map_list = $cn_map{$cn->id};
370 COPYMAP: for my $cp ( @$cn_map_list ) {
374 my $owninglib = $cn->owning_lib;
375 my $circlib = $cp->circ_lib;
376 my $printlib = $cp->circ_lib;
379 my $thisorg = $orgs{$circlib};
381 if($collapse_to_depth){
382 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
383 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
384 if( $localcfg->{'DontCollapse'} ){
387 if($thisorg->parent_ou){
388 $thisorg = $orgs{$thisorg->parent_ou};
389 $printlib = $thisorg->id;
394 $thisorg = $orgs{$circlib};
398 print STDERR "here.";
399 # load the local config from the .ini file for exclusions
400 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
402 # if we see this setting, just skip that org
404 next COPYMAP if( $localcfg->{'ExcludeEntireOrg'} );
406 # what follows are exclusion rules
409 if($localcfg->{'Flags'}){
410 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
411 my @flags = ( (ref($localcfg->{'Flags'}) eq "ARRAY") ? @{$localcfg->{'Flags'}} : ($localcfg->{'Flags'}));
412 next COPYMAP if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't');
413 next COPYMAP if( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f');
414 next COPYMAP if( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f');
415 next COPYMAP if( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f');
417 # Excluded Circ Modifiers
418 if($localcfg->{'CircMods'}){
419 my $circmod = $cp->circ_modifier || "";
420 my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
421 next COPYMAP if( grep { $_ eq $circmod } @circmods && @circmods);
423 # Inverse rule -- only include specified Circ Mods
424 if($localcfg->{'OnlyIncludeCircMods'}){
425 my $circmod = $cp->circ_modifier || "";
426 my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
427 next COPYMAP unless( grep { $_ eq $circmod } @circmods && @circmods);
429 # Excluded Copy Statuses
430 if($localcfg->{'Statuses'}){
431 my @statuses = ( (ref($localcfg->{'Statuses'}) eq "ARRAY") ? @{$localcfg->{'Statuses'}} : ($localcfg->{'Statuses'}) );
432 next COPYMAP if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses);
435 if($localcfg->{'Locations'}){
436 my @locations = ( (ref($localcfg->{'Locations'}) eq "ARRAY") ? @{$localcfg->{'Locations'}} : ($localcfg->{'Locations'}) );
437 next COPYMAP if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations);
439 # Inverse rule - Only use the specified locations
440 if($localcfg->{'OnlyIncludeLocations'}){
441 my @locations = ( (ref($localcfg->{'OnlyIncludeLocations'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocations'}} : ($localcfg->{'Locations'}) );
442 next COPYMAP unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations);
444 # exclude based on a regex match to location names
445 if($localcfg->{'LocationRegex'}){
446 my @locregex = ( (ref($localcfg->{'LocationRegex'}) eq "ARRAY") ? @{$localcfg->{'LocationRegex'}} : ($localcfg->{'LocationRegex'}) );
447 my $reg = $localcfg->{'LocationRegex'};
448 next COPYMAP if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex);
450 # include based on a regex match to location names
451 if($localcfg->{'OnlyIncludeLocationRegex'}){
452 my @locregex = ( (ref($localcfg->{'OnlyIncludeLocationRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocationRegex'}} : ($localcfg->{'OnlyIncludeLocationRegex'}) );
453 my $reg = $localcfg->{'OnlyIncludeLocationRegex'};
454 next COPYMAP unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex);
456 # Exclude based on a callno regex
457 if($localcfg->{'CallNoRegex'}){
458 my @callnoregex = ( (ref($localcfg->{'CallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'CallNoRegex'}} : ($localcfg->{'CallNoRegex'}) );
459 my $reg = $localcfg->{'CallNoRegex'};
460 next COPYMAP if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex);
462 # Include based on a callno regex
463 if($localcfg->{'OnlyIncludeCallNoRegex'}){
464 my @callnoregex = ( (ref($localcfg->{'OnlyIncludeCallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeCallNoRegex'}} : ($localcfg->{'OnlyIncludeCallNoRegex'}) );
465 my $reg = $localcfg->{'OnlyIncludeCallNoRegex'};
466 next COPYMAP unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex);
469 # Trim call number to a float and exclude based on Dewey Range
470 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
471 my $gt = $localcfg->{'DeweyGT'};
472 my $lt = $localcfg->{'DeweyLT'};
474 # FIXME if either config has an array just ditch for now
475 next COPYMAP if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY");
476 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
477 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
479 my $callno = $cn->label;
480 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
481 print STDERR $callno;
482 #note that we are making big assumptions about the call numbers in the db
484 # we have a range, exclude what's inbetween
486 next COPYMAP if $callno > $gt and $callno < $lt;
487 # we only have a top threshold, exclude everything below it
489 next COPYMAP if $callno < $lt;
490 # we only have a bottom threshold, exclude everything above it
492 next COPYMAP if $callno > $gt;
496 if($thisorg->parent_ou){
497 $thisorg = $orgs{$thisorg->parent_ou}
509 b => $orgs{$printlib}->shortname,
510 #b => $orgs{$owninglib}->shortname,
511 #b => $orgs{$circlib}->shortname,
512 c => $shelves{$cp->location}->name,
514 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
516 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
517 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
518 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
519 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
520 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
521 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
522 z => $statuses{$cp->status}->name,
527 $r->delete_field( $r->field('901') );
531 a => $bib->tcn_value,
532 b => $bib->tcn_source,
539 stats() if (! ($count{cp} % 100 ));