8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
26 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
28 my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
29 my ($exclusion_ini,$collapse_to_depth);
35 'items' => \$holdings,
36 'mfhd' => \$export_mfhd,
37 'all' => \$all_records,
38 'location=s' => \$location,
39 'money=s' => \$dollarsign,
40 'config=s' => \$config,
41 'format=s' => \$format,
44 'encoding=s' => \$encoding,
45 'timeout=i' => \$timeout,
46 'force901' => \$force901,
47 'exclusion_ini=s' => \$exclusion_ini,
48 'collapse_to_depth=i' => \$collapse_to_depth,
52 $cfg = new Config::Simple($exclusion_ini) if ($exclusion_ini);
56 This script exports MARC authority, bibliographic, and serial holdings
57 records from an Evergreen database.
59 Input to this script can consist of a list of record IDs, with one record ID
60 per line, corresponding to the record ID in the Evergreen database table of
61 your requested record type.
63 Alternately, passing the --all option will attempt to export all records of
64 the specified type from the Evergreen database. The --all option starts at
65 record ID 1 and increments the ID by 1 until the largest ID in the database
66 is retrieved. This may not be very efficient for databases with large gaps
67 in their ID sequences.
70 --help or -h This screen.
71 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
72 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
73 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
74 --xml-idl or -x Location of the IDL XML
75 --timeout Timeout for exporting a single record; increase if you
76 are using --holdings and are exporting records that
77 have a lot of items attached to them.
78 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
79 --all or -a Export all records; ignores input list
81 Additional options for type = 'BIBLIO':
82 --items or -i Include items (holdings) in the output
83 --money Currency symbol to use in item price field [\$]
84 --mfhd Export serial MFHD records for associated bib records
85 Not compatible with --format=BRE
86 --location or -l MARC Location Code for holdings from
87 http://www.loc.gov/marc/organizations/orgshome.html
91 To export a set of USMARC records in a file named "output_file" based on the
92 IDs contained in a file named "list_of_ids":
93 cat list_of_ids | $0 > output_file
95 To export a set of MARC21XML authority records in a file named "output.xml"
96 for all authority records in the database:
97 $0 --format XML --type AUTHORITY --all > output.xml
104 $format = uc($format);
105 $encoding = uc($encoding);
107 binmode(STDOUT, ':raw') if ($encoding ne 'UTF-8');
108 binmode(STDOUT, ':utf8') if ($encoding eq 'UTF-8');
110 if (!grep { $format eq $_ } @formats) {
111 die "Please select a supported format. ".
112 "Right now that means one of [".
113 join('|',@formats). "]\n";
116 if ($format ne 'XML') {
117 my $type = 'MARC::File::' . $format;
122 # set default timeout and/or correct silly user who
123 # supplied a negative timeout; default timeout of
124 # 300 seconds if exporting items determined empirically.
125 $timeout = $holdings ? 300 : 1;
128 OpenSRF::System->bootstrap_client( config_file => $config );
131 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
134 Fieldmapper->import(IDL => $idl);
136 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
137 OpenILS::Utils::CStoreEditor::init();
138 my $editor = OpenILS::Utils::CStoreEditor->new();
140 print <<HEADER if ($format eq 'XML');
141 <?xml version="1.0" encoding="$encoding"?>
142 <collection xmlns='http://www.loc.gov/MARC21/slim'>
157 my $last_time = time;
158 my %count = ('bib' => 0, 'did' => 0);
163 if ($type eq 'biblio') {
164 $top_record = $editor->search_biblio_record_entry([
166 {order_by => { 'bre' => 'id DESC' }, limit => 1}
168 } elsif ($type eq 'authority') {
169 $top_record = $editor->search_authority_record_entry([
171 {order_by => { 'are' => 'id DESC' }, limit => 1}
174 for (my $i = 0; $i++ < $top_record;) {
178 while ( my $i = <> ) {
183 print "</collection>\n" if ($format eq 'XML');
185 $speed = $count{did} / (time - $start);
186 my $time = time - $start;
189 Exports Attempted : $count{bib}
190 Exports Completed : $count{did}
191 Overall Speed : $speed
192 Total Time Elapsed: $time seconds
201 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
202 my $s = $r->recv(timeout => $timeout);
204 warn "\n!!!!! Failed trying to read record $id\n";
208 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
212 warn "\n!!!!!! Timed out trying to read record $id\n";
221 if ($format eq 'ARE' or $format eq 'BRE') {
222 print OpenSRF::Utils::JSON->perl2JSON($bib);
230 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
231 if ($type eq 'biblio') {
232 add_bib_holdings($bib, $r);
235 if ($format eq 'XML') {
236 my $xml = $r->as_xml_record;
237 $xml =~ s/^<\?.+?\?>$//mo;
239 } elsif ($format eq 'UNIMARC') {
241 } elsif ($format eq 'USMARC') {
250 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
253 if ($export_mfhd and $type eq 'biblio') {
254 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
255 foreach my $mfhd (@$mfhds) {
257 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
259 if ($format eq 'XML') {
260 my $xml = $r->as_xml_record;
261 $xml =~ s/^<\?.+?\?>$//mo;
263 } elsif ($format eq 'UNIMARC') {
265 } elsif ($format eq 'USMARC') {
271 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
276 stats() if (! ($count{bib} % 50 ));
283 $speed = $count{did} / (time - $start);
285 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
286 my $cn_speed = $count{cn} / (time - $start);
287 my $cp_speed = $count{cp} / (time - $start);
289 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
290 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
296 $count{did_last} = $count{did};
297 $count{time_last} = time;
300 sub get_bib_locations {
301 print STDERR "Retrieving Org Units ... ";
302 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
304 while (my $o = $r->recv) {
305 die $r->failed->stringify if ($r->failed);
313 print STDERR "Retrieving Copy statuses ... ";
314 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
316 while (my $sta = $r->recv) {
317 die $r->failed->stringify if ($r->failed);
318 $sta = $sta->content;
320 $statuses{$sta->id} = $sta;
325 print STDERR "Retrieving OU types ... ";
326 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
328 while (my $outy = $r->recv) {
329 die $r->failed->stringify if ($r->failed);
330 $outy = $outy->content;
332 $outypes{$outy->id} = $outy;
337 print STDERR "Retrieving Shelving locations ... ";
338 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
340 while (my $s = $r->recv) {
341 die $r->failed->stringify if ($r->failed);
344 $shelves{$s->id} = $s;
349 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
352 sub add_bib_holdings {
356 my $cn_list = $bib->call_numbers;
357 if ($cn_list && @$cn_list) {
359 $count{cn} += @$cn_list;
361 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
362 if ($cp_list && @$cp_list) {
365 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
367 for my $cn ( @$cn_list ) {
368 my $cn_map_list = $cn_map{$cn->id};
370 COPYMAP: for my $cp ( @$cn_map_list ) {
374 my $owninglib = $cn->owning_lib;
375 my $circlib = $cp->circ_lib;
376 my $printlib = $cp->circ_lib;
379 my $thisorg = $orgs{$circlib};
381 if($collapse_to_depth){
382 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
383 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
384 if( $localcfg->{'DontCollapse'} ){
387 if($thisorg->parent_ou){
388 $thisorg = $orgs{$thisorg->parent_ou};
389 $printlib = $thisorg->id;
394 $thisorg = $orgs{$circlib};
398 # load the local config from the .ini file for exclusions
399 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
401 # if we see this setting, just skip that org
403 next COPYMAP if( $localcfg->{'ExcludeEntireOrg'} );
405 # what follows are exclusion rules
408 if($localcfg->{'Flags'}){
409 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
410 my @flags = ( (ref($localcfg->{'Flags'}) eq "ARRAY") ? @{$localcfg->{'Flags'}} : ($localcfg->{'Flags'}));
411 next COPYMAP if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't');
412 next COPYMAP if( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f');
413 next COPYMAP if( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f');
414 next COPYMAP if( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f');
416 # Excluded Circ Modifiers
417 if($localcfg->{'CircMods'}){
418 my $circmod = $cp->circ_modifier || "";
419 my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
420 next COPYMAP if( grep { $_ eq $circmod } @circmods && @circmods);
422 # Inverse rule -- only include specified Circ Mods
423 if($localcfg->{'OnlyIncludeCircMods'}){
424 my $circmod = $cp->circ_modifier || "";
425 my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
426 next COPYMAP unless( grep { $_ eq $circmod } @circmods && @circmods);
428 # Excluded Copy Statuses
429 if($localcfg->{'Statuses'}){
430 my @statuses = ( (ref($localcfg->{'Statuses'}) eq "ARRAY") ? @{$localcfg->{'Statuses'}} : ($localcfg->{'Statuses'}) );
431 next COPYMAP if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses);
434 if($localcfg->{'Locations'}){
435 my @locations = ( (ref($localcfg->{'Locations'}) eq "ARRAY") ? @{$localcfg->{'Locations'}} : ($localcfg->{'Locations'}) );
436 next COPYMAP if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations);
438 # Inverse rule - Only use the specified locations
439 if($localcfg->{'OnlyIncludeLocations'}){
440 my @locations = ( (ref($localcfg->{'OnlyIncludeLocations'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocations'}} : ($localcfg->{'Locations'}) );
441 next COPYMAP unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations);
443 # exclude based on a regex match to location names
444 if($localcfg->{'LocationRegex'}){
445 my @locregex = ( (ref($localcfg->{'LocationRegex'}) eq "ARRAY") ? @{$localcfg->{'LocationRegex'}} : ($localcfg->{'LocationRegex'}) );
446 my $reg = $localcfg->{'LocationRegex'};
447 next COPYMAP if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex);
449 # include based on a regex match to location names
450 if($localcfg->{'OnlyIncludeLocationRegex'}){
451 my @locregex = ( (ref($localcfg->{'OnlyIncludeLocationRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocationRegex'}} : ($localcfg->{'OnlyIncludeLocationRegex'}) );
452 my $reg = $localcfg->{'OnlyIncludeLocationRegex'};
453 next COPYMAP unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex);
455 # Exclude based on a callno regex
456 if($localcfg->{'CallNoRegex'}){
457 my @callnoregex = ( (ref($localcfg->{'CallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'CallNoRegex'}} : ($localcfg->{'CallNoRegex'}) );
458 my $reg = $localcfg->{'CallNoRegex'};
459 next COPYMAP if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex);
461 # Include based on a callno regex
462 if($localcfg->{'OnlyIncludeCallNoRegex'}){
463 my @callnoregex = ( (ref($localcfg->{'OnlyIncludeCallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeCallNoRegex'}} : ($localcfg->{'OnlyIncludeCallNoRegex'}) );
464 my $reg = $localcfg->{'OnlyIncludeCallNoRegex'};
465 next COPYMAP unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex);
468 # Trim call number to a float and exclude based on Dewey Range
469 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
470 my $gt = $localcfg->{'DeweyGT'};
471 my $lt = $localcfg->{'DeweyLT'};
473 # FIXME if either config has an array just ditch for now
474 next COPYMAP if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY");
475 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
476 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
478 my $callno = $cn->label;
479 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
480 print STDERR $callno;
481 #note that we are making big assumptions about the call numbers in the db
483 # we have a range, exclude what's inbetween
485 next COPYMAP if $callno > $gt and $callno < $lt;
486 # we only have a top threshold, exclude everything below it
488 next COPYMAP if $callno < $lt;
489 # we only have a bottom threshold, exclude everything above it
491 next COPYMAP if $callno > $gt;
495 if($thisorg->parent_ou){
496 $thisorg = $orgs{$thisorg->parent_ou}
508 b => $orgs{$printlib}->shortname,
509 #b => $orgs{$owninglib}->shortname,
510 #b => $orgs{$circlib}->shortname,
511 c => $shelves{$cp->location}->name,
513 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
515 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
516 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
517 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
518 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
519 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
520 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
521 z => $statuses{$cp->status}->name,
526 $r->delete_field( $r->field('901') );
530 a => $bib->tcn_value,
531 b => $bib->tcn_source,
538 stats() if (! ($count{cp} % 100 ));