initial import - marc custom export (for Outlook Online)
[sitka/sitka-tools.git] / marc_export_custom / marc_export_custom
CommitLineData
456d236e
JF
1#!/usr/bin/perl
2# vim:et:sw=4:ts=4:
3use strict;
4use warnings;
5use bytes;
6
7use OpenSRF::System;
8use OpenSRF::EX qw/:try/;
9use OpenSRF::AppSession;
10use OpenSRF::Utils::JSON;
11use OpenSRF::Utils::SettingsClient;
12use OpenILS::Application::AppUtils;
13use OpenILS::Utils::Fieldmapper;
14use OpenILS::Utils::CStoreEditor;
15
16use MARC::Record;
17use MARC::File::XML;
18use UNIVERSAL::require;
19
20use Time::HiRes qw/time/;
21use Getopt::Long;
22
23use Config::Simple;
24use Data::Dumper;
25
26my @formats = qw/USMARC UNIMARC XML BRE ARE/;
27
28my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef);
29my ($exclusion_ini,$collapse_to_depth);
30my $cfg;
31
32GetOptions(
33 'help' => \$help,
34 'items' => \$holdings,
35 'mfhd' => \$export_mfhd,
36 'all' => \$all_records,
37 'location=s' => \$location,
38 'money=s' => \$dollarsign,
39 'config=s' => \$config,
40 'format=s' => \$format,
41 'type=s' => \$type,
42 'xml-idl=s' => \$idl,
43 'encoding=s' => \$encoding,
44 'timeout=i' => \$timeout,
45 'exclusion_ini=s' => \$exclusion_ini,
46 'collapse_to_depth=i' => \$collapse_to_depth,
47
48);
49
50$cfg = new Config::Simple($exclusion_ini) if ($exclusion_ini);
51
52if ($help) {
53print <<"HELP";
54This script exports MARC authority, bibliographic, and serial holdings
55records from an Evergreen database.
56
57Input to this script can consist of a list of record IDs, with one record ID
58per line, corresponding to the record ID in the Evergreen database table of
59your requested record type.
60
61Alternately, passing the --all option will attempt to export all records of
62the specified type from the Evergreen database. The --all option starts at
63record ID 1 and increments the ID by 1 until the largest ID in the database
64is retrieved. This may not be very efficient for databases with large gaps
65in their ID sequences.
66
67Usage: $0 [options]
68 --help or -h This screen.
69 --config or -c Configuration file [/openils/conf/opensrf_core.xml]
70 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
71 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
72 --xml-idl or -x Location of the IDL XML
73 --timeout Timeout for exporting a single record; increase if you
74 are using --holdings and are exporting records that
75 have a lot of items attached to them.
76 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
77 --all or -a Export all records; ignores input list
78
79 Additional options for type = 'BIBLIO':
80 --items or -i Include items (holdings) in the output
81 --money Currency symbol to use in item price field [\$]
82 --mfhd Export serial MFHD records for associated bib records
83 Not compatible with --format=BRE
84 --location or -l MARC Location Code for holdings from
85 http://www.loc.gov/marc/organizations/orgshome.html
86
87Examples:
88
89To export a set of USMARC records in a file named "output_file" based on the
90IDs contained in a file named "list_of_ids":
91 cat list_of_ids | $0 > output_file
92
93To export a set of MARC21XML authority records in a file named "output.xml"
94for all authority records in the database:
95 $0 --format XML --type AUTHORITY --all > output.xml
96
97HELP
98 exit;
99}
100
101$type = lc($type);
102$format = uc($format);
103$encoding = uc($encoding);
104
105binmode(STDOUT, ':raw') if ($encoding ne 'UTF-8');
106binmode(STDOUT, ':utf8') if ($encoding eq 'UTF-8');
107
108if (!grep { $format eq $_ } @formats) {
109 die "Please select a supported format. ".
110 "Right now that means one of [".
111 join('|',@formats). "]\n";
112}
113
114if ($format ne 'XML') {
115 my $type = 'MARC::File::' . $format;
116 $type->require;
117}
118
119if ($timeout <= 0) {
120 # set default timeout and/or correct silly user who
121 # supplied a negative timeout; default timeout of
122 # 300 seconds if exporting items determined empirically.
123 $timeout = $holdings ? 300 : 1;
124}
125
126OpenSRF::System->bootstrap_client( config_file => $config );
127
128if (!$idl) {
129 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
130}
131
132Fieldmapper->import(IDL => $idl);
133
134my $ses = OpenSRF::AppSession->create('open-ils.cstore');
135OpenILS::Utils::CStoreEditor::init();
136my $editor = OpenILS::Utils::CStoreEditor->new();
137
138print <<HEADER if ($format eq 'XML');
139<?xml version="1.0" encoding="$encoding"?>
140<collection xmlns='http://www.loc.gov/MARC21/slim'>
141HEADER
142
143my %orgs;
144my %shelves;
145my %statuses;
146my %outypes;
147
148my $flesh = {};
149
150if ($holdings) {
151 get_bib_locations();
152}
153
154my $start = time;
155my $last_time = time;
156my %count = ('bib' => 0, 'did' => 0);
157my $speed = 0;
158
159if ($all_records) {
160 my $top_record = 0;
161 if ($type eq 'biblio') {
162 $top_record = $editor->search_biblio_record_entry([
163 {deleted => 'f'},
164 {order_by => { 'bre' => 'id DESC' }, limit => 1}
165 ])->[0]->id;
166 } elsif ($type eq 'authority') {
167 $top_record = $editor->search_authority_record_entry([
168 {deleted => 'f'},
169 {order_by => { 'are' => 'id DESC' }, limit => 1}
170 ])->[0]->id;
171 }
172 for (my $i = 0; $i++ < $top_record;) {
173 export_record($i);
174 }
175} else {
176 while ( my $i = <> ) {
177 export_record($i);
178 }
179}
180
181print "</collection>\n" if ($format eq 'XML');
182
183$speed = $count{did} / (time - $start);
184my $time = time - $start;
185print STDERR <<DONE;
186
187Exports Attempted : $count{bib}
188Exports Completed : $count{did}
189Overall Speed : $speed
190Total Time Elapsed: $time seconds
191
192DONE
193
194sub export_record {
195 my $id = shift;
196
197 my $bib;
198
199 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
200 my $s = $r->recv(timeout => $timeout);
201 if (!$s) {
202 warn "\n!!!!! Failed trying to read record $id\n";
203 return;
204 }
205 if ($r->failed) {
206 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
207 return;
208 }
209 if ($r->timed_out) {
210 warn "\n!!!!!! Timed out trying to read record $id\n";
211 return;
212 }
213 $bib = $s->content;
214 $r->finish;
215
216 $count{bib}++;
217 return unless $bib;
218
219 if ($format eq 'ARE' or $format eq 'BRE') {
220 print OpenSRF::Utils::JSON->perl2JSON($bib);
221 stats();
222 $count{did}++;
223 return;
224 }
225
226 try {
227
228 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
229 if ($type eq 'biblio') {
230 add_bib_holdings($bib, $r);
231 }
232
233 if ($format eq 'XML') {
234 my $xml = $r->as_xml_record;
235 $xml =~ s/^<\?.+?\?>$//mo;
236 print $xml;
237 } elsif ($format eq 'UNIMARC') {
238 print $r->as_usmarc;
239 } elsif ($format eq 'USMARC') {
240 print $r->as_usmarc;
241 }
242
243 $count{did}++;
244
245 } otherwise {
246 my $e = shift;
247 warn "\n$e\n";
248 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
249 };
250
251 if ($export_mfhd and $type eq 'biblio') {
252 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
253 foreach my $mfhd (@$mfhds) {
254 try {
255 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
256
257 if ($format eq 'XML') {
258 my $xml = $r->as_xml_record;
259 $xml =~ s/^<\?.+?\?>$//mo;
260 print $xml;
261 } elsif ($format eq 'UNIMARC') {
262 print $r->as_usmarc;
263 } elsif ($format eq 'USMARC') {
264 print $r->as_usmarc;
265 }
266 } otherwise {
267 my $e = shift;
268 warn "\n$e\n";
269 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
270 };
271 }
272 }
273
274 stats() if (! ($count{bib} % 50 ));
275}
276
277sub stats {
278 try {
279 no warnings;
280
281 $speed = $count{did} / (time - $start);
282
283 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
284 my $cn_speed = $count{cn} / (time - $start);
285 my $cp_speed = $count{cp} / (time - $start);
286
287 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
288 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
289 $speed,
290 $speed_now,
291 $cn_speed,
292 $cp_speed;
293 } otherwise {};
294 $count{did_last} = $count{did};
295 $count{time_last} = time;
296}
297
298sub get_bib_locations {
299 print STDERR "Retrieving Org Units ... ";
300 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
301
302 while (my $o = $r->recv) {
303 die $r->failed->stringify if ($r->failed);
304 $o = $o->content;
305 last unless ($o);
306 $orgs{$o->id} = $o;
307 }
308 $r->finish;
309 print STDERR "OK\n";
310
311 print STDERR "Retrieving Copy statuses ... ";
312 $r = $ses->request( 'open-ils.cstore.direct.config.copy_status.search', { id => { '!=' => undef } } );
313
314 while (my $sta = $r->recv) {
315 die $r->failed->stringify if ($r->failed);
316 $sta = $sta->content;
317 last unless ($sta);
318 $statuses{$sta->id} = $sta;
319 }
320 $r->finish;
321 print STDERR "OK\n";
322
323 print STDERR "Retrieving OU types ... ";
324 $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit_type.search', { id => { '!=' => undef } } );
325
326 while (my $outy = $r->recv) {
327 die $r->failed->stringify if ($r->failed);
328 $outy = $outy->content;
329 last unless ($outy);
330 $outypes{$outy->id} = $outy;
331 }
332 $r->finish;
333 print STDERR "OK\n";
334
335 print STDERR "Retrieving Shelving locations ... ";
336 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
337
338 while (my $s = $r->recv) {
339 die $r->failed->stringify if ($r->failed);
340 $s = $s->content;
341 last unless ($s);
342 $shelves{$s->id} = $s;
343 }
344 $r->finish;
345 print STDERR "OK\n";
346
347 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
348}
349
350sub add_bib_holdings {
351 my $bib = shift;
352 my $r = shift;
353
354 my $cn_list = $bib->call_numbers;
355 if ($cn_list && @$cn_list) {
356
357 $count{cn} += @$cn_list;
358
359 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
360 if ($cp_list && @$cp_list) {
361
362 my %cn_map;
363 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
364
365 for my $cn ( @$cn_list ) {
366 my $cn_map_list = $cn_map{$cn->id};
367
368 COPYMAP: for my $cp ( @$cn_map_list ) {
369 $count{cp}++;
370
371
372 my $owninglib = $cn->owning_lib;
373 my $circlib = $cp->circ_lib;
374 my $printlib = $cp->circ_lib;
375
376 if($cfg){
377 my $thisorg = $orgs{$circlib};
378
379 if($collapse_to_depth){
380 while ( $outypes{ $thisorg->ou_type }->depth > $collapse_to_depth ){
381 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
382 if( $localcfg->{'DontCollapse'} ){
383 last;
384 }
385 if($thisorg->parent_ou){
386 $thisorg = $orgs{$thisorg->parent_ou};
387 $printlib = $thisorg->id;
388 }
389 }
390 }
391
392 $thisorg = $orgs{$circlib};
393
394 while( $thisorg ){
395 print STDERR "here.";
396 # load the local config from the .ini file for exclusions
397 my $localcfg = $cfg->param(-block=> $thisorg->shortname);
398
399 # if we see this setting, just skip that org
400
401 next COPYMAP if( $localcfg->{'ExcludeEntireOrg'} );
402
403 # what follows are exclusion rules
404
405 # Excluded Flags
406 if($localcfg->{'Flags'}){
407 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
408 my @flags = ( (ref($localcfg->{'Flags'}) eq "ARRAY") ? @{$localcfg->{'Flags'}} : ($localcfg->{'Flags'}));
409 next COPYMAP if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't');
410 next COPYMAP if( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f');
411 next COPYMAP if( grep { $_ eq 'circulate' } @flags && $cp->circulate eq 'f');
412 next COPYMAP if( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f');
413 }
414 # Excluded Circ Modifiers
415 if($localcfg->{'CircMods'}){
416 my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
417 next COPYMAP if( grep { $_ eq $cp->circ_modifier } @circmods && @circmods);
418 }
419 # Excluded Copy Statuses
420 if($localcfg->{'Statuses'}){
421 my @statuses = ( (ref($localcfg->{'Statuses'}) eq "ARRAY") ? @{$localcfg->{'Statuses'}} : ($localcfg->{'Statuses'}) );
422 next COPYMAP if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses);
423 }
424 # Excluded Locations
425 if($localcfg->{'Locations'}){
426 my @locations = ( (ref($localcfg->{'Locations'}) eq "ARRAY") ? @{$localcfg->{'Locations'}} : ($localcfg->{'Locations'}) );
427 next COPYMAP if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations);
428 }
429 # Inverse rule - Only use the specified locations
430 if($localcfg->{'OnlyIncludeLocations'}){
431 my @locations = ( (ref($localcfg->{'OnlyIncludeLocations'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocations'}} : ($localcfg->{'Locations'}) );
432 next COPYMAP unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations);
433 }
434 # exclude based on a regex match to location names
435 if($localcfg->{'LocationRegex'}){
436 my @locregex = ( (ref($localcfg->{'LocationRegex'}) eq "ARRAY") ? @{$localcfg->{'LocationRegex'}} : ($localcfg->{'LocationRegex'}) );
437 my $reg = $localcfg->{'LocationRegex'};
438 next COPYMAP if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex);
439 }
440 # include based on a regex match to location names
441 if($localcfg->{'OnlyIncludeLocationRegex'}){
442 my @locregex = ( (ref($localcfg->{'OnlyIncludeLocationRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocationRegex'}} : ($localcfg->{'OnlyIncludeLocationRegex'}) );
443 my $reg = $localcfg->{'OnlyIncludeLocationRegex'};
444 next COPYMAP unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex);
445 }
446 # Exclude based on a callno regex
447 if($localcfg->{'CallNoRegex'}){
448 my @callnoregex = ( (ref($localcfg->{'CallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'CallNoRegex'}} : ($localcfg->{'CallNoRegex'}) );
449 my $reg = $localcfg->{'CallNoRegex'};
450 next COPYMAP if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex);
451 }
452 # Include based on a callno regex
453 if($localcfg->{'OnlyIncludeCallNoRegex'}){
454 my @callnoregex = ( (ref($localcfg->{'OnlyIncludeCallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeCallNoRegex'}} : ($localcfg->{'OnlyIncludeCallNoRegex'}) );
455 my $reg = $localcfg->{'OnlyIncludeCallNoRegex'};
456 next COPYMAP unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex);
457 }
458 # Trim call number to a float and exclude based on Dewey Range
459 if($localcfg->{'DeweyGT'} || $localcfg->{'DeweyLT'}){
460 my $gt = $localcfg->{'DeweyGT'};
461 my $lt = $localcfg->{'DeweyLT'};
462
463 # FIXME if either config has an array just ditch for now
464 next COPYMAP if (ref($gt) eq "ARRAY" or ref($lt) eq "ARRAY");
465 $gt =~ s/[^0-9\.]//g if $gt; #trim off anything not deweyish
466 $lt =~ s/[^0-9\.]//g if $lt; #trim off anything not deweyish
467
468 my $callno = $cn->label;
469 $callno =~ s/[^0-9\.]//g; #trim off anything not deweyish
470 print STDERR $callno;
471 #note that we are making big assumptions about the call numbers in the db
472
473 # we have a range, exclude what's inbetween
474 if($lt && $gt){
475 next COPYMAP if $callno > $gt and $callno < $lt;
476 # we only have a top threshold, exclude everything below it
477 } elsif ($lt){
478 next COPYMAP if $callno < $lt;
479 # we only have a bottom threshold, exclude everything above it
480 } elsif ($gt){
481 next COPYMAP if $callno > $gt;
482 }
483 }
484
485
486 if($thisorg->parent_ou){
487 $thisorg = $orgs{$thisorg->parent_ou}
488 } else {
489 $thisorg = ();
490 }
491
492 }
493 }
494
495 $r->append_fields(
496 MARC::Field->new(
497 852, '4', '',
498 a => $location,
499 b => $orgs{$printlib}->shortname,
500 #b => $orgs{$owninglib}->shortname,
501 #b => $orgs{$circlib}->shortname,
502 c => $shelves{$cp->location}->name,
503 j => $cn->label,
504 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
505 p => $cp->barcode,
506 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
507 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
508 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
509 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
510 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
511 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
512 z => $statuses{$cp->status}->name,
513 )
514 );
515
516 stats() if (! ($count{cp} % 100 ));
517 } # for cnmap
518 } # for cnlist
519 } # if block
520 } # if block
521} # sub