Merge branch 'master' of git+ssh://git.sitka.bclibraries.ca/sitka/sitka-tools
author James Fournie <jfournie@sitka.bclibraries.ca>
Mon, 14 Nov 2011 23:30:53 +0000 (15:30 -0800)
committer James Fournie <jfournie@sitka.bclibraries.ca>
Mon, 14 Nov 2011 23:30:53 +0000 (15:30 -0800)
marc_export_custom/Makefile-marc_export [new file with mode: 0644]
marc_export_custom/bc_eln_media.ini
marc_export_custom/bc_eln_serials.ini
marc_export_custom/marc_export_custom
marc_export_custom/print901c.pl [new file with mode: 0755]
marc_export_custom/sitka-export-no-cotr.sh [changed mode: 0644->0755]
marc_export_custom/sitka-full-export-parallel.sh [new file with mode: 0755]
marc_export_custom/sitka-full-export.sh [changed mode: 0644->0755]

diff --git a/marc_export_custom/Makefile-marc_export b/marc_export_custom/Makefile-marc_export
new file mode 100644 (file)
index 0000000..01ae1f1
--- /dev/null
@@ -0,0 +1,4 @@
+# Pattern rule that builds NAME.marc from NAME.id: the id list is fed to the
+# exporter on stdin, the exporter writes NAME.marc.tmp, and its stdout/stderr
+# are copied to NAME.marc.log through tee.  The .tmp file is renamed to the
+# real target only when the whole pipeline succeeded.
+#
+# MARC_EXPORT_BIN  exporter to run (may be overridden on the make command line)
+# MARC_PARAMS      extra exporter options, supplied by the caller
+MARC_EXPORT_BIN=./marc_export_custom
+# "set -o pipefail" below is a bash feature, so recipes must run under bash.
+SHELL=/bin/bash
+%.marc: %.id
+	set -o pipefail; \
+	$(MARC_EXPORT_BIN) $(MARC_PARAMS) <$< --output-file $@.tmp 2>&1 | tee $@.log \
+	&& mv $@.tmp $@
index 7b9cabe..ac5be7f 100644 (file)
@@ -35,4 +35,4 @@ ExcludeEntireOrg=true
 OnlyIncludeLocations=16mm film,CD-ROM,DVD,DVD magazine,Sound Effect\, ExCat CD\,CD accompanying book,sound recording,Special Collections - AV - Library Use Only,Video
 
 [BCREK]
-OnlyIncludeCircModifiers=dvd,video,videos,video-cassette
+OnlyIncludeCircMods=dvd,video,videos,video-cassette
index ed974bc..35c8b7f 100644 (file)
@@ -35,4 +35,4 @@ ExcludeEntireOrg=true
 OnlyIncludeLocations=Magazine,DVD magazine,Special Collections - Magazines - Library Use Only
 
 [BCREK]
-OnlyIncludeCircModifiers=magazine
+OnlyIncludeCircMods=magazine
index d6993c8..2728fde 100755 (executable)
@@ -29,6 +29,7 @@ my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeou
 my ($exclusion_ini,$collapse_to_depth, $output_file);
 my $cfg;
 my $force901;
+my $onlyholdings;
 
 GetOptions(
         'help'       => \$help,
@@ -46,10 +47,14 @@ GetOptions(
         'force901'  => \$force901,
         'exclusion_ini=s' => \$exclusion_ini,
         'collapse_to_depth=i' => \$collapse_to_depth,
-       'output-file=s' => \$output_file,
+        'onlyholdings' => \$onlyholdings,
+        'output-file=s' => \$output_file,
 );
 
-$cfg = new Config::Simple($exclusion_ini) if ($exclusion_ini);
+if ($exclusion_ini) {
+        die "exclusion ini file does not exist" unless (-r $exclusion_ini and -s $exclusion_ini);
+        $cfg = new Config::Simple($exclusion_ini) 
+}
 
 if ($help) {
 print <<"HELP";
@@ -86,6 +91,14 @@ Usage: $0 [options]
  --location or -l   MARC Location Code for holdings from
                     http://www.loc.gov/marc/organizations/orgshome.html
 
+ Options added by Sitka:
+ --force901                 Force-add 901 fields
+ --exclusion_ini FILENAME   Config::Simple based INI file for excluding holdings from the export
+ --collapse_to_depth 2      Depth to collapse holdings.  Any holdings at a depth below 
+                            will be collapsed up to the parent org unit at the set depth
+ --onlyholdings             Clean out 852s before adding new ones, and only export items that
+                            successfully recieved an 852 field
+
 Examples:
 
 To export a set of USMARC records in a file named "output_file" based on the
@@ -238,7 +251,20 @@ sub export_record {
 
         my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
         if ($type eq 'biblio') {
-            add_bib_holdings($bib, $r);
+
+            if($onlyholdings){
+                # Remove old 852 fields
+                my @f = $r->field('852');
+                $r->delete_fields(@f) if @f;
+                # Add new 852 fields 
+                add_bib_holdings($bib, $r);
+                # Check that at least one 852 was added
+                @f = $r->field('852');
+                # If not, we should NOT add this item to the export 
+                return unless @f;
+            } else {
+                add_bib_holdings($bib, $r);
+            }
         }
 
         if($force901){
@@ -253,22 +279,36 @@ sub export_record {
             );
         }
 
+        my $recordstr = undef;
+
         if ($format eq 'XML') {
             my $xml = $r->as_xml_record;
             $xml =~ s/^<\?.+?\?>$//mo;
-            print $outfh $xml;
+            $recordstr = $xml;
         } elsif ($format eq 'UNIMARC') {
-            print $outfh $r->as_usmarc;
+            $recordstr = $r->as_usmarc;
         } elsif ($format eq 'USMARC') {
-            print $outfh $r->as_usmarc;
+            $recordstr = $r->as_usmarc;
         }
+        eval {
+            if($format eq  'UNIMARC' or $format eq 'USMARC') {
+                my $rec = MARC::File::USMARC->decode($recordstr);
+                #throw Error::Simple('Reparsed MARC is not identical') if($recordstr ne $rec->as_usmarc);
+            } elsif($format eq 'XML') {
+                my $rec = MARC::Record->new_from_xml($recordstr, 'utf8', 'UNIMARC');
+                #my $tmp = $rec->as_xml_record;
+                #$tmp =~ s/^<\?.+?\?>$//mo;
+                #throw Error::Simple('Reparsed XML is not identical') if($tmp ne $recordstr);
+            }
+        } or throw Error::Simple("Failed to parse MARC record back: $!");
+        print $outfh $recordstr;
 
         $count{did}++;
 
     } otherwise {
         my $e = shift;
         my $errorid = $id;
-       chomp($errorid);
+        chomp($errorid);
         chomp($e);
         warn "\nERROR ON RECORD $errorid: $e\n";
         import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
@@ -435,18 +475,21 @@ sub add_bib_holdings {
                         while( $thisorg ){
                             # load the local config from the .ini file for exclusions
                             my $localcfg = $cfg->param(-block=> $thisorg->shortname);
+                            my $cfgparam;
 
                             # if we see this setting, just skip that org
 
-                            if( $localcfg->{'ExcludeEntireOrg'} ) 
-                            { skipnote($bib->id,"ExcludeEntireOrg"); next COPYMAP; } 
+                            $cfgparam = 'ExcludeEntireOrg'; 
+                            if( $localcfg->{$cfgparam} ) 
+                            { skipnote($bib->id, $cfgparam); next COPYMAP; } 
 
                             # what follows are exclusion rules
                     
                             # Excluded Flags
-                            if($localcfg->{'Flags'}){
+                            $cfgparam = 'Flags'; 
+                            if($localcfg->{$cfgparam}){
                                 # this little line is just forcing scalars into an array so we can 'use strict' with Config::Simple
-                                my @flags = ( (ref($localcfg->{'Flags'}) eq "ARRAY") ? @{$localcfg->{'Flags'}} : ($localcfg->{'Flags'}));
+                                my @flags = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}));
                                 if( grep { $_ eq 'reference' } @flags && $cp->ref eq 't')
                                 { skipnote($bib->id,"Flags: reference"); next COPYMAP; } 
                                 elsif( grep { $_ eq 'unholdable' } @flags && $cp->holdable eq 'f')
@@ -456,65 +499,75 @@ sub add_bib_holdings {
                                 elsif( grep { $_ eq 'hidden' } @flags && $cp->opac_visible eq 'f')
                                 { skipnote($bib->id,"Flags: hidden"); next COPYMAP; } 
                             }
+
                             # Excluded Circ Modifiers
-                            if($localcfg->{'CircMods'}){
+                            $cfgparam = 'CircMods'; 
+                            if($localcfg->{$cfgparam}){
                                 my $circmod = $cp->circ_modifier || "";
-                                my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
+                                my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
                                 if( grep { $_ eq $circmod } @circmods && @circmods)
-                                { skipnote($bib->id,"CircMods"); next COPYMAP; } 
+                                { skipnote($bib->id,$cfgparam); next COPYMAP; } 
                             }
                             # Inverse rule -- only include specified Circ Mods
-                            if($localcfg->{'OnlyIncludeCircMods'}){
+                            $cfgparam = 'OnlyIncludeCircMods'; 
+                            if($localcfg->{$cfgparam}){
                                 my $circmod = $cp->circ_modifier || "";
-                                my @circmods = ( (ref($localcfg->{'CircMods'}) eq "ARRAY") ? @{$localcfg->{'CircMods'}} : ($localcfg->{'CircMods'}) );
-                                unless( grep { $_ eq $circmod } @circmods && @circmods)
-                                { skipnote($bib->id,"OnlyIncludeCircMods"); next COPYMAP; } 
+                                my @circmods = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
+                                unless( grep { $_ and $_ eq $circmod } @circmods && @circmods)
+                                { skipnote($bib->id,$cfgparam); next COPYMAP; } 
                             }
                             # Excluded Copy Statuses
-                            if($localcfg->{'Statuses'}){
-                                my @statuses = ( (ref($localcfg->{'Statuses'}) eq "ARRAY") ? @{$localcfg->{'Statuses'}} : ($localcfg->{'Statuses'}) );
+                            $cfgparam = 'Statuses'; 
+                            if($localcfg->{$cfgparam}){
+                                my @statuses = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
                                 if( grep { $_ eq $statuses{$cp->status}->name } @statuses && @statuses)
-                                { skipnote($bib->id,"Statuses"); next COPYMAP; } 
+                                { skipnote($bib->id,$cfgparam); next COPYMAP; } 
                             }
                             # Excluded Locations
-                            if($localcfg->{'Locations'}){
-                                my @locations = ( (ref($localcfg->{'Locations'}) eq "ARRAY") ? @{$localcfg->{'Locations'}} : ($localcfg->{'Locations'}) );
+                            $cfgparam = 'Locations'; 
+                            if($localcfg->{$cfgparam}){
+                                my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
                                 if( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
-                                { skipnote($bib->id,"Locations"); next COPYMAP; }
+                                { skipnote($bib->id,$cfgparam); next COPYMAP; }
                             }
                             # Inverse rule - Only use the specified locations
-                            if($localcfg->{'OnlyIncludeLocations'}){
-                                my @locations = ( (ref($localcfg->{'OnlyIncludeLocations'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocations'}} : ($localcfg->{'Locations'}) );
+                            $cfgparam = 'OnlyIncludeLocations'; 
+                            if($localcfg->{$cfgparam}){
+                                my @locations = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{'Locations'}) );
                                 unless( grep { $_ eq $shelves{$cp->location}->name } @locations && @locations)
-                                { skipnote($bib->id,"OnlyIncludeLocations"); next COPYMAP; } 
+                                { skipnote($bib->id,$cfgparam); next COPYMAP; } 
                             }
                             # exclude based on a regex match to location names
-                            if($localcfg->{'LocationRegex'}){
-                                my @locregex = ( (ref($localcfg->{'LocationRegex'}) eq "ARRAY") ? @{$localcfg->{'LocationRegex'}} : ($localcfg->{'LocationRegex'}) );
-                                my $reg = $localcfg->{'LocationRegex'};
+                            $cfgparam = 'LocationRegex'; 
+                            if($localcfg->{$cfgparam}){
+                                my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
+                                my $reg = $localcfg->{$cfgparam};
                                 if( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
-                                { skipnote($bib->id,"LocationRegex"); next COPYMAP; }
+                                { skipnote($bib->id,$cfgparam); next COPYMAP; }
                             }
                             # include based on a regex match to location names
-                            if($localcfg->{'OnlyIncludeLocationRegex'}){
-                                my @locregex = ( (ref($localcfg->{'OnlyIncludeLocationRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeLocationRegex'}} : ($localcfg->{'OnlyIncludeLocationRegex'}) );
-                                my $reg = $localcfg->{'OnlyIncludeLocationRegex'};
+                            $cfgparam = 'OnlyIncludeLocationRegex'; 
+                            if($localcfg->{$cfgparam}){
+                                my @locregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
+                                my $reg = $localcfg->{$cfgparam};
                                 unless( grep { $shelves{$cp->location}->name =~ m/($reg)/ } @locregex && @locregex)
-                                { skipnote($bib->id,"OnlyIncludeLocationRegex"); next COPYMAP; } 
+                                { skipnote($bib->id,$cfgparam); next COPYMAP; } 
                             }
                             # Exclude based on a callno regex
-                            if($localcfg->{'CallNoRegex'}){
-                                my @callnoregex = ( (ref($localcfg->{'CallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'CallNoRegex'}} : ($localcfg->{'CallNoRegex'}) );
-                                my $reg = $localcfg->{'CallNoRegex'};
+                            $cfgparam = 'CallNoRegex'; 
+                            if($localcfg->{$cfgparam}){
+                                my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
+                                my $reg = $localcfg->{$cfgparam};
                                 if( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
-                                { skipnote($bib->id,"CallNoRegex"); next COPYMAP; }
+                                { skipnote($bib->id,$cfgparam); next COPYMAP; }
                             }
                             # Include based on a callno regex
-                            if($localcfg->{'OnlyIncludeCallNoRegex'}){
-                                my @callnoregex = ( (ref($localcfg->{'OnlyIncludeCallNoRegex'}) eq "ARRAY") ? @{$localcfg->{'OnlyIncludeCallNoRegex'}} : ($localcfg->{'OnlyIncludeCallNoRegex'}) );
-                                my $reg = $localcfg->{'OnlyIncludeCallNoRegex'};
+                            $cfgparam = 'OnlyIncludeCallNoRegex'; 
+                            if($localcfg->{$cfgparam}){
+                                my @callnoregex = ( (ref($localcfg->{$cfgparam}) eq "ARRAY") ? @{$localcfg->{$cfgparam}} : ($localcfg->{$cfgparam}) );
+                                my $reg = $localcfg->{$cfgparam};
                                 unless( grep { $cn->label =~ m/($reg)/ } @callnoregex && @callnoregex)
-                                { skipnote($bib->id,"OnlyIncludeCallNoRegex"); next COPYMAP; } 
+                                { skipnote($bib->id,$cfgparam); next COPYMAP; } 
                             }
 
                             # Trim call number to a float and exclude based on Dewey Range
@@ -581,12 +634,16 @@ sub add_bib_holdings {
 
 
                     stats() if (! ($count{cp} % 100 ));
-                } # for cnmap
-            } # for cnlist
-        } # if block
-    } # if block
+                } # COPYMAP: for my $cp ( @$cn_map_list )
+            } # for my $cn ( @$cn_list )
+        } # if ($cp_list && @$cp_list)
+    } # if ($cn_list && @$cn_list) 
 } # sub
 
 sub skipnote { 
-       printf(STDOUT "Skipped %s due to config: %s\n",$1,$2); 
+        my $id = shift;
+        my $note = shift;
+        my $outf = *STDERR;
+        $outf = *STDOUT if($output_file) ;
+        printf($outf "Skipped %s due to config: %s\n",$id,$note); 
 }
diff --git a/marc_export_custom/print901c.pl b/marc_export_custom/print901c.pl
new file mode 100755 (executable)
index 0000000..70e3fd2
--- /dev/null
@@ -0,0 +1,34 @@
+#!/usr/bin/perl
+# Print subfield 901$c of each record read from the USMARC files named on
+# the command line, one value per line on STDOUT, then report record counts
+# on STDERR.
+use MARC::Batch;
+use strict;
+use warnings;
+use Encode qw(:fallback_all);
+
+my $batch = MARC::Batch->new( 'USMARC', @ARGV );
+
+# Saved so the fallback setting can be put back once the batch is done.
+my $oldencoding = $PerlIO::encoding::fallback;
+
+# Non-strict, non-warning mode: per the MARC::Batch documentation this lets
+# the batch keep reading past data it considers bad.
+$batch->strict_off();
+$batch->warnings_off();
+
+# attempted: records handed back by the batch
+# raw:       records whose 901$c lookup completed without dying
+# 901c:      records whose 901$c value has at least one character
+my %tally = ( attempted => 0, raw => 0, '901c' => 0 );
+
+# Print the 901$c value of one record, if it has one.
+# Returns 1 when the value matched /^.+$/, 0 otherwise (including when the
+# record has no 901$c at all, in which case nothing is printed).
+sub emit_901c {
+    my ($marc) = @_;
+    my $value = $marc->subfield( 901, "c" );
+    return 0 unless defined $value;
+    chomp $value;
+    my $has_text = ( $value =~ /^.+$/ ) ? 1 : 0;
+    printf "%s\n",$value;
+    return $has_text;
+}
+
+# The outer eval catches a die from $batch->next itself; the inner eval
+# confines a failure to the record being processed.
+eval {
+    while ( my $marc = $batch->next ) {
+        $PerlIO::encoding::fallback = Encode::WARN_ON_ERR | Encode::PERLQQ;
+        $tally{attempted}++;
+        eval {
+            $tally{'901c'} += emit_901c($marc);
+            $tally{raw}++;
+        };
+        warn "Inner: $@" if $@;
+    }
+};
+warn "Outer: $@" if $@;
+
+$PerlIO::encoding::fallback = $oldencoding;
+printf STDERR "Count (attempted): %d\n",$tally{attempted};
+printf STDERR "Count (raw): %d\n",$tally{raw};
+printf STDERR "Count (901c): %d\n",$tally{'901c'};
old mode 100644 (file)
new mode 100755 (executable)
diff --git a/marc_export_custom/sitka-full-export-parallel.sh b/marc_export_custom/sitka-full-export-parallel.sh
new file mode 100755 (executable)
index 0000000..394e1da
--- /dev/null
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+DIR=`dirname $0`
+#cd /srv/openils/bin
+
+PGUSER=evergreen
+PGHOST=db1.sitka.bclibraries.ca
+PSQL="psql -A -t -U $PGUSER -h $PGHOST"
+DATABASE=evergreen
+#FULL_QUERY="select distinct id from biblio.record_entry where not deleted"
+FULL_QUERY="select distinct acn.record FROM asset.call_number acn join biblio.record_entry bre on (acn.record = bre.id) where not bre.deleted and bre.active and not acn.deleted"
+POSTSEC_QUERY="select distinct record FROM asset.call_number where owning_lib in (select distinct id from actor.org_unit_descendants(27)) and not deleted and label != '##URI##';"
+NONEXPORTED_QUERY="select distinct acn.record FROM asset.call_number acn join biblio.record_entry bre on (acn.record = bre.id) where bre.deleted or not bre.active or acn.deleted"
+MARC_PARAMS="--config /srv/openils/conf/opensrf_core.xml --items --location SITKA --collapse_to_depth 2 --timeout 300 --force901 --encoding UTF-8"
+MARC_EXPORT_BIN="${DIR}/marc_export_custom"
+MARC_EXPORT="${MARC_EXPORT_BIN} ${MARC_PARAMS}"
+
+[ -z "${T}" ] && T=$(date -u +%Y%m%dT%H%M%SZ)
+[ -z "${OUTDIR}" ] && OUTDIR=$(mktemp --tmpdir -d outlook_export.XXXXXXX)
+
+process_batch() {
+       local f="$1" sql="$2" split_count="$3"
+       if [ ! -f ${OUTDIR}/${f}-${T}.id ]; then
+               echo "Getting IDs"
+               $PSQL -c "$sql" $DATABASE | sort | uniq >${OUTDIR}/${f}-${T}.id
+       fi
+       if [ ! -f ${OUTDIR}/${f}.id.targets ]; then
+               echo "Splitting work"
+               split_suffix ${OUTDIR}/${f}-${T}.id ${OUTDIR}/${f}-${T}-split. .id $split_count
+               echo "Making targets"
+               find ${OUTDIR} -name "${f}-${T}-split.[0-9]*[0-9].id" \
+               | sed 's,.id$,.marc,g' \
+               >${OUTDIR}/${f}.id.targets
+       fi
+
+       make ${MAKEOPTS} -f Makefile-marc_export \
+       MARC_EXPORT_BIN="${MARC_EXPORT_BIN}" \
+       MARC_PARAMS="$MARC_PARAMS --exclusion_ini ${f}.ini" \
+       $(<${OUTDIR}/${f}.id.targets)
+
+       find ${OUTDIR} -name "${f}-${T}-split*.marc" \
+       -exec cat \{} \; \
+       >${OUTDIR}/${f}-${T}.marc
+}
+
+split_suffix() {
+       local input="$1" output_prefix="$2" output_suffix="$3" split_count="$4"
+       split -a 7 -d -C ${split_count} ${input} ${output_prefix}
+       find $(dirname ${output_prefix}) -name "$(basename ${output_prefix})*" -exec mv -f \{\} \{\}${output_suffix} \;
+}
+
+# media for BC ELN post-secondary libraries
+export SPLIT_COUNT=10000
+F=bc_eln_media SQL="$POSTSEC_QUERY"
+process_batch "$F" "${SQL}" $SPLIT_COUNT
+
+# serials for BC ELN post-secondary libraries
+export SPLIT_COUNT=10000
+F=bc_eln_serials SQL="$POSTSEC_QUERY"
+process_batch "$F" "${SQL}" $SPLIT_COUNT
+
+export SPLIT_COUNT=100000
+# full dump for outlook public libraries
+F=sitka_full_outlook SQL="$FULL_QUERY"
+process_batch "$F" "${SQL}" $SPLIT_COUNT
+
old mode 100644 (file)
new mode 100755 (executable)