new stat tool.
author Robin H. Johnson <rjohnson@sitka.bclibraries.ca>
Thu, 16 Aug 2012 05:42:49 +0000 (22:42 -0700)
committer Robin H. Johnson <rjohnson@sitka.bclibraries.ca>
Thu, 16 Aug 2012 05:42:49 +0000 (22:42 -0700)
marc_export_custom/Makefile-marc_export
marc_export_custom/stats-distribution.pl [new file with mode: 0755]
marc_export_custom/stats.sh

index 01ae1f1..d41b8e4 100644 (file)
@@ -1,4 +1,8 @@
 MARC_EXPORT_BIN=./marc_export_custom
+MARC_STATS=./stats-distribution.pl
 %.marc: %.id
        $(MARC_EXPORT_BIN) $(MARC_PARAMS) <$< --output-file $@.tmp 2>&1 | tee $@.log \
        && mv $@.tmp $@
+%.marc-stats.csv: %.marc
+       $(MARC_STATS) $< >$@.tmp && mv $@.tmp $@
+       
diff --git a/marc_export_custom/stats-distribution.pl b/marc_export_custom/stats-distribution.pl
new file mode 100755 (executable)
index 0000000..325f8db
--- /dev/null
@@ -0,0 +1,52 @@
+#!/usr/bin/perl
+# Print CSV holdings statistics for the MARC (USMARC) files named in @ARGV:
+#   bibs,<lib>,<n>    records with at least one 852 whose $b is <lib>
+#   copies,<lib>,<n>  852 fields whose $b is <lib>
+#   summary,...       records read, records with a 901$c, total 852$b fields
+use MARC::Batch;
+use strict;
+use warnings;
+# Refuse missing input up front so a bad path cannot pass for an empty file.
+# NOTE(review): MARC::Batch seems to just stop on an unopenable file - confirm.
+die "Usage: $0 FILE.marc [...]\n" unless @ARGV;
+foreach my $file (@ARGV) { die "$0: cannot read $file\n" unless -r $file; }
+my $batch = MARC::Batch->new( 'USMARC', @ARGV );
+$batch->strict_off();
+$batch->warnings_off();
+my %bibs;      # 852$b value => number of records carrying it
+my %holdings;  # 852$b value => number of 852 fields carrying it
+my $count_raw = 0;
+my $count_copies = 0;
+my $count_901c = 0;
+while ( my $marc = $batch->next ) {
+       $count_raw++;
+       my $field_901c = $marc->subfield(901,"c");
+       if(defined $field_901c) {
+               chomp $field_901c;
+               $count_901c++ if ($field_901c =~ /^.+$/);
+       }
+
+       my %holding_libs;  # 852$b values seen on this record, once each
+       FIELD852: foreach my $my852 ($marc->field(852)) {
+               my $my852b = $my852->subfield('b');
+               next FIELD852 unless (defined $my852b and $my852b =~ /^.+$/);
+               chomp $my852b;
+               $holdings{$my852b}++;
+               $holding_libs{$my852b} = 1;
+       }
+       # A record counts once per library no matter how many 852s name it.
+       $bibs{$_}++ foreach keys %holding_libs;
+}
+foreach my $lib (sort keys %bibs) {
+       printf("bibs,%s,%d\n",$lib,$bibs{$lib});
+}
+foreach my $lib (sort keys %holdings) {
+       printf("copies,%s,%d\n",$lib,$holdings{$lib});
+       $count_copies += $holdings{$lib};
+}
+
+printf "summary,distinct-bibs-raw,%d\n",$count_raw;
+printf "summary,distinct-bibs-901c,%d\n",$count_901c;
+printf "summary,distinct-copies,%d\n",$count_copies;
+
+# vim: ts=2 sw=2
index 82e53ba..42d4ddc 100755 (executable)
@@ -1,5 +1,6 @@
 #!/bin/bash
-files=$(ls *.marc.bz2 *.marc 2>/dev/null |grep -v split |sed 's,.bz2,,g')
+basedir=$(dirname $0)
+files=$(ls *.marc.bz2 *.marc 2>/dev/null |grep -v split |sed 's,\.bz2,,g' | sort | uniq)
 for i in $files; do 
        s=$i.stats
        j=$i.bz2
@@ -15,7 +16,9 @@ for i in $files; do
                echo "Missing $i or $j"
                continue
        fi
-       ls -la $i $j | tee $s
+       echo -n '' >$s
+       $basedir/stats-distribution.pl $i | tee $s-holding-distribution.csv
+       ls -la $i $j | tee -a $s
        sha1sum $i $j | tee -a $s
        marcdump --stats --quiet --noprint $i | tee -a ${s}
        rm -f $i