Final code for MUCN EBSCO upload.
authorRobin H. Johnson <rjohnson@sitka.bclibraries.ca>
Tue, 17 Sep 2013 22:22:06 +0000 (15:22 -0700)
committerRobin H. Johnson <rjohnson@sitka.bclibraries.ca>
Tue, 17 Sep 2013 22:22:06 +0000 (15:22 -0700)
marc_export_custom/email-template-ebsco.sh [new file with mode: 0755]
marc_export_custom/mucn-ebsco-full-export-parallel.sh [new file with mode: 0755]
marc_export_custom/mucn-full-export-parallel-ebsco-wrapper.sh [new file with mode: 0755]
marc_export_custom/mucn_ebsco.ini [new file with mode: 0644]

diff --git a/marc_export_custom/email-template-ebsco.sh b/marc_export_custom/email-template-ebsco.sh
new file mode 100755 (executable)
index 0000000..2e603d1
--- /dev/null
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Print the notification email (headers and body) for a UCN EBSCO export on
+# stdout.
+#
+# Usage: email-template-ebsco.sh GENERATE_TS SUBMIT_TS
+#   GENERATE_TS  when the export was generated, e.g. 20130917T222206Z
+#   SUBMIT_TS    when it was submitted; any form date -d accepts, e.g. @1379456526
+#
+# Run it from the export output directory: the size, record-count and checksum
+# sections are built from the *stats and *.csv files in the current directory.
+
+# Turn YYYYmmddTHHMMSSZ into "YYYYmmdd HHMM" so that date -d can parse it: the
+# "T" becomes a space, and the seconds plus the trailing "Z" are dropped.
+# Values containing neither "T" nor "Z" (such as @epoch) pass through unchanged.
+t1=$1
+t1=${t1/T/ }
+t1=${t1/??Z}
+t2=$2
+t2=${t2/T/ }
+t2=${t2/??Z}
+
+if [ -z "$t1" ] || [ -z "$t2" ]; then
+       echo "Usage: $0 'Generate timestamp' 'Submit timestamp'" 1>&2
+       # Stop here: continuing would emit a mail with meaningless dates.
+       exit 1
+fi
+
+# Upload directory name: the exported T if present, otherwise the raw
+# generation timestamp argument.
+upload_dir="mucn/${T:-$1}"
+
+cat <<EOF
+Date: $(date -uR)
+From: "Robin H. Johnson" <rjohnson@sitka.bclibraries.ca>
+Subject: UCN EBSCO export $(date +%Y/%m/%d -d "$t1") notification
+To: "Daniel Mertens" <danielmertens@ebsco.com>, "Kathy Ellerton" <kellerton@ucn.ca> 
+Cc: "Robin H. Johnson" <rjohnson@sitka.bclibraries.ca>, "Sharon Herbert" <sherbert@sitka.bclibraries.ca>
+
+Full automated UCN EBSCO export
+
+Please remember to review the shortcodes*csv file for any new shortcodes.
+
+Notes:
+(none, this was automated)
+
+Directory for uploads:
+$upload_dir
+
+Submission timestamp:
+$(date -uR -d "$t2")
+
+Generation timestamp:
+$(date -uR -d "$t1")
+
+Submission method:
+FTP
+
+Compressed sizes in bytes:
+$(head -n2 *stats | awk '/.marc.bz2$/{print $9,$5}')
+
+Uncompressed size in bytes:
+$(head -n2 *stats | awk '/.marc$/{print $9,$5}')
+$(find -name '*.csv' -printf '%f %s\n')
+
+Number of records submitted:
+$(grep -h Filename -A5 *stats | awk '/.marc$/{print $3,$1}')
+
+SHA1 checksums:
+$(grep -hE '^[a-f0-9]{32}' *stats |sort -k +2)
+$(sha1sum *.csv)
+EOF
diff --git a/marc_export_custom/mucn-ebsco-full-export-parallel.sh b/marc_export_custom/mucn-ebsco-full-export-parallel.sh
new file mode 100755 (executable)
index 0000000..1aa2211
--- /dev/null
@@ -0,0 +1,89 @@
+#!/bin/bash
+# Export MARC records for MUCN holdings, in parallel, for the EBSCO upload.
+#
+# Environment:
+#   T         run timestamp used in output file names (default: current UTC time)
+#   OUTDIR    directory receiving all output (default: a fresh mktemp directory)
+#   MAKEOPTS  extra options handed to make, e.g. -j8
+#
+# Makefile-marc_export and the exclusion ini are referenced by relative path,
+# so run this script from the directory that contains them.
+# Exits non-zero when the ID query or the make run fails.
+
+DIR=$(readlink -f "$(dirname "$0")")
+#cd /srv/openils/bin
+
+PGUSER=evergreen
+PGHOST=db1.sitka.bclibraries.ca
+PSQL="psql -A -t -U $PGUSER -h $PGHOST"
+DATABASE=evergreen
+MUCN_QUERY="select distinct record FROM asset.call_number where owning_lib in (select distinct id from actor.org_unit_descendants(1400)) and not deleted and label != '##URI##';"
+MARC_PARAMS="--config /srv/openils/conf/opensrf_core.xml --items --location MUCN --collapse_to_depth 2 --timeout 300 --force901 --encoding UTF-8 --onlyholdings --quiet"
+MARC_EXPORT_BIN="${DIR}/marc_export_custom"
+MARC_EXPORT="${MARC_EXPORT_BIN} ${MARC_PARAMS}"
+
+[ -z "${T}" ] && T=$(date -u +%Y%m%dT%H%M%SZ)
+# NOTE(review): the "outlook_export" prefix looks like a leftover from another
+# export script -- confirm whether it should name this export instead.
+[ -z "${OUTDIR}" ] && OUTDIR=$(mktemp --tmpdir -d outlook_export.XXXXXXX)
+
+# process_batch NAME SQL SPLIT_COUNT
+#   Fetch the record IDs selected by SQL, split them into chunks, have make
+#   export each chunk to MARC, then concatenate the chunk outputs into
+#   ${OUTDIR}/NAME-${T}.marc.  NAME also selects the exclusion file NAME.ini.
+#   The ID list and the target list are reused when they already exist.
+#   NOTE(review): the target list name does not include ${T}, so reusing an
+#   OUTDIR with a different T would pick up the old targets -- confirm.
+#   Returns non-zero if the ID query or the make run fails.
+process_batch() {
+       local f="$1" sql="$2" split_count="$3"
+       local ids="${OUTDIR}/${f}-${T}.id" targets="${OUTDIR}/${f}.id.targets"
+       if [ ! -f "${ids}" ]; then
+               echo "Getting IDs"
+               # Query into a scratch file first: a failed query must not leave an
+               # empty ID list behind that the next run would accept as complete.
+               if ! $PSQL -c "$sql" "$DATABASE" >"${ids}.raw"; then
+                       echo "ID query failed" 1>&2
+                       rm -f "${ids}.raw"
+                       return 1
+               fi
+               sort -u "${ids}.raw" >"${ids}"
+               rm -f "${ids}.raw"
+       fi
+       if [ ! -f "${targets}" ]; then
+               echo "Splitting work"
+               split_suffix "${ids}" "${OUTDIR}/${f}-${T}-split." .id "$split_count"
+               echo "Making targets"
+               # One .marc make target per chunk of IDs.
+               find "${OUTDIR}" -name "${f}-${T}-split.[0-9]*[0-9].id" \
+               | sed 's,.id$,.marc,g' \
+               >"${targets}"
+       fi
+
+       # Abort on a failed export instead of concatenating an incomplete set.
+       make ${MAKEOPTS} -f Makefile-marc_export \
+       MARC_EXPORT_BIN="${MARC_EXPORT_BIN}" \
+       MARC_PARAMS="$MARC_PARAMS --exclusion_ini ${f}.ini" \
+       $(<"${targets}") \
+       || return 1
+
+       find "${OUTDIR}" -name "${f}-${T}-split*.marc" \
+       -exec cat \{} + \
+       >"${OUTDIR}/${f}-${T}.marc"
+}
+
+# split_suffix INPUT OUTPUT_PREFIX OUTPUT_SUFFIX SPLIT_COUNT
+#   Split INPUT into pieces named OUTPUT_PREFIX<7 digits>OUTPUT_SUFFIX.
+#   NOTE(review): split -C caps each piece at SPLIT_COUNT *bytes* of whole
+#   lines, not SPLIT_COUNT lines -- confirm that is the intended chunk size.
+split_suffix() {
+       local input="$1" output_prefix="$2" output_suffix="$3" split_count="$4"
+       split -a 7 -d -C "${split_count}" "${input}" "${output_prefix}"
+       # Append the suffix to every piece that split produced.
+       find "$(dirname "${output_prefix}")" -name "$(basename "${output_prefix}")*" -exec mv -f \{\} \{\}"${output_suffix}" \;
+}
+
+# MUCN holdings for EBSCO; SPLIT_COUNT is handed on to split_suffix.
+export SPLIT_COUNT=10000
+F=mucn_ebsco SQL="$MUCN_QUERY"
+process_batch "$F" "${SQL}" "$SPLIT_COUNT"
diff --git a/marc_export_custom/mucn-full-export-parallel-ebsco-wrapper.sh b/marc_export_custom/mucn-full-export-parallel-ebsco-wrapper.sh
new file mode 100755 (executable)
index 0000000..6e696d9
--- /dev/null
@@ -0,0 +1,52 @@
+#!/bin/bash
+# This is meant to be fire-and-forget
+# Only use it if you trust the output!
+#
+# Runs the whole MUCN EBSCO submission: fetch-shortcodes.sh, the parallel MARC
+# export, compression, stats.sh, the FTP upload, and finally the notification
+# email.
+#
+# Environment:
+#   MAKEOPTS       required; options for make in the export step (e.g. -j8)
+#   T              run timestamp (default: current UTC time)
+#   OUTDIR         working/output directory (default: a fresh mktemp directory)
+#   EBSCO_FTP_URL  lftp URL, credentials included (default: the value below)
+export DIR=$(readlink -f "$(dirname "$0")")
+if [ -z "${MAKEOPTS}" ]; then
+       echo "MAKEOPTS env variable is not set"
+       echo "If you really want this to be serialized, export MAKEOPTS=-j1"
+       echo "Otherwise set it about 1.5x your number of CPUs!"
+       exit 1
+fi
+[ -z "${T}" ] && export T=$(date -u +%Y%m%dT%H%M%SZ)
+[ -z "${OUTDIR}" ] && export OUTDIR=$(mktemp --tmpdir -d ebsco_export.XXXXXXX)
+# NOTE(security): the default URL embeds the FTP account password in the
+# source tree; prefer supplying EBSCO_FTP_URL from the environment.
+: "${EBSCO_FTP_URL:=ftp://cat00975a:neJUJ4ez@ftp.epnet.com/}"
+cd "$OUTDIR" || exit 1
+"$DIR"/fetch-shortcodes.sh
+cd "$DIR" || exit 1
+# Nothing must be uploaded or announced if the export itself failed.
+time ./mucn-ebsco-full-export-parallel.sh || exit 1
+cd "$OUTDIR" || exit 1
+mkdir -p split
+mv *split* split/
+pbzip2 -9nvk -p4 *{marc,log}
+pbzip2 -9nv -p4 split/*{marc,log}
+"$DIR"/stats.sh
+cp "$DIR"/email-template-ebsco.sh "$OUTDIR"/
+cat >lftp.cmd <<EOF
+open $EBSCO_FTP_URL
+mkdir -p mucn/$T
+cd mucn/$T
+mput *csv *stats
+mput *bz2
+rels -l
+EOF
+# Do not send the "submitted" mail when the FTP session failed.
+lftp -f lftp.cmd || exit 1
+chmod +x email-template-ebsco.sh
+./email-template-ebsco.sh "$T" "@$(date +%s)" >email.txt
+# -t: take the recipients from the To:/Cc: headers of the generated message;
+# no recipient addresses are given on the command line.
+/usr/lib/sendmail -t <email.txt
diff --git a/marc_export_custom/mucn_ebsco.ini b/marc_export_custom/mucn_ebsco.ini
new file mode 100644 (file)
index 0000000..2c97e56
--- /dev/null
@@ -0,0 +1,62 @@
+; The only library we want to include is MUCN, at the org-unit path SITKA/SPRUCE/MUCN
+
+[SITKA]
+Flags=hidden
+
+; parent_ou=1
+[BBGVL]
+ExcludeEntireOrg=true
+[BBNCLF]
+ExcludeEntireOrg=true
+[BCK]
+ExcludeEntireOrg=true
+[BNCLF]
+ExcludeEntireOrg=true
+[BNELF]
+ExcludeEntireOrg=true
+[GOVLIBS]
+ExcludeEntireOrg=true
+[ISLANDLINK]
+ExcludeEntireOrg=true
+[K12]
+ExcludeEntireOrg=true
+[POSTSECONDARY]
+ExcludeEntireOrg=true
+[SGIC]
+ExcludeEntireOrg=true
+[SPRUCE]
+; included
+[UNFEDERATED]
+ExcludeEntireOrg=true
+
+; parent_ou=1000
+[BOISSEVAIN]
+ExcludeEntireOrg=true
+[MANITOU]
+ExcludeEntireOrg=true
+[MB]
+ExcludeEntireOrg=true
+[MDB]
+ExcludeEntireOrg=true
+[MPLP]
+ExcludeEntireOrg=true
+[MS]
+ExcludeEntireOrg=true
+[MSOG]
+ExcludeEntireOrg=true
+[MSSC]
+ExcludeEntireOrg=true
+[MUCN]
+; This is the only library we want!
+Statuses=Discard/Weed
+[MWPL]
+ExcludeEntireOrg=true
+[SCRL]
+ExcludeEntireOrg=true
+[SIRL]
+ExcludeEntireOrg=true
+[SPECIAL]
+ExcludeEntireOrg=true
+[STEROSE]
+ExcludeEntireOrg=true
+