add parallel script for BSLW (no holdings, with separate TNRD export)
author James Fournie <jfournie@sitka.bclibraries.ca>
Fri, 30 Mar 2012 21:40:23 +0000 (14:40 -0700)
committer James Fournie <jfournie@sitka.bclibraries.ca>
Fri, 30 Mar 2012 21:40:23 +0000 (14:40 -0700)
marc_export_custom/sitka-full-export-bslw.sh [new file with mode: 0755]

diff --git a/marc_export_custom/sitka-full-export-bslw.sh b/marc_export_custom/sitka-full-export-bslw.sh
new file mode 100755 (executable)
index 0000000..c317a8f
--- /dev/null
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+DIR=`dirname $0`
+#cd /srv/openils/bin
+
+PGUSER=evergreen
+PGHOST=db1.sitka.bclibraries.ca
+PSQL="psql -A -t -U $PGUSER -h $PGHOST"
+DATABASE=evergreen
+FULL_QUERY="select id from biblio.record_entry where not deleted"
+TNRD_QUERY="select distinct record FROM asset.call_number where owning_lib in (select distinct id from actor.org_unit_descendants(50)) and not deleted;"
+MARC_PARAMS="--config /srv/openils/conf/opensrf_core.xml --timeout 300 --force901 --encoding UTF-8"
+MARC_EXPORT_BIN="${DIR}/marc_export_custom"
+MARC_EXPORT="${MARC_EXPORT_BIN} ${MARC_PARAMS}"
+
+[ -z "${T}" ] && T=$(date -u +%Y%m%dT%H%M%SZ)
+[ -z "${OUTDIR}" ] && OUTDIR=$(mktemp --tmpdir -d backstage_export.XXXXXXX)
+
+process_batch() {
+       local f="$1" sql="$2" split_count="$3"
+       if [ ! -f ${OUTDIR}/${f}-${T}.id ]; then
+               echo "Getting IDs"
+               $PSQL -c "$sql" $DATABASE | sort | uniq >${OUTDIR}/${f}-${T}.id
+       fi
+       if [ ! -f ${OUTDIR}/${f}.id.targets ]; then
+               echo "Splitting work"
+               split_suffix ${OUTDIR}/${f}-${T}.id ${OUTDIR}/${f}-${T}-split. .id $split_count
+               echo "Making targets"
+               find ${OUTDIR} -name "${f}-${T}-split.[0-9]*[0-9].id" \
+               | sed 's,.id$,.marc,g' \
+               >${OUTDIR}/${f}.id.targets
+       fi
+
+       make ${MAKEOPTS} -f Makefile-marc_export \
+       MARC_EXPORT_BIN="${MARC_EXPORT_BIN}" \
+       MARC_PARAMS="$MARC_PARAMS" \
+       $(<${OUTDIR}/${f}.id.targets)
+
+       find ${OUTDIR} -name "${f}-${T}-split*.marc" \
+       -exec cat \{} \; \
+       >${OUTDIR}/${f}-${T}.marc
+}
+
+split_suffix() {
+       local input="$1" output_prefix="$2" output_suffix="$3" split_count="$4"
+       split -a 7 -d -C ${split_count} ${input} ${output_prefix}
+       find $(dirname ${output_prefix}) -name "$(basename ${output_prefix})*" -exec mv -f \{\} \{\}${output_suffix} \;
+}
+
+# Batch 1: TNRD-only export, using the smaller chunk size.
+SPLIT_COUNT=10000
+export SPLIT_COUNT
+F=tnrd
+SQL="${TNRD_QUERY}"
+process_batch "${F}" "${SQL}" "${SPLIT_COUNT}"
+
+# Batch 2: full export for all libraries, using the larger chunk size.
+SPLIT_COUNT=100000
+export SPLIT_COUNT
+F=sitka_full_backstage
+SQL="${FULL_QUERY}"
+process_batch "${F}" "${SQL}" "${SPLIT_COUNT}"
+