# --- Export configuration -------------------------------------------------
# Earlier form of FULL_QUERY, kept for reference: every non-deleted
# bibliographic record id, regardless of call numbers.
#FULL_QUERY="select distinct id from biblio.record_entry where not deleted"
# Record ids that have at least one non-deleted call number and whose
# bibliographic record is both not deleted and active.
FULL_QUERY="select distinct acn.record FROM asset.call_number acn join biblio.record_entry bre on (acn.record = bre.id) where not bre.deleted and bre.active and not acn.deleted"
# Record ids with a non-deleted call number owned by org unit 27 or any of
# its descendants, skipping call numbers labelled '##URI##'.
POSTSEC_QUERY="select distinct record FROM asset.call_number where owning_lib in (select distinct id from actor.org_unit_descendants(27)) and not deleted and label != '##URI##';"
# Added by this change: same join as FULL_QUERY with the opposite filter
# (deleted or inactive bib record, or deleted call number).
# NOTE(review): not referenced anywhere in the visible part of the script.
+NONEXPORTED_QUERY="select distinct acn.record FROM asset.call_number acn join biblio.record_entry bre on (acn.record = bre.id) where bre.deleted or not bre.active or acn.deleted"
# Options handed to the export binary for every batch.
MARC_PARAMS="--config /srv/openils/conf/opensrf_core.xml --items --location SITKA --collapse_to_depth 2 --timeout 300 --force901 --encoding UTF-8"
# DIR is expected to be set before this point (not visible here).
MARC_EXPORT_BIN="${DIR}/marc_export_custom"
# Full command line (binary + options).
# NOTE(review): MARC_EXPORT is not referenced in the visible part; the make
# call below receives MARC_EXPORT_BIN and MARC_PARAMS separately.
MARC_EXPORT="${MARC_EXPORT_BIN} ${MARC_PARAMS}"
# Removed by this change: the single global split size. Each batch now sets
# its own value and passes it to process_batch.
-SPLIT_COUNT=50000
# Run timestamp (UTC, e.g. 20240131T235959Z) unless T is already set.
[ -z "${T}" ] && T=$(date -u +%Y%m%dT%H%M%SZ)
# Working directory: a fresh temp dir unless OUTDIR is already set.
[ -z "${OUTDIR}" ] && OUTDIR=$(mktemp --tmpdir -d outlook_export.XXXXXXX)
#######################################
# Export one batch of records to a single MARC file.
# Steps: fetch record ids with SQL, split the id list into chunk files,
# write the list of per-chunk .marc targets, let make build those targets,
# then concatenate the chunk outputs into ${OUTDIR}/<name>-${T}.marc.
# Globals read: OUTDIR, T, PSQL, DATABASE, MAKEOPTS, MARC_EXPORT_BIN,
#               MARC_PARAMS (PSQL, DATABASE, MAKEOPTS are set outside the
#               visible part of the script).
# Arguments:
#   $1 - batch name; used as file name stem and to select "<name>.ini"
#   $2 - SQL statement that returns the record ids
#   $3 - chunk size forwarded to split_suffix (new side of this change)
# NOTE(review): path expansions are unquoted, so OUTDIR / names containing
# whitespace or glob characters would break these commands.
#######################################
process_batch() {
# This change makes the working variables function-local and lower-case
# instead of overwriting the caller's F and SQL.
- F="$1" SQL="$2"
- if [ ! -f ${OUTDIR}/${F}-${T}.id ]; then
+ local f="$1" sql="$2" split_count="$3"
+ if [ ! -f ${OUTDIR}/${f}-${T}.id ]; then
# Only query when the id file for this timestamp does not exist yet, so a
# re-run with the same OUTDIR and T reuses the earlier id list.
echo "Getting IDs"
# sort | uniq removes duplicate ids from the query output.
- $PSQL -c "$SQL" $DATABASE | sort | uniq >${OUTDIR}/${F}-${T}.id
+ $PSQL -c "$sql" $DATABASE | sort | uniq >${OUTDIR}/${f}-${T}.id
fi
# NOTE(review): unlike the id file, the targets file name has no ${T} in it;
# a targets file left by an earlier run in the same OUTDIR would be reused
# and the split step skipped - confirm this is intended.
- if [ ! -f ${OUTDIR}/${F}.id.targets ]; then
+ if [ ! -f ${OUTDIR}/${f}.id.targets ]; then
echo "Splitting work"
- split_suffix ${OUTDIR}/${F}-${T}.id ${OUTDIR}/${F}-${T}-split. .id
+ split_suffix ${OUTDIR}/${f}-${T}.id ${OUTDIR}/${f}-${T}-split. .id $split_count
echo "Making targets"
# Each chunk file "<name>-<T>-split.<digits>.id" becomes a make target with
# the extension replaced by ".marc". The '.' in the sed pattern is unescaped
# and therefore matches any character before "id".
- find ${OUTDIR} -name "${F}-${T}-split.[0-9]*[0-9].id" \
+ find ${OUTDIR} -name "${f}-${T}-split.[0-9]*[0-9].id" \
| sed 's,.id$,.marc,g' \
- >${OUTDIR}/${F}.id.targets
+ >${OUTDIR}/${f}.id.targets
fi
# Build every listed .marc target with Makefile-marc_export. The batch's
# exclusion file is given as the relative path "<name>.ini". The contents of
# the targets file are word-split into separate make goals.
make ${MAKEOPTS} -f Makefile-marc_export \
MARC_EXPORT_BIN="${MARC_EXPORT_BIN}" \
- MARC_PARAMS="$MARC_PARAMS --exclusion_ini ${F}.ini" \
- $(<${OUTDIR}/${F}.id.targets)
+ MARC_PARAMS="$MARC_PARAMS --exclusion_ini ${f}.ini" \
+ $(<${OUTDIR}/${f}.id.targets)
# Concatenate all chunk outputs into the final file.
# NOTE(review): chunks are appended in whatever order find returns them (no
# sort), and this runs even if make failed - confirm that is acceptable.
- find ${OUTDIR} -name "${F}-${T}-split*.marc" \
+ find ${OUTDIR} -name "${f}-${T}-split*.marc" \
-exec cat \{} \; \
- >${OUTDIR}/${F}-${T}.marc
+ >${OUTDIR}/${f}-${T}.marc
}
#######################################
# Split a file into numbered pieces and append an extension to each piece.
# Arguments:
#   $1 - input file
#   $2 - output prefix (path + name stem) handed to split
#   $3 - suffix appended to every piece after splitting (e.g. ".id")
#   $4 - size limit per piece (new side of this change; the old side read
#        the global SPLIT_COUNT instead)
#######################################
split_suffix() {
# -a 7 -d: pieces get 7-digit numeric suffixes (0000000, 0000001, ...).
# -C N:    each piece holds whole lines totalling at most N bytes.
# NOTE(review): the value is named as a count, but -C is a byte limit, not a
# line count (-l would split by number of lines) - confirm which is meant.
- INPUT="$1" OUTPUT_PREFIX="$2" OUTPUT_SUFFIX="$3"
- split -a 7 -d -C ${SPLIT_COUNT} ${INPUT} ${OUTPUT_PREFIX}
# split cannot be given the extension here, so every file whose name starts
# with the prefix is renamed to "<file><suffix>" afterwards.
# NOTE(review): the name pattern would also match pieces that already carry
# the suffix from an earlier call with the same prefix.
- find $(dirname ${OUTPUT_PREFIX}) -name "$(basename ${OUTPUT_PREFIX})*" -exec mv -f \{\} \{\}${OUTPUT_SUFFIX} \;
+ local input="$1" output_prefix="$2" output_suffix="$3" split_count="$4"
+ split -a 7 -d -C ${split_count} ${input} ${output_prefix}
+ find $(dirname ${output_prefix}) -name "$(basename ${output_prefix})*" -exec mv -f \{\} \{\}${output_suffix} \;
}
# --- Batches --------------------------------------------------------------
# Each batch sets a name (F), a query (SQL) and - with this change - its own
# split size, then calls process_batch. F also selects the exclusion file
# "<F>.ini" used inside process_batch.
# NOTE(review): SPLIT_COUNT is exported, but the visible code only passes it
# as an argument; the export is needed only if a child process reads it.

# media for BC ELN post-secondary libraries
+export SPLIT_COUNT=10000
F=bc_eln_media SQL="$POSTSEC_QUERY"
-process_batch "$F" "${SQL}"
+process_batch "$F" "${SQL}" $SPLIT_COUNT
# serials for BC ELN post-secondary libraries
# Same query as the media batch; only the batch name differs.
+export SPLIT_COUNT=10000
F=bc_eln_serials SQL="$POSTSEC_QUERY"
-process_batch "$F" "${SQL}"
+process_batch "$F" "${SQL}" $SPLIT_COUNT
# The full dump uses a larger split size than the two batches above.
+export SPLIT_COUNT=100000
# full dump for outlook public libraries
F=sitka_full_outlook SQL="$FULL_QUERY"
-process_batch "$F" "${SQL}"
+process_batch "$F" "${SQL}" $SPLIT_COUNT