Avoid duplicate input IDs; enable the full dump for outlook public libraries.
author: Robin H. Johnson <rjohnson@sitka.bclibraries.ca>
Fri, 7 Oct 2011 20:54:25 +0000 (13:54 -0700)
committer: Robin H. Johnson <rjohnson@sitka.bclibraries.ca>
Fri, 7 Oct 2011 20:57:43 +0000 (13:57 -0700)
marc_export_custom/sitka-full-export-parallel.sh

index a26d4f5..cd89c8c 100755 (executable)
@@ -15,7 +15,6 @@ MARC_EXPORT_BIN="${DIR}/marc_export_custom"
 MARC_EXPORT="${MARC_EXPORT_BIN} ${MARC_PARAMS}"
 SPLIT_COUNT=50000
 
 MARC_EXPORT="${MARC_EXPORT_BIN} ${MARC_PARAMS}"
 SPLIT_COUNT=50000
 
-
 [ -z "${T}" ] && T=$(date -u +%Y%m%dT%H%M%SZ)
 [ -z "${OUTDIR}" ] && OUTDIR=$(mktemp --tmpdir -d outlook_export.XXXXXXX)
 
 [ -z "${T}" ] && T=$(date -u +%Y%m%dT%H%M%SZ)
 [ -z "${OUTDIR}" ] && OUTDIR=$(mktemp --tmpdir -d outlook_export.XXXXXXX)
 
@@ -23,7 +22,7 @@ process_batch() {
        F="$1" SQL="$2"
        if [ ! -f ${OUTDIR}/${F}-${T}.id ]; then
                echo "Getting IDs"
        F="$1" SQL="$2"
        if [ ! -f ${OUTDIR}/${F}-${T}.id ]; then
                echo "Getting IDs"
-               $PSQL -c "$SQL" $DATABASE >${OUTDIR}/${F}-${T}.id
+               $PSQL -c "$SQL" $DATABASE | sort | uniq >${OUTDIR}/${F}-${T}.id
        fi
        if [ ! -f ${OUTDIR}/${F}.id.targets ]; then
                echo "Splitting work"
        fi
        if [ ! -f ${OUTDIR}/${F}.id.targets ]; then
                echo "Splitting work"
@@ -50,14 +49,15 @@ split_suffix() {
        find $(dirname ${OUTPUT_PREFIX}) -name "$(basename ${OUTPUT_PREFIX})*" -exec mv -f \{\} \{\}${OUTPUT_SUFFIX} \;
 }
 
        find $(dirname ${OUTPUT_PREFIX}) -name "$(basename ${OUTPUT_PREFIX})*" -exec mv -f \{\} \{\}${OUTPUT_SUFFIX} \;
 }
 
-# full dump for outlook public libraries
-F=sitka_full_outlook SQL="$FULL_QUERY"
-#process_batch "$F" "${SQL}"
-
 # media for BC ELN post-secondary libraries
 F=bc_eln_media SQL="$POSTSEC_QUERY"
 # media for BC ELN post-secondary libraries
 F=bc_eln_media SQL="$POSTSEC_QUERY"
-#process_batch "$F" "${SQL}"
+process_batch "$F" "${SQL}"
 
 # serials for BC ELN post-secondary libraries
 F=bc_eln_serials SQL="$POSTSEC_QUERY"
 process_batch "$F" "${SQL}"
 
 # serials for BC ELN post-secondary libraries
 F=bc_eln_serials SQL="$POSTSEC_QUERY"
 process_batch "$F" "${SQL}"
+
+# full dump for outlook public libraries
+F=sitka_full_outlook SQL="$FULL_QUERY"
+process_batch "$F" "${SQL}"
+