/* * age-circs.sql - a script to age/anonymize patrons' circ history. * * Copyright (C) 2008 Equinox Software, Inc. * Copyright (C) 2014 BC Libraries Cooperative * Author: Jeff Davis * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ BEGIN; \echo 'utility function for testing interval' CREATE OR REPLACE FUNCTION sitka.is_valid_interval(TEXT) RETURNS BOOL AS $$ BEGIN RETURN CASE WHEN $1::INTERVAL IS NULL THEN FALSE ELSE TRUE END; EXCEPTION WHEN OTHERS THEN RETURN FALSE; END; $$ LANGUAGE plpgsql; \echo 'exception matrix table' CREATE TABLE IF NOT EXISTS sitka.age_circ_exception_matrix ( id SERIAL NOT NULL, active BOOLEAN NOT NULL DEFAULT TRUE, org_unit INT REFERENCES actor.org_unit (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, grp INT REFERENCES permission.grp_tree (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED ); \echo 'function for finding exceptions' CREATE OR REPLACE FUNCTION sitka.has_age_circ_exception(circ_id BIGINT) RETURNS BOOL AS $$ DECLARE potential_aged_circ action.circulation%ROWTYPE; usr actor.usr%ROWTYPE; matching_exceptions INT; BEGIN matching_exceptions := 0; -- TODO: should we match exceptions against the entire circ chain? SELECT * INTO potential_aged_circ FROM action.circulation WHERE id = circ_id; SELECT * INTO usr FROM actor.usr WHERE id = potential_aged_circ.usr; SELECT count(*) INTO matching_exceptions FROM sitka.age_circ_exception_matrix m LEFT JOIN permission.grp_ancestors_distance( usr.profile ) upgad ON m.grp = upgad.id LEFT JOIN actor.org_unit_ancestors_distance( potential_aged_circ.circ_lib ) ctoua ON m.org_unit = ctoua.id WHERE m.active AND (m.grp IS NULL OR upgad.id IS NOT NULL) AND (m.org_unit IS NULL OR ctoua.id IS NOT NULL); IF matching_exceptions > 0 THEN RETURN TRUE; END IF; RETURN FALSE; END; $$ LANGUAGE plpgsql; -- function for testing whether circ should be aged CREATE OR REPLACE FUNCTION sitka.age_circ_test(circ_id BIGINT, cutoff DATE, skip_basic_tests BOOL DEFAULT FALSE) RETURNS BOOLEAN AS $$ DECLARE potential_aged_circ action.circulation%ROWTYPE; balance NUMERIC; circ_chain_finish TIMESTAMP WITH TIME ZONE; usr_keep_start TEXT; usr_keep_age TEXT; usr_keep_date DATE; circ_cutoff DATE; has_exception BOOL; BEGIN -- use the cutoff param as the default cutoff date for the current circ circ_cutoff := cutoff; -- grab our circ SELECT * INTO potential_aged_circ FROM action.circulation WHERE id = circ_id; -- When you are batch-deleting old circs, you would normally gather a list of -- closed circs with no outstanding balance, and then pass each of those -- circs through the age_circ_test function to check for further exceptions. -- In that situation, we don't want to waste time and processing power -- re-checking whether the circs are closed or have outstanding balances, -- so we provide an option to skip those basic tests. IF skip_basic_tests IS FALSE THEN IF potential_aged_circ.xact_finish IS NULL THEN RETURN FALSE; END IF; SELECT balance_owed INTO balance FROM money.materialized_billable_xact_summary WHERE id = circ_id; IF balance != 0 THEN RETURN FALSE; END IF; END IF; -- renewals create a "chain" of circs; find the last circ in the current chain SELECT MAX(xact_finish) INTO circ_chain_finish FROM action.circ_chain(potential_aged_circ.id); -- user's history.circ.retention_start setting, if any, overrides default cutoff date SELECT value INTO usr_keep_start FROM actor.usr_setting WHERE usr = potential_aged_circ.usr AND name = 'history.circ.retention_start'; IF usr_keep_start IS NOT NULL THEN circ_cutoff := oils_json_to_text(usr_keep_start)::DATE; END IF; -- trim cutoff to user's history.circ.retention_age, if set SELECT oils_json_to_text(value) INTO usr_keep_age FROM actor.usr_setting WHERE usr = potential_aged_circ.usr AND name = 'history.circ.retention_age'; IF usr_keep_age IS NOT NULL AND sitka.is_valid_interval(usr_keep_age) IS TRUE THEN SELECT (now() - usr_keep_age::INTERVAL)::DATE INTO usr_keep_date; IF usr_keep_date IS NOT NULL AND usr_keep_date < circ_cutoff THEN circ_cutoff := usr_keep_date; END IF; END IF; -- don't age the chain if the last circ is later than the cutoff IF circ_chain_finish IS NULL OR circ_chain_finish > circ_cutoff THEN RETURN FALSE; END IF; -- don't age the circ if it matches a defined exception has_exception := FALSE; SELECT * INTO has_exception FROM sitka.has_age_circ_exception(potential_aged_circ.id); IF has_exception IS TRUE THEN RETURN FALSE; END IF; -- this circ passes all our tests RETURN TRUE; EXCEPTION WHEN OTHERS THEN RAISE WARNING 'Cannot age circ %, error during age circ test', circ_id; RETURN FALSE; END; $$ LANGUAGE plpgsql; \echo 'table for aged circs' CREATE TABLE IF NOT EXISTS sitka.aged_circs ( id BIGINT NOT NULL, delete_date DATE NOT NULL ); \echo 'table for aged circ count' CREATE TABLE IF NOT EXISTS sitka.aged_circs_by_user ( usr BIGINT NOT NULL, month INT NOT NULL, year INT NOT NULL, count INT NOT NULL DEFAULT 0, UNIQUE (usr, month, year) ); CREATE OR REPLACE FUNCTION sitka.age_circs(cutoff DATE, max_circ_count BIGINT) RETURNS INT AS $$ DECLARE today DATE; potential_aged_circ BIGINT; can_age_circ BOOL; circs_aged INT; BEGIN today := now()::DATE; -- TODO: more strictness here? CREATE TEMPORARY TABLE circs_to_age AS SELECT c.id FROM action.circulation c JOIN money.materialized_billable_xact_summary m ON c.id = m.id WHERE c.xact_finish IS NOT NULL AND c.xact_finish < cutoff AND m.balance_owed = 0 ORDER BY c.xact_start LIMIT max_circ_count; -- for each circ that we might want to age... FOR potential_aged_circ IN SELECT id FROM circs_to_age LOOP can_age_circ := NULL; -- test whether we can age each circ SELECT * INTO can_age_circ FROM sitka.age_circ_test(potential_aged_circ, cutoff, TRUE); IF can_age_circ IS FALSE OR can_age_circ IS NULL THEN DELETE FROM circs_to_age WHERE id = potential_aged_circ; END IF; END LOOP; -- update user circ counts CREATE TEMPORARY TABLE usr_circ_totals AS SELECT a.usr, (EXTRACT(month FROM a.xact_start)) AS month, (EXTRACT(year FROM a.xact_start)) AS year, count(*) AS fresh_circs FROM action.circulation a JOIN circs_to_age b ON a.id = b.id GROUP BY 1,2,3; UPDATE sitka.aged_circs_by_user a SET count = count + b.fresh_circs FROM usr_circ_totals b WHERE a.usr = b.usr AND a.month = b.month AND a.year = b.year; INSERT INTO sitka.aged_circs_by_user (usr, month, year, count) SELECT usr, month, year, fresh_circs FROM usr_circ_totals WHERE (usr, month, year) NOT IN (SELECT usr, month, year FROM sitka.aged_circs_by_user); DROP TABLE usr_circ_totals; -- old circs may have a null copy_location, but action.aged_circulation doesn't allow that UPDATE action.circulation a SET copy_location = 1 FROM circs_to_age b WHERE a.id = b.id AND a.copy_location IS NULL; -- this will set off the age_circs trigger on action.circulation DELETE FROM action.circulation WHERE id IN (SELECT id FROM circs_to_age); UPDATE action.aged_circulation SET usr_post_code = NULL, usr_birth_year = NULL WHERE usr_post_code IS NOT NULL OR usr_birth_year IS NOT NULL; INSERT INTO sitka.aged_circs (id, delete_date) SELECT id, today FROM circs_to_age; SELECT count(*) INTO circs_aged FROM circs_to_age; DROP TABLE circs_to_age; RETURN circs_aged; END; $$ LANGUAGE plpgsql; SAVEPOINT ready; ---- To actually age circs, do something like the following: --SELECT count(*) FROM action.aged_circulation; --SELECT * FROM sitka.age_circs( (now() - '2 years'::INTERVAL)::DATE, NULL ); --SELECT count(*) FROM action.aged_circulation; -- --\echo 'SAVEPOINT done:' --SAVEPOINT done; -- --COMMIT;