After this, I realised that since I tended to do aggregate my reports on a per month basis, per month partitions might help speed things up. Here's a small pgsql script that inserts a row into the proper partition based on the log's timestamp. If the partition doesn't exist, it's auto created. It's not pretty/optimized but it works pretty well.
CREATE OR REPLACE FUNCTION logs_insert_func()
RETURNS TRIGGER AS $$
DECLARE
ourTable varchar;
ourTableExists integer;
ourFirstOfMonth date;
ourFirstOfNextMonth date;
ourInsertSTMT TEXT;
ourCreateSTMT TEXT;
ourMasterTable TEXT;
BEGIN
-- The table we'll inherit from
ourMasterTable := 'logs';
-- Get the partition table names ~ master_year_month
SELECT ourMasterTable|| '_' || EXTRACT(ISOYEAR FROM NEW.log_time) || '_' || EXTRACT(MONTH FROM NEW.log_time) into ourTable;
-- Create our insert statement
ourInsertSTMT := 'INSERT INTO '|| ourTable || ' (status,log_time,svc_time,ip_addr,query) VALUES (';
ourInsertSTMT := ourInsertSTMT|| NEW.status ||',';
ourInsertSTMT := ourInsertSTMT|| quote_nullable(NEW.log_time) ||',';
ourInsertSTMT := ourInsertSTMT|| NEW.svc_time ||',';
ourInsertSTMT := ourInsertSTMT|| quote_nullable(NEW.ip_addr) ||',';
ourInsertSTMT := ourInsertSTMT|| quote_nullable(NEW.query);
ourInsertSTMT := ourInsertSTMT || ')';
--
--Try execute it
EXECUTE ourInsertSTMT;
-- Phew! We didn't Except
RETURN NULL;
EXCEPTION
WHEN OTHERS THEN
-- Insert failed. Let's check whether the table exists
SELECT count(*) into ourTableExists
FROM pg_catalog.pg_class c
WHERE c.relname = ourTable;
-- If it doesn't exist, try create it
IF ourTableExists = 0 THEN
-- First of this month and next month
SELECT date_trunc('month', NEW.log_time) into ourFirstOfMonth;
SELECT (ourFirstOfMonth + interval '1 month')::date into ourFirstOfNextMonth;
-- Create partition with range
ourCreateSTMT := 'CREATE TABLE '|| ourTable || '(';
ourCreateSTMT := ourCreateSTMT ||' CHECK ( log_time >=' || quote_nullable(ourFirstOfMonth) ;
ourCreateSTMT := ourCreateSTMT ||' AND log_time < DATE '|| quote_nullable(ourFirstOfNextMonth) || ')';
ourCreateSTMT := ourCreateSTMT ||') INHERITS (' || ourMasterTable || ')';
RAISE NOTICE 'Attempting to create a new table with STMT %',ourCreateSTMT;
EXECUTE ourCreateSTMT;
-- Retry to insert row
EXECUTE ourInsertSTMT;
IF NOT found THEN
RAISE NOTICE 'Error inserting into created partition % for %',ourTable,ourInsertSTMT;
END IF;
ELSE
RAISE NOTICE 'Error inserting into existing partition % for %',ourTable,ourInsertSTMT;
END IF;
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
The master table:
CREATE TABLE logs
(
id bigserial NOT NULL,
status smallint,
log_time timestamp without time zone,
svc_time real,
ip_addr inet,
query character varying(2048),
CONSTRAINT logs_id PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
* Trigger to be called on each insert
CREATE TRIGGER logs_insert_trigger
BEFORE INSERT
ON logs
FOR EACH ROW
EXECUTE PROCEDURE logs_insert_func();
An example insert auto-creates the relevant partitions:
# insert into logs (status,log_time,svc_time,ip_addr,query) (select status,log_time,svc_time,ip_addr,query from whois_logs);
NOTICE: Attempting to create a new table with STMT CREATE TABLE logs_2009_1( CHECK ( log_time >='2010-01-01' AND log_time < DATE '2010-02-01')) INHERITS (logs)
NOTICE: Attempting to create a new table with STMT CREATE TABLE logs_2009_5( CHECK ( log_time >='2009-05-01' AND log_time < DATE '2009-06-01')) INHERITS (logs)
NOTICE: Attempting to create a new table with STMT CREATE TABLE logs_2006_9( CHECK ( log_time >='2006-09-01' AND log_time < DATE '2006-10-01')) INHERITS (logs)
Works for me. Yay!
No comments:
Post a Comment