1. 自动收集场景
2. 自动收集原理
这些信息可以通过 “pg_stat_all_tables视图” 查询,也可以通过下面函数进行查询。
pg_stat_get_tuples_inserted --表累积insert条数 pg_stat_get_tuples_updated --表累积update条数 pg_stat_get_tuples_deleted --表累积delete条数 pg_stat_get_tuples_changed --表自上次analyze以来,修改的条数 pg_stat_get_last_analyze_time --查询最近一次analyze时间
因此,根据共享内存中 "表自上次analyze以来修改过的条数" 是否超过一定阈值,就可以判定是否需要做analyze了。
3. 自动收集阈值
3.1 全局阈值
autovacuum_analyze_threshold #表触发analyze的最小修改量 autovacuum_analyze_scale_factor #表触发analyze时的修改百分比
当"表自上次analyze以来修改的条数" >= autovacuum_analyze_threshold + 表估算大小 * autovacuum_analyze_scale_factor时,需要自动触发analyze。
3.2 表级阈值
--设置表级阈值 ALTER TABLE item SET (autovacuum_analyze_threshold=50); ALTER TABLE item SET (autovacuum_analyze_scale_factor=0.1); --查询阈值 postgres=# select pg_options_to_table(reloptions) from pg_class where relname='item'; pg_options_to_table --------------------------------------- (autovacuum_analyze_threshold,50) (autovacuum_analyze_scale_factor,0.1) (2 rows) --重置阈值 ALTER TABLE item RESET (autovacuum_analyze_threshold); ALTER TABLE item RESET (autovacuum_analyze_scale_factor);
3.3 查看表的修改量是否超过了阈值(仅当前CN)
postgres=# select pg_stat_get_local_analyze_status('t_analyze'::regclass); pg_stat_get_local_analyze_status ---------------------------------- Analyze not needed (1 row)
4. 自动收集方式
当查询中存在“统计信息完全缺失”或“修改量达到analyze阈值”的表,且执行计划不采取FQS (Fast Query Shipping)执行时,则通过autoanalyze控制此场景下表统计信息的自动收集。此时,查询语句会等待统计信息收集成功后,生成更优的执行计划,再执行原查询语句。
6.1 冻结表的distinct值
postgres=# alter table lineitem alter l_orderkey set (n_distinct=0.9); ALTER TABLE postgres=# select relname,attname,attoptions from pg_attribute a,pg_class c where c.oid=a.attrelid and attname='l_orderkey'; relname | attname | attoptions ----------+------------+------------------ lineitem | l_orderkey | {n_distinct=0.9} (1 row) postgres=# alter table lineitem alter l_orderkey reset (n_distinct); ALTER TABLE postgres=# select relname,attname,attoptions from pg_attribute a,pg_class c where c.oid=a.attrelid and attname='l_orderkey'; relname | attname | attoptions ----------+------------+------------ lineitem | l_orderkey | (1 row)
6.2 冻结表的全部统计信息
alter table table_name set frozen_stats=true;
7. 手动查看表是否需要做analyze
a. 不想在业务高峰期时触发数据库后台任务,所以不愿意打开autovacuum来触发analyze,怎么办?
b. 业务修改了一批表,想立即对这些表做一次analyze,又不知道都有哪些表,怎么办?
c. 业务高峰来临前想对临近阈值的表都做一次analyze,怎么办?
7.1 判断表是否需要analyze(串行版,适用于所有历史版本)
-- Decide whether a table needs ANALYZE (serial version).
--
-- Sums, over every coordinator (CN) listed in pgxc_node, the number of rows
-- changed since the last ANALYZE, then compares that total with
--     threshold + scale_factor * live_tuples
-- where threshold / scale_factor come from the table's reloptions when set
-- and from the autovacuum_analyze_* settings otherwise.
--
-- Parameters:
--   table_name  table name, optionally schema-qualified (resolved via regclass).
-- Returns:
--   true when the changed-row total exceeds the threshold, false otherwise.
-- Raises:
--   an exception when table_name is NULL or cannot be resolved to a relation.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text)
RETURNS bool
AS $$
DECLARE
    row_data             record;
    coor_name            record;
    rel_oid              oid;
    rel_schema           text;
    rel_name             text;
    remote_query         text;
    fet_active           text;
    rel_tuples           int8;        -- bigint: the statistics functions return bigint
    changed_tuples       int8 := 0;
    rel_anl_threshold    int4;
    rel_anl_scale_factor float4;
    anl_threshold        int4;
    anl_scale_factor     float4;
BEGIN
    IF table_name IS NULL THEN
        RAISE EXCEPTION 'table_name must not be null';
    END IF;

    -- Resolve the name once, locally, so that schema-qualified names work and
    -- same-named tables in other schemas are not counted.
    rel_oid := table_name::regclass;

    SELECT n.nspname, c.relname
      INTO rel_schema, rel_name
      FROM pg_class c
     INNER JOIN pg_namespace n ON n.oid = c.relnamespace
     WHERE c.oid = rel_oid;

    -- The lookup on each CN is done by (schema, name) rather than by the local
    -- oid. quote_literal() handles the nested quoting; the original literal
    -- never interpolated table_name at all.
    remote_query := 'SELECT pg_stat_get_tuples_changed(c.oid) AS changed'
                 || ' FROM pg_class c'
                 || ' INNER JOIN pg_namespace n ON n.oid = c.relnamespace'
                 || ' WHERE c.relname = ' || quote_literal(rel_name)
                 || ' AND n.nspname = ' || quote_literal(rel_schema);

    -- Accumulate the changed-row counter from every coordinator.
    FOR coor_name IN
        SELECT node_name FROM pgxc_node WHERE node_type = 'C'
    LOOP
        fet_active := 'EXECUTE DIRECT ON (' || quote_ident(coor_name.node_name) || ') '
                   || quote_literal(remote_query);
        FOR row_data IN EXECUTE fet_active LOOP
            changed_tuples := changed_tuples + COALESCE(row_data.changed, 0);
        END LOOP;
    END LOOP;

    rel_tuples := COALESCE(pg_stat_get_live_tuples(rel_oid), 0);

    -- Table-level overrides; NULL when the option is not set on the table.
    SELECT (SELECT option_value
              FROM pg_options_to_table(c.reloptions)
             WHERE option_name = 'autovacuum_analyze_threshold')
      INTO rel_anl_threshold
      FROM pg_class c
     WHERE c.oid = rel_oid;

    SELECT (SELECT option_value
              FROM pg_options_to_table(c.reloptions)
             WHERE option_name = 'autovacuum_analyze_scale_factor')
      INTO rel_anl_scale_factor
      FROM pg_class c
     WHERE c.oid = rel_oid;

    -- Fall back to the global settings when no table-level value exists.
    anl_threshold    := COALESCE(rel_anl_threshold,
                                 current_setting('autovacuum_analyze_threshold')::int4);
    anl_scale_factor := COALESCE(rel_anl_scale_factor,
                                 current_setting('autovacuum_analyze_scale_factor')::float4);

    RETURN changed_tuples > anl_threshold + anl_scale_factor * rel_tuples;
END;
$$ LANGUAGE plpgsql;
7.2 判断表是否需要analyze(并行版,适用于支持并行执行框架的版本)
-- Decide whether a table needs ANALYZE (parallel version).
--
-- Uses the parallel execution framework to collect, from every coordinator
-- (CN) at once, the number of rows changed since the last ANALYZE, e.g.
--   SELECT sum(a) FROM pg_catalog.pgxc_parallel_query('cn', 'SELECT 1::int FROM pg_class LIMIT 10') AS (a int);
-- The total is compared with
--     threshold + scale_factor * live_tuples
-- where threshold / scale_factor come from the table's reloptions when set
-- and from the autovacuum_analyze_* settings otherwise.
--
-- Parameters:
--   table_name  table name, optionally schema-qualified (resolved via regclass).
-- Returns:
--   true when the changed-row total exceeds the threshold, false otherwise.
-- Raises:
--   an exception when table_name is NULL or cannot be resolved to a relation.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text)
RETURNS bool
AS $$
DECLARE
    rel_oid              oid;
    rel_schema           text;
    rel_name             text;
    remote_query         text;
    rel_tuples           int8;        -- bigint: the statistics functions return bigint
    changed_tuples       int8 := 0;
    rel_anl_threshold    int4;
    rel_anl_scale_factor float4;
    sys_anl_threshold    int4;
    sys_anl_scale_factor float4;
    anl_threshold        int4;
    anl_scale_factor     float4;
BEGIN
    IF table_name IS NULL THEN
        RAISE EXCEPTION 'table_name must not be null';
    END IF;

    -- Resolve the name once, locally, so that schema-qualified names work and
    -- same-named tables in other schemas are not counted.
    rel_oid := table_name::regclass;

    SELECT n.nspname, c.relname
      INTO rel_schema, rel_name
      FROM pg_class c
     INNER JOIN pg_namespace n ON n.oid = c.relnamespace
     WHERE c.oid = rel_oid;

    -- Query shipped to every CN. quote_literal() handles the nested quoting;
    -- the original literal never interpolated table_name at all.
    remote_query := 'SELECT pg_stat_get_tuples_changed(c.oid)::int8'
                 || ' FROM pg_class c'
                 || ' INNER JOIN pg_namespace n ON n.oid = c.relnamespace'
                 || ' WHERE c.relname = ' || quote_literal(rel_name)
                 || ' AND n.nspname = ' || quote_literal(rel_schema);

    EXECUTE 'SELECT sum(a) FROM pg_catalog.pgxc_parallel_query(''cn'', '
         || quote_literal(remote_query)
         || ') AS (a int8)'
       INTO changed_tuples;

    -- sum() over zero rows is NULL; treat that as "nothing changed".
    changed_tuples := COALESCE(changed_tuples, 0);
    rel_tuples     := COALESCE(pg_stat_get_live_tuples(rel_oid), 0);

    sys_anl_threshold    := current_setting('autovacuum_analyze_threshold')::int4;
    sys_anl_scale_factor := current_setting('autovacuum_analyze_scale_factor')::float4;

    -- Table-level overrides; NULL when the option is not set on the table.
    SELECT (SELECT option_value
              FROM pg_options_to_table(c.reloptions)
             WHERE option_name = 'autovacuum_analyze_threshold')
      INTO rel_anl_threshold
      FROM pg_class c
     WHERE c.oid = rel_oid;

    SELECT (SELECT option_value
              FROM pg_options_to_table(c.reloptions)
             WHERE option_name = 'autovacuum_analyze_scale_factor')
      INTO rel_anl_scale_factor
      FROM pg_class c
     WHERE c.oid = rel_oid;

    -- Diagnostics go to the DEBUG log level instead of being printed on every call.
    RAISE DEBUG 'relTuples=%; sys_anl_threshold=%; sys_anl_scale_factor=%; rel_anl_threshold=%; rel_anl_scale_factor=%;',
        rel_tuples, sys_anl_threshold, sys_anl_scale_factor,
        rel_anl_threshold, rel_anl_scale_factor;

    -- Fall back to the global settings when no table-level value exists.
    anl_threshold    := COALESCE(rel_anl_threshold, sys_anl_threshold);
    anl_scale_factor := COALESCE(rel_anl_scale_factor, sys_anl_scale_factor);

    RETURN changed_tuples > anl_threshold + anl_scale_factor * rel_tuples;
END;
$$ LANGUAGE plpgsql;
7.3 判断表是否需要analyze(自定义阈值)
-- Decide whether a table needs ANALYZE using caller-supplied thresholds.
--
-- Uses the parallel execution framework to collect, from every coordinator
-- (CN), the number of rows changed since the last ANALYZE and compares the
-- total with
--     anl_threshold + anl_scale_factor * live_tuples
--
-- Parameters:
--   table_name        table name, optionally schema-qualified (resolved via regclass).
--   anl_threshold     minimum number of changed rows.
--   anl_scale_factor  fraction of the table's live rows added to the threshold.
-- Returns:
--   true when the changed-row total exceeds the threshold, false otherwise
--   (including when either threshold argument is NULL).
-- Raises:
--   an exception when table_name is NULL or cannot be resolved to a relation.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(
    in table_name    text,
    anl_threshold    int,     -- parameter name precedes its type
    anl_scale_factor float
)
RETURNS bool
AS $$
DECLARE
    rel_oid        oid;
    rel_schema     text;
    rel_name       text;
    remote_query   text;
    rel_tuples     int8;        -- bigint: the statistics functions return bigint
    changed_tuples int8 := 0;
    need_analyze   bool := false;
BEGIN
    IF table_name IS NULL THEN
        RAISE EXCEPTION 'table_name must not be null';
    END IF;

    -- Resolve the name once, locally, so that schema-qualified names work and
    -- same-named tables in other schemas are not counted.
    rel_oid := table_name::regclass;

    SELECT n.nspname, c.relname
      INTO rel_schema, rel_name
      FROM pg_class c
     INNER JOIN pg_namespace n ON n.oid = c.relnamespace
     WHERE c.oid = rel_oid;

    -- Query shipped to every CN. quote_literal() handles the nested quoting;
    -- the original literal never interpolated table_name at all.
    remote_query := 'SELECT pg_stat_get_tuples_changed(c.oid)::int8'
                 || ' FROM pg_class c'
                 || ' INNER JOIN pg_namespace n ON n.oid = c.relnamespace'
                 || ' WHERE c.relname = ' || quote_literal(rel_name)
                 || ' AND n.nspname = ' || quote_literal(rel_schema);

    EXECUTE 'SELECT sum(a) FROM pg_catalog.pgxc_parallel_query(''cn'', '
         || quote_literal(remote_query)
         || ') AS (a int8)'
       INTO changed_tuples;

    -- sum() over zero rows is NULL; treat that as "nothing changed".
    changed_tuples := COALESCE(changed_tuples, 0);
    rel_tuples     := COALESCE(pg_stat_get_live_tuples(rel_oid), 0);

    IF changed_tuples > anl_threshold + anl_scale_factor * rel_tuples THEN
        need_analyze := true;
    END IF;

    RETURN need_analyze;
END;
$$ LANGUAGE plpgsql;
