-- high_cardinality_columns.sql
--
-- Report the 10 columns with the highest estimated number of distinct values
-- across all tables, excluding the pg_catalog and information_schema schemas.
-- Shows the raw n_distinct statistic, the estimated distinct count derived
-- from it, and the estimated row count of the owning relation.
--
-- Run ANALYZE first so the planner statistics read here are up to date.
--
-- n_distinct has two encodings:
--   >= 0 : the estimated number of distinct values itself
--   <  0 : the negated fraction of the table's rows that are distinct
--          (e.g. -1 means every row is distinct)
-- The two encodings are not comparable with each other, so both are converted
-- to an absolute estimate before ranking.
WITH column_stats AS (
    SELECT
        s.schemaname,
        s.tablename,
        s.attname,
        s.n_distinct,
        rel.reltuples,
        -- Normalise both n_distinct encodings to an absolute distinct count.
        CASE
            WHEN s.n_distinct >= 0 THEN s.n_distinct
            ELSE -s.n_distinct * rel.reltuples
        END AS distinct_estimate
    FROM pg_stats AS s
    INNER JOIN pg_namespace AS nsp
        ON nsp.nspname = s.schemaname
    -- Match on schema and name together: relation names are only unique
    -- within a schema.
    INNER JOIN pg_class AS rel
        ON rel.relnamespace = nsp.oid
        AND rel.relname = s.tablename
    WHERE s.n_distinct IS NOT NULL
        AND s.schemaname NOT IN ('pg_catalog', 'information_schema')
)

SELECT
    schemaname,
    tablename,
    attname AS "column",
    n_distinct,
    CASE
        WHEN n_distinct >= 0 THEN n_distinct::text
        -- '~' marks a value derived from the fraction and the row estimate.
        ELSE '~' || round(distinct_estimate::numeric)::text
    END AS estimated_distinct,
    reltuples::numeric AS estimated_rows
FROM column_stats
ORDER BY
    -- Rank by the absolute estimate, not ABS(n_distinct): a fraction such as
    -- -1 on a large table represents far more distinct values than a small
    -- positive count.
    distinct_estimate DESC,
    -- Tie-breakers keep the LIMIT deterministic.
    schemaname,
    tablename,
    attname
LIMIT 10;