Skip to content

Instantly share code, notes, and snippets.

@rdyv
Last active April 1, 2024 19:16
Show Gist options
  • Save rdyv/e2d8f9f6ebecc69f608e77303fc9e474 to your computer and use it in GitHub Desktop.
Save rdyv/e2d8f9f6ebecc69f608e77303fc9e474 to your computer and use it in GitHub Desktop.
Postgres Cheatsheet
/*
***********************************************
psql -h host -U user db: Connect to database
psql -E: Describe the underlying queries of the \ commands
psql -l: List all databases and exit
\q: Quit
\c db_name: Switch database
\l: Show databases
\d table_name: Show table info
\d+: Show extended table info
\x: Toggle expanded display
\du: List users and their privileges
***********************************************
*/
-----------------------------------------------
-- Locks
-----------------------------------------------
-- See all the locks held on user tables.
-- (Rewritten from an implicit comma join to an explicit ANSI JOIN.)
SELECT t.relname, l.locktype, l.page, l.virtualtransaction, l.pid, l.mode, l.granted
FROM pg_locks l
INNER JOIN pg_stat_all_tables t ON l.relation = t.relid
ORDER BY l.relation ASC;
-- See all the locks with queries: each waiting session paired with the
-- session(s) blocking it (pg_blocking_pids requires Postgres 9.6+).
SELECT activity.client_addr, activity.pid, activity.usename, activity.query,
       blocking.pid AS blocking_id, blocking.query AS blocking_query
FROM pg_stat_activity AS activity
INNER JOIN pg_stat_activity AS blocking
    ON blocking.pid = ANY (pg_blocking_pids(activity.pid));
-- See all the locks with queries and lock age (how long the query has been running).
SELECT a.datname, l.relation::regclass, l.transactionid, l.mode, l.granted,
       a.usename, a.query, a.query_start,
       age(now(), a.query_start) AS "age", a.pid
FROM pg_stat_activity a
INNER JOIN pg_locks l ON l.pid = a.pid
ORDER BY a.query_start;
-- Get exclusive-mode locks with the query that holds or awaits them.
-- ('%xclusive%' matches both Exclusive and AccessExclusive variants.)
SELECT l.pid, l.mode, sa.pid, sa.query
FROM pg_locks l
INNER JOIN pg_stat_activity sa ON l.pid = sa.pid
WHERE l.mode LIKE '%xclusive%';
-----------------------------------------------
-- Indexes
-----------------------------------------------
-- Create an index in production without taking an exclusive lock on the table.
-- (TABLE is a reserved word, so the placeholder must be a real identifier.)
CREATE INDEX CONCURRENTLY idx_table_pid ON table_name (pid);
-- Create a GIN trigram index.
-- gin_trgm_ops: Index using trigrams over the selected columns. A trigram is a
-- data structure that holds 3 letters of a word. Postgres breaks each text
-- column down into trigrams and uses them in the index when searching.
-- Caveat: the input query must be at least 3 letters, as Postgres needs to
-- extract at least one trigram from it in order to use the trigram index.
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX lol_idx ON table_name
USING gin (col_1 gin_trgm_ops, col_2 gin_trgm_ops);
-----------------------------------------------
-- Operations
-----------------------------------------------
-- Cancel a running query (the column was renamed procpid -> pid in Postgres 9.2;
-- the old name fails on any modern server).
SELECT pg_cancel_backend(pid);
-- Terminate an idle backend/session entirely.
SELECT pg_terminate_backend(pid);
-- Garbage-collect and optionally analyze a database.
VACUUM (VERBOSE, ANALYZE);
-- Dump database on remote host to file (shell command, not SQL).
-- $ pg_dump -U username -h hostname databasename > dump.sql
-- Import dump into existing database (shell command, not SQL).
-- $ psql -d newdb -f dump.sql
-----------------------------------------------
-- Stats
-----------------------------------------------
-- Get vacuum/analyze timing stats per table.
SELECT relname, last_vacuum, last_autovacuum, last_analyze, last_autoanalyze
FROM pg_stat_user_tables;
-- All databases with sizes (pg_database_size includes indexes).
SELECT datname, pg_size_pretty(pg_database_size(datname))
FROM pg_database
ORDER BY pg_database_size(datname) DESC;
-- All tables with sizes including indexes and TOAST.
SELECT nspname || '.' || relname AS "relation",
       pg_size_pretty(pg_total_relation_size(C.oid)) AS "total_size"
FROM pg_class C
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
WHERE nspname NOT IN ('pg_catalog', 'information_schema')
  AND C.relkind <> 'i'
  AND nspname !~ '^pg_toast'
ORDER BY pg_total_relation_size(C.oid) DESC;
-- Cache hit ratio for tables.
-- (NULLIF guards against division by zero right after stats reset.)
SELECT sum(heap_blks_read) AS heap_read,
       sum(heap_blks_hit) AS heap_hit,
       sum(heap_blks_hit) / NULLIF(sum(heap_blks_hit) + sum(heap_blks_read), 0) AS ratio
FROM pg_statio_user_tables;
-- Cache hit ratio for indexes (should be ~99%).
-- BUG FIX: the ratio is hits / (hits + reads); the original computed
-- (hits - reads) / hits, which is not a hit ratio at all.
SELECT sum(idx_blks_read) AS idx_read,
       sum(idx_blks_hit) AS idx_hit,
       sum(idx_blks_hit) / NULLIF(sum(idx_blks_hit) + sum(idx_blks_read), 0) AS ratio
FROM pg_statio_user_indexes;
-- How much a table is utilizing its indexes (should be ~99% for tables with >10k rows).
SELECT relname,
       100 * idx_scan / (seq_scan + idx_scan) AS percent_of_times_index_used,
       n_live_tup AS rows_in_table
FROM pg_stat_user_tables
WHERE seq_scan + idx_scan > 0
ORDER BY n_live_tup DESC;
-- Long running queries (>2m).
SELECT now() - query_start AS "runtime", usename, datname, client_addr,
       client_hostname, wait_event, state, query
FROM pg_stat_activity
WHERE now() - query_start > '2 minutes'::interval
ORDER BY runtime DESC;
-----------------------------------------------
-- Random
-----------------------------------------------
-- Estimated row count of a table, for when it is too large for count(*).
SELECT schemaname, relname, n_live_tup
FROM pg_stat_user_tables
WHERE relname = 'table_name';
-- Generate a million rows of random data.
SELECT md5(random()::text), md5(random()::text)
FROM (SELECT *
      FROM generate_series(1, 1000000) AS id) AS x;
-- Compare a timestamp column against a date string.
-- (Casting the column defeats a plain index on timestamp_column; a half-open
--  range predicate would be index-friendly.)
SELECT *
FROM table_name
WHERE timestamp_column::date = date '2015-07-15';
-- Compare a timestamp column against a time string.
SELECT *
FROM table_name
WHERE timestamp_column >= timestamp '2015-07-15 00:00:00';
-- Is the current postgres a master?
-- true => standby (replica); false => master/primary.
SELECT pg_is_in_recovery();
-- Replication lag on a standby.
SELECT now() - pg_last_xact_replay_timestamp() AS replication_delay;
-- Get query timeout.
SHOW statement_timeout;
-- Get query by PID.
-- (pid is an integer column; replace 12345 with the target backend PID —
--  the original compared it against a string placeholder.)
SELECT pid, usename AS username, datname AS database_name, query,
       application_name, backend_start, state, state_change
FROM pg_stat_activity
WHERE pid = 12345;
-----------------------------------------------
-- Corruption
-----------------------------------------------
-- Can be caused by deferred fsync, network-attached storage, hardware issues,
-- or soft-RAID edge conditions.
-- Storage is cheap: keep enough space to make a full file-system level copy.
-- Back up postgres.
-- Data that exists on only one drive is data you do not truly possess.
-- An untested backup strategy isn't one: test the backups.
-- (pg_start_backup must be invoked via SELECT and requires a label argument.)
SELECT pg_start_backup('backup_label');
-- Kill a postgres child process (replace 12345 with the backend PID).
SELECT pg_terminate_backend(12345);
-- Check consistency: reads every single row in the database.
-- Great for finding lurking corruption; the pgstattuple extension can also be used.
-- (Shell command, not SQL.)
-- $ pg_dump databasename > /dev/null
-- Index corruption:
-- The most common kind of corruption.
-- Drop the index in a transaction, and confirm that solves the problem.
-- If so, rebuild the index; if not, it's probably not index corruption.
-- This does not repair corruption; it just rebuilds the index.
-- Bad data pages:
-- Checksum failures, complaints about bad headers, etc.
-- Can you do a pg_dump of the table? Reads every row; output should be clean.
-- zero_damaged_pages = on
-- Can you SELECT around them?
-- Do a COPY out of the good data, drop table, COPY back in.
-- Or do a CREATE TABLE from the SELECT, rename appropriately.
-- DELETE just the bad rows by ctid, if you can isolate them.
-- In case of a TOAST error,
-- e.g. unexpected chunk number 0 (expected 1) for toast value 123 in pg_toast_456
-- The number in pg_toast_NNN is the OID of the owning table; resolve it to a name:
SELECT 456::regclass;
-- Find a table's TOAST table.
-- (BUG FIX: the original selected the constant 456::regclass, which ignores the
--  WHERE clause entirely; reltoastrelid is the column that points at the TOAST table.)
SELECT reltoastrelid::regclass FROM pg_class WHERE relname = 'mytable';
-- Then repair the TOAST index:
REINDEX TABLE pg_toast.pg_toast_456;
-- Or, possibly reindex the whole table:
REINDEX TABLE mytable;
-- If autovacuum is not enabled, run:
VACUUM ANALYZE mytable;
-- In some cases this is not enough; binary-search for the corrupt chunk:
SELECT count(*) FROM mytable;
SELECT * FROM mytable ORDER BY id LIMIT 5000 OFFSET 0;
SELECT * FROM mytable ORDER BY id LIMIT 5000 OFFSET 5000;
-- Continue until you get the 'Missing chunk...' error, then binary-search down to
-- the corrupt row and delete it by id.
-- Script to automate this; the corrupted segment is ideally near the end of the table.
/*
#!/bin/bash
j=0
# set the loop bound to the total number of table rows
while [ $j -lt 1000 ]; do
    psql -U pgsql -d mydatabase -c "SELECT * FROM mytable LIMIT 1 OFFSET $j" >/dev/null || echo $j
    # BUG FIX: original had j=$($j+1), which tries to *execute* "$j+1" as a
    # command; $(( )) is bash arithmetic expansion.
    j=$((j+1))
done
*/
-- Dead tuples per user table.
SELECT relname, n_dead_tup
FROM pg_stat_user_tables;
-- Largest table by disk usage (heap + TOAST, excluding indexes).
SELECT relname AS "table_name",
       pg_size_pretty(pg_table_size(c.oid)) AS "table_size"
FROM pg_class c
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
WHERE nspname NOT IN ('pg_catalog', 'information_schema')
  AND nspname !~ '^pg_toast'
  AND relkind IN ('r')
ORDER BY pg_table_size(c.oid) DESC
LIMIT 1;
-- When vacuum / autovacuum last ran per table.
SELECT relname, last_vacuum, last_autovacuum
FROM pg_stat_user_tables;
-- Is autovacuum enabled, and what are its settings?
SELECT name, setting
FROM pg_settings
WHERE name = 'autovacuum';
SELECT *
FROM pg_settings
WHERE category LIKE 'Autovacuum';
-- Session states.
SELECT xact_start, state, usename
FROM pg_stat_activity;
-- Transaction ids remaining before autovacuum forces a wraparound vacuum.
SELECT oid::regclass::text AS table,
       age(relfrozenxid) AS xid_age,
       mxid_age(relminmxid) AS mxid_age,
       least(
           (SELECT setting::int
            FROM pg_settings
            WHERE name = 'autovacuum_freeze_max_age') - age(relfrozenxid),
           (SELECT setting::int
            FROM pg_settings
            WHERE name = 'autovacuum_multixact_freeze_max_age') - mxid_age(relminmxid)
       ) AS tx_before_wraparound_vacuum,
       pg_size_pretty(pg_total_relation_size(oid)) AS size,
       pg_stat_get_last_autovacuum_time(oid) AS last_autovacuum
FROM pg_class
WHERE relfrozenxid <> 0
  AND oid > 16384
ORDER BY tx_before_wraparound_vacuum;
-----------------------------------------------
-- Tools
-----------------------------------------------
-- pg_hexedit: Experimental hex editor toolkit, can be used for low-level analysis of PostgreSQL relation files.
-- It should only be run against a copy of the database.
-- pg_visibility: Examining the visibility map (VM) and page-level visibility information of a table.
-- It also provides functions to check the integrity of a visibility map and to force it to be rebuilt. For postgres 9.6+.
-- amcheck: Verify the logical consistency of the structure of indexes.
-- If the structure appears to be valid, no error is raised. For postgres 10+.
-- For postgres pre-10: petergeoghegan/amcheck
-----------------------------------------------
-- Links
-----------------------------------------------
-- PGConf-2017 Corruption: https://thebuild.com/presentations/corruption-pgconf-2017.pdf
-- [Table bloat](https://github.com/ioguix/pgsql-bloat-estimation/blob/master/table/table_bloat.sql)
-- [Index bloat](https://github.com/ioguix/pgsql-bloat-estimation/blob/master/btree/btree_bloat.sql)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment