Skip to content

Instantly share code, notes, and snippets.

@cabecada
Last active October 7, 2024 16:41
Show Gist options
  • Save cabecada/feccb0c7e4303e8da40833ccf47132c0 to your computer and use it in GitHub Desktop.
Save cabecada/feccb0c7e4303e8da40833ccf47132c0 to your computer and use it in GitHub Desktop.
simulate wraparound and recovery using xid_wraparound extension
#this is mimicking https://github.com/postgres/postgres/blob/a68159ff2b32f290b1136e2940470d50b8491301/src/test/modules/xid_wraparound/t/002_limits.pl
#but many times users dont look at source code, so these examples are missed
```
./configure --prefix=/opt/postgresql --with-openssl --enable-debug --enable-profiling --enable-cassert --enable-tap-tests CFLAGS="-ggdb -Og -g3 -fno-omit-frame-pointer -DLOCK_DEBUG -DBTREE_BUILD_STATS -DWAL_DEBUG "
make
cd /home/ubuntu/github/postgres/src/test/modules/xid_wraparound
make check PG_TEST_EXTRA='xid_wraparound'
sudo make install
sudo chown -R postgres:postgres /opt/postgresql
sudo chown -R 744 /opt/postgresql
```
#we are trying to mock wraparound using extension xid_wraparound and then fake consume xids
#once the wraparound triggers, forcing the db into readonly mode, we vacuum full the dbs to get the db back to writable mode
#things can get quite complicated in real world as these limits are hit when resources consumption are already too high
#but the point of this was to know what it means for alerts and logs and let developers know open tx and not closing them is a problem
# we ensure to keep console quiet
postgres@ubuntu:/tmp$ grep message db1/postgresql.auto.conf
log_min_messages = error
client_min_messages = error
#start the db
postgres@ubuntu:/tmp$ p1 start
waiting for server to start.... done
server started
postgres@ubuntu:/tmp$ psql
psql (18devel)
Type "help" for help.
postgres=# create extension xid_wraparound;
CREATE EXTENSION
-- with autovacuum_enabled = off, we ensure autovacuum is not triggered unless we hit the limits
postgres=# CREATE TABLE wraparoundtest(t text) WITH (autovacuum_enabled = off);
CREATE TABLE
postgres=# INSERT INTO wraparoundtest values ('start');
INSERT 0 1
postgres=# -- Start a background session, which holds a transaction open, preventing autovacuum from advancing relfrozenxid and datfrozenxid.
postgres=# \! psql -c 'BEGIN; INSERT INTO wraparoundtest VALUES ($$oldxact$$); select pg_sleep(10000);' &
-- the above tx never closes, so that insert value is not visible
postgres=# select * from wraparoundtest;
t
-------
start
(1 row)
postgres=# -- consume 1 billion transactions while open tx above
postgres=# SELECT consume_xids(1000000000); -- i have reduced log vebosity to error to keep console quiet
consume_xids
--------------
1000000743
(1 row)
postgres=# INSERT INTO wraparoundtest values ('after 1 billion');
INSERT 0 1
postgres=# -- consume 1 billion transactions more while open tx above
postgres=# SELECT consume_xids(1000000000); -- i have reduced log vebosity to error to keep console quiet
consume_xids
--------------
2000000756
(1 row)
postgres=# INSERT INTO wraparoundtest values ('after 2 billion');
INSERT 0 1
postgres=# -- We are now just under 150 million XIDs away from wraparound, Continue consuming XIDs, in batches of 10 million, until we get the warning:
postgres=# /*
postgres*# # WARNING: database "postgres" must be vacuumed within 3000024 transactions
# HINT: To avoid a database shutdown, execute a database-wide VACUUM in that database.
# You might also need to commit or roll back old prepared transactions, or drop stale replication slots.
postgres*# */
postgres-# ;
postgres=# select consume_xids(20000000);
consume_xids
--------------
2020000759
(1 row)
postgres=# select consume_xids(100000000);
consume_xids
--------------
2120000760
(1 row)
postgres=# select consume_xids(100000000); -- we will now see the warning
ERROR: database is not accepting commands that assign new transaction IDs to avoid wraparound data loss in database "postgres"
HINT: Execute a database-wide VACUUM in that database.
You might also need to commit or roll back old prepared transactions, or drop stale replication slots.
postgres=# -- now our db will not allow any new transactions
postgres=# INSERT INTO wraparoundtest values ('after wraparound'); -- this will fail and no more txid will be generated
ERROR: database is not accepting commands that assign new transaction IDs to avoid wraparound data loss in database "postgres"
HINT: Execute a database-wide VACUUM in that database.
You might also need to commit or roll back old prepared transactions, or drop stale replication slots.
postgres=# select * from wraparoundtest;
t
-----------------
start
after 1 billion
after 2 billion
(3 rows)
postgres=# \x
Expanded display is on.
postgres=# select * from pg_stat_activity where query ~ 'sleep' and pid <> pg_backend_pid();
-[ RECORD 1 ]----+--------------------------------------------------------------------------------
datid | 5
datname | postgres
pid | 14423
leader_pid |
usesysid | 10
usename | postgres
application_name | psql
client_addr |
client_hostname |
client_port | -1
backend_start | 2024-10-07 16:05:21.812371+00
xact_start | 2024-10-07 16:05:21.814932+00
query_start | 2024-10-07 16:05:21.814932+00
state_change | 2024-10-07 16:05:21.814936+00
wait_event_type | Timeout
wait_event | PgSleep
state | active
backend_xid | 742
backend_xmin | 742
query_id |
query | BEGIN; INSERT INTO wraparoundtest VALUES ($$oldxact$$); select pg_sleep(10000);
backend_type | client backend
-- our tx age is well beyond the setting we have in conf
postgres=# \x
Expanded display is off.
postgres=# SELECT datname
, age(datfrozenxid)
, current_setting('autovacuum_freeze_max_age')
FROM pg_database
ORDER BY 2 DESC;
datname | age | current_setting
-----------+------------+-----------------
postgres | 2144484419 | 200000000
template1 | 2144484419 | 200000000
template0 | 2144484419 | 200000000
(3 rows)
-- some standard queries that confirm the same
postgres=# WITH max_age AS (
SELECT 2000000000 as max_old_xid
, setting AS autovacuum_freeze_max_age
FROM pg_catalog.pg_settings
WHERE name = 'autovacuum_freeze_max_age' )
, per_database_stats AS (
SELECT datname
, m.max_old_xid::int
, m.autovacuum_freeze_max_age::int
, age(d.datfrozenxid) AS oldest_current_xid
FROM pg_catalog.pg_database d
JOIN max_age m ON (true)
WHERE d.datallowconn )
SELECT max(oldest_current_xid) AS oldest_current_xid
, max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound
, max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac
FROM per_database_stats;
oldest_current_xid | percent_towards_wraparound | percent_towards_emergency_autovac
--------------------+----------------------------+-----------------------------------
2144484419 | 107 | 1072
(1 row)
postgres=# select pg_cancel_backend(14423);
BEGIN
INSERT 0 1
ERROR: canceling statement due to user request
pg_cancel_backend
-------------------
t
(1 row)
postgres=# WITH max_age AS (
SELECT 2000000000 as max_old_xid
, setting AS autovacuum_freeze_max_age
FROM pg_catalog.pg_settings
WHERE name = 'autovacuum_freeze_max_age' )
, per_database_stats AS (
SELECT datname
, m.max_old_xid::int
, m.autovacuum_freeze_max_age::int
, age(d.datfrozenxid) AS oldest_current_xid
FROM pg_catalog.pg_database d
JOIN max_age m ON (true)
WHERE d.datallowconn )
SELECT max(oldest_current_xid) AS oldest_current_xid
, max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound
, max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac
FROM per_database_stats;
oldest_current_xid | percent_towards_wraparound | percent_towards_emergency_autovac
--------------------+----------------------------+-----------------------------------
2144484419 | 107 | 1072
(1 row)
postgres=# \x
Expanded display is on.
postgres=# select * from pg_stat_activity where query ~ 'sleep' and pid <> pg_backend_pid();
(0 rows)
postgres=# -- we now have to run vacuum full in single user mode on the dbs to make it allow writes again
postgres=#
\q
postgres@ubuntu:/tmp$ p1 stop
waiting for server to shut down.... done
server stopped
# running in single user mode
postgres@ubuntu:/tmp$ postgres --single -D db1 postgres
PostgreSQL stand-alone backend 18devel
backend> vacuum full;
postgres@ubuntu:/tmp$ postgres --single -D db1 template1
PostgreSQL stand-alone backend 18devel
backend> vacuum full;
postgres@ubuntu:/tmp$ postgres --single -D db1 template0
PostgreSQL stand-alone backend 18devel
backend> vacuum full;
#now we start the database and run the same monitoring queries to ensure no more past transactions
postgres@ubuntu:/tmp$ p1 start
waiting for server to start.... done
server started
postgres@ubuntu:/tmp$ psql
psql (18devel)
Type "help" for help.
postgres=# WITH max_age AS (
SELECT 2000000000 as max_old_xid
, setting AS autovacuum_freeze_max_age
FROM pg_catalog.pg_settings
WHERE name = 'autovacuum_freeze_max_age' )
, per_database_stats AS (
SELECT datname
, m.max_old_xid::int
, m.autovacuum_freeze_max_age::int
, age(d.datfrozenxid) AS oldest_current_xid
FROM pg_catalog.pg_database d
JOIN max_age m ON (true)
WHERE d.datallowconn )
SELECT max(oldest_current_xid) AS oldest_current_xid
, max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound
, max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac
FROM per_database_stats;
oldest_current_xid | percent_towards_wraparound | percent_towards_emergency_autovac
--------------------+----------------------------+-----------------------------------
205 | 0 | 0
(1 row)
postgres=# INSERT INTO wraparoundtest values ('after wraparound followed by vacuum full of all dbs'); -- this will now succeed
INSERT 0 1
postgres=# select * from wraparoundtest;
t
-----------------------------------------------------
start
after 1 billion
after 2 billion
after wraparound followed by vacuum full of all dbs
(4 rows)
postgres=#
\q
postgres@ubuntu:/tmp$ psql
psql (18devel)
Type "help" for help.
postgres=# SELECT datname
, age(datfrozenxid)
, current_setting('autovacuum_freeze_max_age')
FROM pg_database
ORDER BY 2 DESC;
datname | age | current_setting
-----------+-----+-----------------
postgres | 219 | 200000000
template1 | 150 | 200000000
template0 | 82 | 200000000
(3 rows)
postgres=#
\q
postgres@ubuntu:/tmp$ p1 stop
waiting for server to shut down.... done
server stopped
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment