Last active
October 7, 2024 16:41
-
-
Save cabecada/feccb0c7e4303e8da40833ccf47132c0 to your computer and use it in GitHub Desktop.
simulate wraparound and recovery using xid_wraparound extension
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#this is mimicking https://github.com/postgres/postgres/blob/a68159ff2b32f290b1136e2940470d50b8491301/src/test/modules/xid_wraparound/t/002_limits.pl | |
#but many times users dont look at source code, so these examples are missed | |
``` | |
./configure --prefix=/opt/postgresql --with-openssl --enable-debug --enable-profiling --enable-cassert --enable-tap-tests CFLAGS="-ggdb -Og -g3 -fno-omit-frame-pointer -DLOCK_DEBUG -DBTREE_BUILD_STATS -DWAL_DEBUG " | |
make | |
cd /home/ubuntu/github/postgres/src/test/modules/xid_wraparound | |
make check PG_TEST_EXTRA='xid_wraparound' | |
sudo make install | |
sudo chown -R postgres:postgres /opt/postgresql | |
sudo chown -R 744 /opt/postgresql | |
``` | |
#we are trying to mock wraparound using extension xid_wraparound and then fake consume xids | |
#once the wraparound triggers, forcing the db into readonly mode, we vacuum full the dbs to get the db back to writable mode | |
#things can get quite complicated in real world as these limits are hit when resources consumption are already too high | |
#but the point of this was to know what it means for alerts and logs and let developers know open tx and not closing them is a problem | |
# we ensure to keep console quiet | |
postgres@ubuntu:/tmp$ grep message db1/postgresql.auto.conf | |
log_min_messages = error | |
client_min_messages = error | |
#start the db | |
postgres@ubuntu:/tmp$ p1 start | |
waiting for server to start.... done | |
server started | |
postgres@ubuntu:/tmp$ psql | |
psql (18devel) | |
Type "help" for help. | |
postgres=# create extension xid_wraparound; | |
CREATE EXTENSION | |
-- with autovacuum_enabled = off, we ensure autovacuum is not triggered unless we hit the limits | |
postgres=# CREATE TABLE wraparoundtest(t text) WITH (autovacuum_enabled = off); | |
CREATE TABLE | |
postgres=# INSERT INTO wraparoundtest values ('start'); | |
INSERT 0 1 | |
postgres=# -- Start a background session, which holds a transaction open, preventing autovacuum from advancing relfrozenxid and datfrozenxid. | |
postgres=# \! psql -c 'BEGIN; INSERT INTO wraparoundtest VALUES ($$oldxact$$); select pg_sleep(10000);' & | |
-- the above tx never closes, so that insert value is not visible | |
postgres=# select * from wraparoundtest; | |
t | |
------- | |
start | |
(1 row) | |
postgres=# -- consume 1 billion transactions while open tx above | |
postgres=# SELECT consume_xids(1000000000); -- i have reduced log vebosity to error to keep console quiet | |
consume_xids | |
-------------- | |
1000000743 | |
(1 row) | |
postgres=# INSERT INTO wraparoundtest values ('after 1 billion'); | |
INSERT 0 1 | |
postgres=# -- consume 1 billion transactions more while open tx above | |
postgres=# SELECT consume_xids(1000000000); -- i have reduced log vebosity to error to keep console quiet | |
consume_xids | |
-------------- | |
2000000756 | |
(1 row) | |
postgres=# INSERT INTO wraparoundtest values ('after 2 billion'); | |
INSERT 0 1 | |
postgres=# -- We are now just under 150 million XIDs away from wraparound, Continue consuming XIDs, in batches of 10 million, until we get the warning: | |
postgres=# /* | |
postgres*# # WARNING: database "postgres" must be vacuumed within 3000024 transactions | |
# HINT: To avoid a database shutdown, execute a database-wide VACUUM in that database. | |
# You might also need to commit or roll back old prepared transactions, or drop stale replication slots. | |
postgres*# */ | |
postgres-# ; | |
postgres=# select consume_xids(20000000); | |
consume_xids | |
-------------- | |
2020000759 | |
(1 row) | |
postgres=# select consume_xids(100000000); | |
consume_xids | |
-------------- | |
2120000760 | |
(1 row) | |
postgres=# select consume_xids(100000000); -- we will now see the warning | |
ERROR: database is not accepting commands that assign new transaction IDs to avoid wraparound data loss in database "postgres" | |
HINT: Execute a database-wide VACUUM in that database. | |
You might also need to commit or roll back old prepared transactions, or drop stale replication slots. | |
postgres=# -- now our db will not allow any new transactions | |
postgres=# INSERT INTO wraparoundtest values ('after wraparound'); -- this will fail and no more txid will be generated | |
ERROR: database is not accepting commands that assign new transaction IDs to avoid wraparound data loss in database "postgres" | |
HINT: Execute a database-wide VACUUM in that database. | |
You might also need to commit or roll back old prepared transactions, or drop stale replication slots. | |
postgres=# select * from wraparoundtest; | |
t | |
----------------- | |
start | |
after 1 billion | |
after 2 billion | |
(3 rows) | |
postgres=# \x | |
Expanded display is on. | |
postgres=# select * from pg_stat_activity where query ~ 'sleep' and pid <> pg_backend_pid(); | |
-[ RECORD 1 ]----+-------------------------------------------------------------------------------- | |
datid | 5 | |
datname | postgres | |
pid | 14423 | |
leader_pid | | |
usesysid | 10 | |
usename | postgres | |
application_name | psql | |
client_addr | | |
client_hostname | | |
client_port | -1 | |
backend_start | 2024-10-07 16:05:21.812371+00 | |
xact_start | 2024-10-07 16:05:21.814932+00 | |
query_start | 2024-10-07 16:05:21.814932+00 | |
state_change | 2024-10-07 16:05:21.814936+00 | |
wait_event_type | Timeout | |
wait_event | PgSleep | |
state | active | |
backend_xid | 742 | |
backend_xmin | 742 | |
query_id | | |
query | BEGIN; INSERT INTO wraparoundtest VALUES ($$oldxact$$); select pg_sleep(10000); | |
backend_type | client backend | |
-- our tx age is well beyond the setting we have in conf | |
postgres=# \x | |
Expanded display is off. | |
postgres=# SELECT datname | |
, age(datfrozenxid) | |
, current_setting('autovacuum_freeze_max_age') | |
FROM pg_database | |
ORDER BY 2 DESC; | |
datname | age | current_setting | |
-----------+------------+----------------- | |
postgres | 2144484419 | 200000000 | |
template1 | 2144484419 | 200000000 | |
template0 | 2144484419 | 200000000 | |
(3 rows) | |
-- some standard queries that confirm the same | |
postgres=# WITH max_age AS ( | |
SELECT 2000000000 as max_old_xid | |
, setting AS autovacuum_freeze_max_age | |
FROM pg_catalog.pg_settings | |
WHERE name = 'autovacuum_freeze_max_age' ) | |
, per_database_stats AS ( | |
SELECT datname | |
, m.max_old_xid::int | |
, m.autovacuum_freeze_max_age::int | |
, age(d.datfrozenxid) AS oldest_current_xid | |
FROM pg_catalog.pg_database d | |
JOIN max_age m ON (true) | |
WHERE d.datallowconn ) | |
SELECT max(oldest_current_xid) AS oldest_current_xid | |
, max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound | |
, max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac | |
FROM per_database_stats; | |
oldest_current_xid | percent_towards_wraparound | percent_towards_emergency_autovac | |
--------------------+----------------------------+----------------------------------- | |
2144484419 | 107 | 1072 | |
(1 row) | |
postgres=# select pg_cancel_backend(14423); | |
BEGIN | |
INSERT 0 1 | |
ERROR: canceling statement due to user request | |
pg_cancel_backend | |
------------------- | |
t | |
(1 row) | |
postgres=# WITH max_age AS ( | |
SELECT 2000000000 as max_old_xid | |
, setting AS autovacuum_freeze_max_age | |
FROM pg_catalog.pg_settings | |
WHERE name = 'autovacuum_freeze_max_age' ) | |
, per_database_stats AS ( | |
SELECT datname | |
, m.max_old_xid::int | |
, m.autovacuum_freeze_max_age::int | |
, age(d.datfrozenxid) AS oldest_current_xid | |
FROM pg_catalog.pg_database d | |
JOIN max_age m ON (true) | |
WHERE d.datallowconn ) | |
SELECT max(oldest_current_xid) AS oldest_current_xid | |
, max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound | |
, max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac | |
FROM per_database_stats; | |
oldest_current_xid | percent_towards_wraparound | percent_towards_emergency_autovac | |
--------------------+----------------------------+----------------------------------- | |
2144484419 | 107 | 1072 | |
(1 row) | |
postgres=# \x | |
Expanded display is on. | |
postgres=# select * from pg_stat_activity where query ~ 'sleep' and pid <> pg_backend_pid(); | |
(0 rows) | |
postgres=# -- we now have to run vacuum full in single user mode on the dbs to make it allow writes again | |
postgres=# | |
\q | |
postgres@ubuntu:/tmp$ p1 stop | |
waiting for server to shut down.... done | |
server stopped | |
# running in single user mode | |
postgres@ubuntu:/tmp$ postgres --single -D db1 postgres | |
PostgreSQL stand-alone backend 18devel | |
backend> vacuum full; | |
postgres@ubuntu:/tmp$ postgres --single -D db1 template1 | |
PostgreSQL stand-alone backend 18devel | |
backend> vacuum full; | |
postgres@ubuntu:/tmp$ postgres --single -D db1 template0 | |
PostgreSQL stand-alone backend 18devel | |
backend> vacuum full; | |
#now we start the database and run the same monitoring queries to ensure no more past transactions | |
postgres@ubuntu:/tmp$ p1 start | |
waiting for server to start.... done | |
server started | |
postgres@ubuntu:/tmp$ psql | |
psql (18devel) | |
Type "help" for help. | |
postgres=# WITH max_age AS ( | |
SELECT 2000000000 as max_old_xid | |
, setting AS autovacuum_freeze_max_age | |
FROM pg_catalog.pg_settings | |
WHERE name = 'autovacuum_freeze_max_age' ) | |
, per_database_stats AS ( | |
SELECT datname | |
, m.max_old_xid::int | |
, m.autovacuum_freeze_max_age::int | |
, age(d.datfrozenxid) AS oldest_current_xid | |
FROM pg_catalog.pg_database d | |
JOIN max_age m ON (true) | |
WHERE d.datallowconn ) | |
SELECT max(oldest_current_xid) AS oldest_current_xid | |
, max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound | |
, max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac | |
FROM per_database_stats; | |
oldest_current_xid | percent_towards_wraparound | percent_towards_emergency_autovac | |
--------------------+----------------------------+----------------------------------- | |
205 | 0 | 0 | |
(1 row) | |
postgres=# INSERT INTO wraparoundtest values ('after wraparound followed by vacuum full of all dbs'); -- this will now succeed | |
INSERT 0 1 | |
postgres=# select * from wraparoundtest; | |
t | |
----------------------------------------------------- | |
start | |
after 1 billion | |
after 2 billion | |
after wraparound followed by vacuum full of all dbs | |
(4 rows) | |
postgres=# | |
\q | |
postgres@ubuntu:/tmp$ psql | |
psql (18devel) | |
Type "help" for help. | |
postgres=# SELECT datname | |
, age(datfrozenxid) | |
, current_setting('autovacuum_freeze_max_age') | |
FROM pg_database | |
ORDER BY 2 DESC; | |
datname | age | current_setting | |
-----------+-----+----------------- | |
postgres | 219 | 200000000 | |
template1 | 150 | 200000000 | |
template0 | 82 | 200000000 | |
(3 rows) | |
postgres=# | |
\q | |
postgres@ubuntu:/tmp$ p1 stop | |
waiting for server to shut down.... done | |
server stopped |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment