Skip to content

Instantly share code, notes, and snippets.

@piotrkochan
Last active February 14, 2025 09:52
Show Gist options
  • Save piotrkochan/564bef05dd17cf57b0c72c3e5b8adb58 to your computer and use it in GitHub Desktop.
Save piotrkochan/564bef05dd17cf57b0c72c3e5b8adb58 to your computer and use it in GitHub Desktop.
# ProxySQL alerts
groups:
# Alerting rules for ProxySQL cluster configuration synchronisation.
# Every rule is built from the proxysql_cluster_syn_conflict_total and
# proxysql_cluster_pulled_total counters, broken down by `module_name`
# where the annotation templates need that label.
- name: ProxySQL_Cluster
  rules:
  # Alert for cluster sync conflicts: any module with a non-zero conflict
  # rate sustained for 5 minutes.
  - alert: ProxySQLClusterSyncConflicts
    expr: |
      sum by (module_name) (
        rate(proxysql_cluster_syn_conflict_total[5m])
      ) > 0
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "ProxySQL cluster sync conflicts detected"
      description: "Module {{ $labels.module_name }} is experiencing synchronization conflicts"

  # Alert for cluster pull failures: any module whose pulls from peers are
  # being recorded with status="failure".
  - alert: ProxySQLClusterPullFailures
    expr: |
      sum by (module_name) (
        rate(proxysql_cluster_pulled_total{status="failure"}[5m])
      ) > 0
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "ProxySQL cluster pull failures detected"
      description: "Module {{ $labels.module_name }} is failing to pull configurations from peers"

  # Alert for overall cluster sync health: failed pulls as a percentage of
  # (successful + failed) pulls over 15 minutes, across all modules.
  # `/` and `*` share precedence and associate left to right, so the ratio
  # is computed first and then scaled to a percentage.
  - alert: ProxySQLClusterSyncUnhealthy
    expr: |
      (
        sum(rate(proxysql_cluster_pulled_total{status="failure"}[15m]))
        /
        (
          sum(rate(proxysql_cluster_pulled_total{status="success"}[15m]))
          +
          sum(rate(proxysql_cluster_pulled_total{status="failure"}[15m]))
        )
        * 100
      ) > 10
    for: 10m
    labels:
      severity: critical
    annotations:
      summary: "ProxySQL cluster synchronization is unhealthy"
      description: "More than 10% of cluster synchronization attempts are failing"

  # Alert for specific critical modules sync failures: same signal as
  # ProxySQLClusterPullFailures, restricted to users / servers / query rules
  # and escalated to critical.
  - alert: ProxySQLCriticalModuleSyncFailure
    expr: |
      sum by (module_name) (
        rate(proxysql_cluster_pulled_total{
          status="failure",
          module_name=~"mysql_users|mysql_servers|mysql_query_rules"
        }[10m])
      ) > 0
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "Critical module sync failure"
      description: "Failed to sync critical module {{ $labels.module_name }} across the cluster"

  # Alert for version conflicts: only conflicts recorded with
  # reason="version_one".
  - alert: ProxySQLClusterVersionConflict
    expr: |
      sum by (module_name) (
        rate(proxysql_cluster_syn_conflict_total{reason="version_one"}[5m])
      ) > 0
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "ProxySQL cluster version conflicts detected"
      description: "Module {{ $labels.module_name }} is experiencing version conflicts between nodes"
# Hostgroup availability rules.
# This group continues the top-level `groups:` list declared at the top of
# the file; repeating the `groups:` key here would be a duplicate mapping key.
#
# NOTE(review): both rules assume proxysql_connpool_conns_status sums to 0
# when nothing is available and carries a `status` label - TODO confirm
# against the metrics your exporter version actually exposes.
- name: ProxySQL_Hostgroups
  rules:
  # Alert when a hostgroup becomes empty (all servers offline).
  # Comparison operators bind tighter than `and`, so this reads as
  # (status sum == 0) and (configured total > 0), matched per hostgroup.
  - alert: ProxySQLEmptyHostgroup
    expr: |
      # This looks at connection pool status
      sum by (hostgroup) (proxysql_connpool_conns_status) == 0
      and
      # Ensure the hostgroup has connections configured
      sum by (hostgroup) (proxysql_connpool_conns_total) > 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Hostgroup {{ $labels.hostgroup }} has no available servers"
      description: "All servers in hostgroup {{ $labels.hostgroup }} are unavailable. This could indicate a major outage."

  # Alert for monitoring if a high percentage of connections in a hostgroup
  # are unavailable: non-OK connections as a percentage of the hostgroup total.
  - alert: ProxySQLHighUnavailableConnectionsInHostgroup
    expr: |
      (
        sum by (hostgroup) (proxysql_connpool_conns_status{status!="OK"})
        /
        sum by (hostgroup) (proxysql_connpool_conns_total)
      ) * 100 > 50
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "High number of unavailable connections in hostgroup {{ $labels.hostgroup }}"
      description: "More than 50% of connections in hostgroup {{ $labels.hostgroup }} are currently unavailable"
# General ProxySQL instance health rules.
# This group continues the top-level `groups:` list declared at the top of
# the file; repeating the `groups:` key here would be a duplicate mapping key.
- name: ProxySQL
  rules:
  # Critical Alerts
  - alert: ProxySQLDown
    expr: up{job="proxysql"} == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "ProxySQL instance is down"
      description: "ProxySQL instance has been down for more than 1 minute"

  - alert: ProxySQLHighConnectionFailures
    expr: rate(proxysql_connpool_get_conn_failure_total[5m]) > 10
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "High rate of connection failures"
      description: "ProxySQL is experiencing connection failures at rate of {{ $value }} per second"

  # Connected clients as a percentage of the configured maximum.
  - alert: ProxySQLNearMaxConnections
    expr: proxysql_client_connections_connected / proxysql_mysql_max_connections * 100 > 90
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "Approaching maximum connections limit"
      description: "ProxySQL is using {{ $value }}% of maximum configured connections"

  # Warning Alerts
  - alert: ProxySQLHighErrorRate
    expr: rate(proxysql_generated_error_packets_total[5m]) > 5
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: "High error packet rate"
      description: "ProxySQL is generating error packets at rate of {{ $value }} per second"

  - alert: ProxySQLSlowQueries
    expr: rate(proxysql_slow_queries_total[5m]) > 10
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High number of slow queries"
      description: "ProxySQL is experiencing {{ $value }} slow queries per second"

  # Sum of frontend buffers, backend buffers and session-internal memory.
  # Threshold: 1073741824 bytes = 1 GiB.
  - alert: ProxySQLHighMemoryUsage
    expr: |
      (
        proxysql_mysql_frontend_buffers_bytes
        + proxysql_mysql_backend_buffers_bytes
        + proxysql_mysql_session_internal_bytes
      ) > 1073741824
    for: 15m
    labels:
      severity: warning
    annotations:
      summary: "High memory usage"
      description: "ProxySQL memory usage exceeds 1GB"

  # Monitor Alerts
  # Each check type is aggregated on its own and defaulted to 0 when it has
  # no status="err" series. Adding the raw rate() vectors directly would use
  # one-to-one label matching, so a single absent metric would make the whole
  # expression empty and the alert could never fire.
  - alert: ProxySQLMonitorFailures
    expr: |
      (
        (sum(rate(proxysql_mysql_monitor_connect_check_total{status="err"}[5m])) or vector(0))
        +
        (sum(rate(proxysql_mysql_monitor_ping_check_total{status="err"}[5m])) or vector(0))
        +
        (sum(rate(proxysql_mysql_monitor_read_only_check_total{status="err"}[5m])) or vector(0))
      ) > 0
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "ProxySQL monitor checks failing"
      description: "One or more ProxySQL monitor checks are failing"

  # rate() of a cumulative seconds counter yields backend query-seconds per
  # wall-clock second (aggregate time spent in backend queries), not an
  # average per-query latency; the description states what is measured.
  - alert: ProxySQLHighBackendLatency
    expr: rate(proxysql_backend_query_time_seconds_total[5m]) > 1
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High backend query latency"
      description: "Backends are accumulating more than 1 second of query time per second ({{ $value }} s/s)"

  # Info Alerts
  - alert: ProxySQLHighAbortedConnections
    expr: rate(proxysql_client_connections_total{status="aborted"}[15m]) > 5
    for: 15m
    labels:
      severity: info
    annotations:
      summary: "High number of aborted connections"
      description: "ProxySQL is experiencing {{ $value }} aborted connections per second"

  # Security Alerts
  - alert: ProxySQLSQLInjectionAttempts
    expr: rate(proxysql_automatic_detected_sql_injection_total[5m]) > 0
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "SQL injection attempts detected"
      description: "ProxySQL has detected potential SQL injection attempts"

  # Performance Alerts
  # Pool "get" requests per second that were not answered OK.
  - alert: ProxySQLPoolExhaustion
    expr: rate(proxysql_myhgm_myconnpool_get_total[5m]) - rate(proxysql_myhgm_myconnpool_get_ok_total[5m]) > 10
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "Connection pool exhaustion"
      description: "ProxySQL connection pool is frequently unable to provide connections"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment