Skip to content

Instantly share code, notes, and snippets.

@alexey-sh
Created August 23, 2025 10:57
Show Gist options
  • Save alexey-sh/4203e8e6ceafbfe89519a6d10588adec to your computer and use it in GitHub Desktop.
Save alexey-sh/4203e8e6ceafbfe89519a6d10588adec to your computer and use it in GitHub Desktop.
docker compose update with rollback
#!/bin/bash
#
# Zero Downtime Rolling Update Script with Automatic Rollback
#
# Usage: ./rolling_update.sh <service_name> <new_image> [options]

# Strict mode: stop on a failing command, on an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# --- Paths (all derived from the directory that holds this script) ----------
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
BACKUP_DIR="$SCRIPT_DIR/backups"
LOG_FILE="$SCRIPT_DIR/rolling_update.log"

# --- Defaults; the command-line parser may overwrite these ------------------
HEALTH_CHECK_TIMEOUT=300 # seconds (5 minutes)
HEALTH_CHECK_INTERVAL=5  # seconds between two probes
ROLLBACK_ON_FAILURE=true
PARALLEL_INSTANCES=1     # intended number of instances to update at once

# --- ANSI colour sequences, expanded when a message is logged ---------------
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset ("No Color")
# Logging function
#
# Emits one timestamped line on stderr and appends the same line to LOG_FILE.
# Diagnostics deliberately go to stderr: several functions in this script hand
# their result back on stdout and are called through $(...), so anything logged
# on stdout would end up inside the captured value.
#
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1 - message; backslash escapes (e.g. colour codes) are expanded
# Outputs:   the formatted line on stderr
log() {
  printf '%b\n' "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE" >&2
}
# Log an error-level message.
# Arguments: $1 - message text
# The text is prefixed with "ERROR: ", wrapped in the RED/NC colour codes and
# passed to log.
error() {
  local message="$1"
  log "${RED}ERROR: ${message}${NC}"
}
# Log a success message.
# Arguments: $1 - message text
# The text is prefixed with "SUCCESS: ", wrapped in the GREEN/NC colour codes
# and passed to log.
success() {
  local message="$1"
  log "${GREEN}SUCCESS: ${message}${NC}"
}
# Log a warning-level message.
# Arguments: $1 - message text
# The text is prefixed with "WARNING: ", wrapped in the YELLOW/NC colour codes
# and passed to log.
warning() {
  local message="$1"
  log "${YELLOW}WARNING: ${message}${NC}"
}
# Log an informational message.
# Arguments: $1 - message text
# The text is prefixed with "INFO: ", wrapped in the BLUE/NC colour codes and
# passed to log.
info() {
  local message="$1"
  log "${BLUE}INFO: ${message}${NC}"
}
# Help function
#
# Prints the usage text on stdout. The script name shown in the usage and
# example lines is taken from $0.
show_help() {
  local -a help_lines=(
    'Zero Downtime Rolling Update Script'
    "Usage: $0 <service_name> <new_image> [options]"
    'Arguments:'
    'service_name Name of the service to update'
    'new_image New Docker image to deploy'
    'Options:'
    '--health-url URL HTTP endpoint to check for health (default: http://localhost:8080/health)'
    '--health-timeout SECONDS Timeout for health checks (default: 300)'
    '--health-interval SECONDS Interval between health checks (default: 5)'
    '--parallel COUNT Number of instances to update simultaneously (default: 1)'
    '--no-rollback Disable automatic rollback on failure'
    '--backup-dir PATH Directory to store backups (default: ./backups)'
    '--dry-run Show what would be done without executing'
    '--help Show this help message'
    'Examples:'
    "$0 web-app nginx:1.21.0"
    "$0 api-server myapp:v2.1.0 --health-url http://localhost:3000/api/health"
    "$0 worker worker-image:latest --parallel 2 --no-rollback"
  )
  printf '%s\n' "${help_lines[@]}"
}
# Abort with a usage error unless VALUE is a plain decimal integer >= MINIMUM.
# Arguments: $1 - option name (for the message), $2 - value, $3 - minimum
_parse_args_require_int() {
  local option="$1"
  local value="$2"
  local minimum="$3"
  # The pattern rejects signs, blanks and leading zeros; a leading zero would
  # later be read as octal by arithmetic expansion.
  if [[ ! "$value" =~ ^(0|[1-9][0-9]*)$ ]] || ((value < minimum)); then
    error "Option $option expects an integer >= $minimum, got: $value"
    show_help
    exit 1
  fi
}

# Parse command line arguments
#
# Globals written: SERVICE_NAME, NEW_IMAGE, HEALTH_URL, DRY_RUN and, depending
#                  on the options given, HEALTH_CHECK_TIMEOUT,
#                  HEALTH_CHECK_INTERVAL, PARALLEL_INSTANCES,
#                  ROLLBACK_ON_FAILURE, BACKUP_DIR.
# Arguments: <service_name> <new_image> [options]
# Exits:     0 after printing help because --help was given (anywhere);
#            1 on too few arguments, an unknown option, an option that is
#              missing its value, or a non-numeric / out-of-range number.
parse_args() {
  # --help wins over everything else, including a missing image argument.
  local arg
  for arg in "$@"; do
    if [[ "$arg" == "--help" ]]; then
      show_help
      exit 0
    fi
  done

  if [[ $# -lt 2 ]]; then
    show_help
    exit 1
  fi

  SERVICE_NAME="$1"
  NEW_IMAGE="$2"
  shift 2
  HEALTH_URL="http://localhost:8080/health"
  DRY_RUN=false

  while [[ $# -gt 0 ]]; do
    # Options that take a value must actually be followed by one; otherwise
    # "$2" would trip `set -u` with an unhelpful "unbound variable" message.
    case "$1" in
      --health-url | --health-timeout | --health-interval | --parallel | --backup-dir)
        if [[ $# -lt 2 ]]; then
          error "Option $1 requires a value"
          show_help
          exit 1
        fi
        ;;
    esac

    case "$1" in
      --health-url)
        HEALTH_URL="$2"
        shift 2
        ;;
      --health-timeout)
        _parse_args_require_int "$1" "$2" 0
        HEALTH_CHECK_TIMEOUT="$2"
        shift 2
        ;;
      --health-interval)
        # Must be at least 1: the health check sleeps for this long per retry.
        _parse_args_require_int "$1" "$2" 1
        HEALTH_CHECK_INTERVAL="$2"
        shift 2
        ;;
      --parallel)
        _parse_args_require_int "$1" "$2" 1
        PARALLEL_INSTANCES="$2"
        shift 2
        ;;
      --no-rollback)
        ROLLBACK_ON_FAILURE=false
        shift
        ;;
      --backup-dir)
        BACKUP_DIR="$2"
        shift 2
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      *)
        error "Unknown option: $1"
        show_help
        exit 1
        ;;
    esac
  done
}
# Create necessary directories
#
# Makes sure the backup directory and the directory that will contain the log
# file exist (parents included). The log file itself is not created here.
# Globals: BACKUP_DIR (read), LOG_FILE (read)
setup_directories() {
  local needed_dir
  for needed_dir in "$BACKUP_DIR" "$(dirname "$LOG_FILE")"; do
    mkdir -p "$needed_dir"
  done
}
# Check if Docker is running and accessible
#
# Runs `docker info` with all output discarded. Returns 0 when it succeeds;
# otherwise logs an error and terminates the script with status 1.
check_docker() {
  if docker info &>/dev/null; then
    return 0
  fi
  error "Docker is not running or not accessible"
  exit 1
}
# Get current running containers for the service
#
# Asks `docker ps` for the containers carrying the label
# service=$SERVICE_NAME and prints what it returns for the {{.ID}} format, one
# entry per line. Docker's stderr is discarded and a failing `docker ps` is
# ignored, so the function always returns 0.
# Globals: SERVICE_NAME (read)
get_current_containers() {
  local -a ps_args=(
    ps
    --filter "label=service=${SERVICE_NAME}"
    --format '{{.ID}}'
  )
  docker "${ps_args[@]}" 2>/dev/null || true
}
# Get current image of the service
#
# Takes the first container listed by get_current_containers and prints the
# value of its {{.Image}} inspect field. Prints an empty line when the service
# has no containers or when `docker inspect` fails.
get_current_image() {
  local first_id
  first_id=$(get_current_containers | head -n1)

  if [[ -z "$first_id" ]]; then
    echo ""
    return
  fi

  docker inspect "$first_id" --format "{{.Image}}" 2>/dev/null || echo ""
}
# Health check function
#
# Probes URL with curl until one probe succeeds or TIMEOUT seconds of
# wall-clock time have passed. Elapsed time is measured with bash's SECONDS
# counter, so the time spent inside a slow probe (each one may take up to 10s,
# see -m 10) counts against the timeout as well as the sleeps between probes.
#
# Arguments: $1 - URL to probe
#            $2 - timeout in seconds  (default: HEALTH_CHECK_TIMEOUT)
#            $3 - pause between probes in seconds (default: HEALTH_CHECK_INTERVAL)
# Returns:   0 as soon as a probe succeeds, 1 when the timeout is reached
health_check() {
  local url="$1"
  local timeout="${2:-$HEALTH_CHECK_TIMEOUT}"
  local interval="${3:-$HEALTH_CHECK_INTERVAL}"
  local started=$SECONDS
  local elapsed=0

  info "Performing health check on $url"
  while ((elapsed < timeout)); do
    # -f: treat HTTP error statuses as failure; -s: silent; -m 10: 10s cap.
    if curl -f -s -m 10 "$url" >/dev/null 2>&1; then
      success "Health check passed"
      return 0
    fi

    elapsed=$((SECONDS - started))
    if ((elapsed >= timeout)); then
      # Out of time already: do not announce or sleep for another retry.
      break
    fi

    info "Health check failed, retrying in ${interval}s... (${elapsed}s/${timeout}s)"
    sleep "$interval"
    elapsed=$((SECONDS - started))
  done

  error "Health check failed after ${timeout}s"
  return 1
}
# Create backup of current state
#
# Writes ${BACKUP_DIR}/${SERVICE_NAME}_<timestamp>.json describing every
# container currently listed for the service (id, configured image, name
# without the leading "/").
#
# The path of the backup file is the ONLY thing printed on stdout, because
# callers capture it with $(...). All log helper calls are therefore
# redirected to stderr here.
#
# Globals: BACKUP_DIR (read), SERVICE_NAME (read)
# Outputs: backup file path on stdout
# Returns: 0 on success; 1 when there is no container to record or the file
#          cannot be written
create_backup() {
  local timestamp
  timestamp=$(date +"%Y%m%d_%H%M%S")
  local backup_file="${BACKUP_DIR}/${SERVICE_NAME}_${timestamp}.json"

  info "Creating backup of current state" >&2

  local containers
  containers=$(get_current_containers)
  if [[ -z "$containers" ]]; then
    warning "No containers found for service $SERVICE_NAME" >&2
    return 1
  fi

  # Collect one JSON object per container that can still be inspected.
  local -a entries=()
  local container_id image name
  while IFS= read -r container_id; do
    if [[ -z "$container_id" ]]; then
      continue
    fi
    if ! image=$(docker inspect "$container_id" --format "{{.Config.Image}}") \
      || ! name=$(docker inspect "$container_id" --format "{{.Name}}"); then
      warning "Could not inspect container $container_id, leaving it out of the backup" >&2
      continue
    fi
    # Docker reports names as "/name"; drop that leading slash.
    entries+=("    {\"id\": \"$container_id\", \"image\": \"$image\", \"name\": \"${name#/}\"}")
  done <<< "$containers"

  if [[ ${#entries[@]} -eq 0 ]]; then
    warning "No containers found for service $SERVICE_NAME" >&2
    return 1
  fi

  local i
  local last=$((${#entries[@]} - 1))
  if ! {
    printf '%s\n' "{"
    printf '%s\n' "  \"service\": \"$SERVICE_NAME\","
    printf '%s\n' "  \"timestamp\": \"$timestamp\","
    printf '%s\n' "  \"containers\": ["
    for ((i = 0; i <= last; i++)); do
      if ((i < last)); then
        printf '%s,\n' "${entries[i]}"
      else
        printf '%s\n' "${entries[i]}"
      fi
    done
    printf '%s\n' "  ]"
    printf '%s\n' "}"
  } > "$backup_file"; then
    error "Could not write backup file: $backup_file" >&2
    return 1
  fi

  success "Backup created: $backup_file" >&2
  echo "$backup_file"
}
# Pull new image
#
# Pulls $NEW_IMAGE with `docker pull`. In dry-run mode nothing is pulled and
# the function only logs what it would have done.
# Globals: NEW_IMAGE (read), DRY_RUN (read)
# Returns: 1 when `docker pull` fails, 0 in dry-run mode
pull_new_image() {
  info "Pulling new image: $NEW_IMAGE"

  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would pull: $NEW_IMAGE"
    return 0
  fi

  if ! docker pull "$NEW_IMAGE"; then
    error "Failed to pull $NEW_IMAGE"
    return 1
  fi

  success "Successfully pulled $NEW_IMAGE"
}
# Start new container with the new image
#
# Starts a detached container from NEW_IMAGE that copies the environment
# variables, published ports (host port only) and bind mounts of the container
# it is meant to replace, and labels it service=$SERVICE_NAME and
# update_session=<PID of this script>.
#
# The new container's ID is the ONLY thing printed on stdout, because callers
# capture it with $(...). All log helper calls are redirected to stderr here.
#
# NOTE(review): the replacement asks for the same host ports as the old
# container, which is still running at this point - confirm that this cannot
# conflict in your setup.
#
# Globals:   SERVICE_NAME, NEW_IMAGE, DRY_RUN (all read)
# Arguments: $1 - ID of the container being replaced
# Outputs:   new container ID on stdout ("dry_run_container_id" in dry-run)
# Returns:   0 on success, 1 when the old container cannot be inspected, jq is
#            missing, or `docker run` fails
start_new_container() {
  local old_container_id="$1"
  # The old container's ID is part of the name so that every replacement
  # started during one run gets a distinct --name.
  local new_name="${SERVICE_NAME}_new_$$_${old_container_id:0:12}"

  info "Starting new container with image: $NEW_IMAGE" >&2
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would start new container: $new_name" >&2
    echo "dry_run_container_id"
    return 0
  fi

  # Without jq none of the old settings could be copied; refuse to start a
  # container that silently lacks its environment, ports and volumes.
  if ! command -v jq >/dev/null 2>&1; then
    error "jq is required to copy the configuration of $old_container_id" >&2
    return 1
  fi

  # Get configuration from old container
  local old_config old_host_config
  if ! old_config=$(docker inspect "$old_container_id" --format '{{json .Config}}' 2>/dev/null) \
    || ! old_host_config=$(docker inspect "$old_container_id" --format '{{json .HostConfig}}' 2>/dev/null); then
    error "Failed to inspect container $old_container_id" >&2
    return 1
  fi

  # The command line is built as an array so that values containing spaces
  # (e.g. an environment variable) stay a single argument.
  local -a run_args=(
    run -d
    --name "$new_name"
    --label "service=$SERVICE_NAME"
    --label "update_session=$$"
  )
  local item

  # Environment variables ("NAME=value", one per line)
  while IFS= read -r item; do
    if [[ -n "$item" ]]; then
      run_args+=(-e "$item")
    fi
  done < <(jq -r '.Env[]?' <<< "$old_config" 2>/dev/null)

  # Port mappings ("hostPort:containerPort/proto"); ports without a binding
  # are skipped.
  while IFS= read -r item; do
    if [[ -n "$item" ]]; then
      run_args+=(-p "$item")
    fi
  done < <(jq -r '(.PortBindings // {}) | to_entries[] | .key as $port | (.value // [])[] | (.HostPort // "") + ":" + $port' <<< "$old_host_config" 2>/dev/null)

  # Bind mounts ("source:target[:mode]")
  while IFS= read -r item; do
    if [[ -n "$item" ]]; then
      run_args+=(-v "$item")
    fi
  done < <(jq -r '.Binds[]?' <<< "$old_host_config" 2>/dev/null)

  # Start new container; docker's own error text is left on stderr so the
  # operator can see why a start failed.
  local new_container_id
  if ! new_container_id=$(docker "${run_args[@]}" "$NEW_IMAGE") || [[ -z "$new_container_id" ]]; then
    error "Failed to start new container" >&2
    # A failed run can leave a created-but-not-started container behind,
    # which would block the name on the next attempt.
    docker rm -f "$new_name" >/dev/null 2>&1 || true
    return 1
  fi

  success "Started new container: $new_container_id" >&2
  echo "$new_container_id"
}
# Perform rolling update
#
# Walks over the service's current containers one at a time (the
# PARALLEL_INSTANCES setting is not consulted here). For each one it starts a
# replacement via start_new_container, waits 5 seconds, runs health_check
# against HEALTH_URL and then either
#   - stops and removes the old container (replacement healthy), or
#   - stops and removes the replacement and aborts the whole update.
# In dry-run mode the wait, the health check and every docker stop/rm are
# skipped.
#
# Globals: SERVICE_NAME, HEALTH_URL, DRY_RUN (all read)
# Returns: 0 when every container was processed, 1 when the service has no
#          containers or any step failed
rolling_update() {
  local targets
  targets=$(get_current_containers)
  if [[ -z "$targets" ]]; then
    error "No running containers found for service: $SERVICE_NAME"
    return 1
  fi

  info "Starting rolling update for service: $SERVICE_NAME"
  info "Current containers: $(echo "$targets" | wc -l)"

  local replaced=()
  local update_ok=true
  local old_id fresh_id

  while IFS= read -r old_id; do
    info "Updating container: $old_id"

    # Bring up the replacement first; the old container keeps running.
    if ! fresh_id=$(start_new_container "$old_id"); then
      error "Failed to start new container for $old_id"
      update_ok=false
      break
    fi

    # Dry run: nothing was started, so there is nothing to probe or stop.
    if [[ "$DRY_RUN" != "false" ]]; then
      replaced+=("$fresh_id")
      info "Container update completed successfully"
      continue
    fi

    sleep 5 # Give container time to start

    if ! health_check "$HEALTH_URL"; then
      error "New container failed health check"
      # Clean up failed container
      docker stop "$fresh_id" >/dev/null 2>&1 || true
      docker rm "$fresh_id" >/dev/null 2>&1 || true
      update_ok=false
      break
    fi

    success "New container is healthy, stopping old container"
    docker stop "$old_id" >/dev/null 2>&1 || warning "Failed to stop container $old_id"
    docker rm "$old_id" >/dev/null 2>&1 || warning "Failed to remove container $old_id"
    replaced+=("$fresh_id")
    info "Container update completed successfully"
  done <<< "$targets"

  if [[ "$update_ok" != "true" ]]; then
    error "Rolling update failed"
    return 1
  fi

  success "Rolling update completed successfully"
  return 0
}
# Rollback to previous state
#
# Uses the JSON backup written before the update to undo a failed run:
#   1. The backup is read and validated BEFORE anything is stopped, so a
#      missing or unreadable backup never takes the service down.
#   2. Containers of the service that are not listed in the backup (i.e. the
#      ones that appeared after it was taken) are stopped and removed.
#   3. Containers that are listed in the backup and are still running are left
#      untouched - they still carry their full original configuration.
#   4. Containers listed in the backup that no longer exist are re-created
#      from their recorded image. This restore is simplified: only the image
#      and the service label are applied, so environment, ports and volumes
#      may need to be restored by hand.
#
# Globals:   SERVICE_NAME, DRY_RUN (read)
# Arguments: $1 - path of the backup file
# Returns:   0 when every missing container was re-created (or in dry-run),
#            1 when the backup is missing/empty/unreadable or a restore failed
rollback() {
  local backup_file="$1"

  error "Rolling back to previous state"
  info "Using backup: $backup_file"

  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would rollback using: $backup_file"
    return 0
  fi

  if [[ ! -f "$backup_file" ]]; then
    error "Backup file not found: $backup_file"
    return 1
  fi

  # Read id / image / name of every backed-up container in a single jq call.
  # Missing ids and names are emitted as "-" so that no tab-separated field is
  # ever empty (read would otherwise collapse adjacent tabs and shift fields).
  local -a saved_ids=() saved_images=() saved_names=()
  local saved_lookup=$'\n'
  local saved_id saved_image saved_name
  while IFS=$'\t' read -r saved_id saved_image saved_name; do
    if [[ -z "$saved_image" ]]; then
      continue
    fi
    if [[ -z "$saved_name" || "$saved_name" == "-" ]]; then
      saved_name="$SERVICE_NAME"
    fi
    saved_ids+=("$saved_id")
    saved_images+=("$saved_image")
    saved_names+=("$saved_name")
    saved_lookup+="${saved_id}"$'\n'
  done < <(jq -r '.containers[]? | select((.image // "") != "") | [(.id // "-"), .image, (if (.name // "") == "" then "-" else .name end)] | @tsv' "$backup_file" 2>/dev/null)

  if [[ ${#saved_ids[@]} -eq 0 ]]; then
    error "No restorable containers found in backup: $backup_file"
    return 1
  fi

  local current_containers
  current_containers=$(get_current_containers)
  local current_lookup=$'\n'"${current_containers}"$'\n'

  # Remove only what the failed update added.
  local container_id
  if [[ -n "$current_containers" ]]; then
    info "Stopping containers started by the failed update"
    while IFS= read -r container_id; do
      if [[ -z "$container_id" ]]; then
        continue
      fi
      if [[ "$saved_lookup" == *$'\n'"$container_id"$'\n'* ]]; then
        info "Keeping original container $container_id (still running)"
        continue
      fi
      docker stop "$container_id" >/dev/null 2>&1 || warning "Failed to stop container $container_id"
      docker rm "$container_id" >/dev/null 2>&1 || warning "Failed to remove container $container_id"
    done <<< "$current_containers"
  fi

  # Restore from backup whatever is no longer running.
  info "Restoring containers from backup"
  local failures=0
  local i
  for ((i = 0; i < ${#saved_ids[@]}; i++)); do
    if [[ "$current_lookup" == *$'\n'"${saved_ids[i]}"$'\n'* ]]; then
      continue
    fi
    info "Restoring container: ${saved_names[i]} with image: ${saved_images[i]}"
    # Pull the old image if needed; a failed pull is tolerated because the
    # image may still be available locally.
    docker pull "${saved_images[i]}" >/dev/null 2>&1 || true
    if ! docker run -d \
      --name "${saved_names[i]}_restored_$$" \
      --label "service=$SERVICE_NAME" \
      "${saved_images[i]}" >/dev/null 2>&1; then
      error "Failed to restore container ${saved_names[i]}"
      failures=$((failures + 1))
    fi
  done

  if ((failures > 0)); then
    error "Rollback finished with $failures container(s) not restored"
    return 1
  fi

  success "Rollback completed"
  return 0
}
# Main function
#
# Orchestrates one run: parse arguments, prepare directories, verify Docker,
# back up the current state, pull the new image, perform the rolling update
# and - when the update fails and rollback is enabled - roll back.
#
# Arguments: the script's command-line arguments
# Exits:     1 when the backup (with rollback enabled), the pull or the update
#              fails (also after a successful rollback);
#            2 when the update and the rollback both fail.
main() {
  parse_args "$@"
  setup_directories
  check_docker

  info "Starting rolling update process"
  info "Service: $SERVICE_NAME"
  info "New image: $NEW_IMAGE"
  info "Health URL: $HEALTH_URL"
  info "Dry run: $DRY_RUN"

  # Create backup
  local backup_file=""
  if backup_file=$(create_backup); then
    # The path is the last line of what create_backup printed; anything its
    # helpers may have logged on stdout before it is dropped.
    backup_file="${backup_file##*$'\n'}"
    info "Backup created successfully"
  else
    backup_file=""
    if [[ "$ROLLBACK_ON_FAILURE" == "true" ]]; then
      error "Failed to create backup, aborting update"
      exit 1
    else
      warning "Failed to create backup, but continuing without rollback capability"
    fi
  fi

  # Pull new image
  if ! pull_new_image; then
    error "Failed to pull new image, aborting update"
    exit 1
  fi

  # Perform rolling update
  if rolling_update; then
    success "Rolling update completed successfully!"
    # Clean up old images (optional). A dry run must not change anything, so
    # the prune is only announced there.
    if [[ "$DRY_RUN" == "true" ]]; then
      info "[DRY RUN] Would clean up old images"
    else
      info "Cleaning up old images..."
      docker image prune -f >/dev/null 2>&1 || true
    fi
  else
    error "Rolling update failed!"
    if [[ "$ROLLBACK_ON_FAILURE" == "true" && -n "${backup_file:-}" ]]; then
      warning "Initiating automatic rollback..."
      if rollback "$backup_file"; then
        success "Rollback completed successfully"
        exit 1 # Still exit with error since update failed
      else
        error "Rollback also failed!"
        exit 2
      fi
    else
      error "No rollback performed (disabled or no backup available)"
      exit 1
    fi
  fi
}
# Entry point: run main, forwarding every command-line argument unchanged
# ("$@" keeps each argument as a separate word, even if it contains spaces).
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment