Created
August 23, 2025 10:57
-
-
Save alexey-sh/4203e8e6ceafbfe89519a6d10588adec to your computer and use it in GitHub Desktop.
docker compose update with rollback
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Zero Downtime Rolling Update Script with Automatic Rollback
# Usage: ./rolling_update.sh <service_name> <new_image> [options]
#
# External tools invoked by this script: docker, curl, jq.

set -euo pipefail

# Configuration
# SCRIPT_DIR is the directory holding this script; the default backup
# directory and the log file live next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
BACKUP_DIR="${SCRIPT_DIR}/backups"          # overridable with --backup-dir
LOG_FILE="${SCRIPT_DIR}/rolling_update.log"
HEALTH_CHECK_TIMEOUT=300 # 5 minutes          (overridable with --health-timeout)
HEALTH_CHECK_INTERVAL=5  # 5 seconds          (overridable with --health-interval)
ROLLBACK_ON_FAILURE=true #                    (cleared by --no-rollback)
PARALLEL_INSTANCES=1     # How many instances to update at once (--parallel)

# Colors for output. These are literal backslash sequences; they are turned
# into real escape characters by the logger when the message is printed.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color
# Logging function
#
# Prefixes the message with a timestamp, appends the line to $LOG_FILE and
# mirrors it to stderr.
#
# The console copy goes to stderr on purpose: several functions in this script
# hand their result back on stdout and are called through $(...). If log lines
# went to stdout they would be captured together with the result.
#
# Globals:   LOG_FILE (read)
# Arguments: $1 - message; backslash escapes (e.g. color codes) are interpreted
log() {
  printf '[%s] %b\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE" >&2
}
# Log a message tagged "ERROR:" wrapped in the RED/NC color codes.
# Globals:   RED, NC (read)
# Arguments: $1 - message text
error() {
  log "${RED}ERROR: $1${NC}"
}
# Log a message tagged "SUCCESS:" wrapped in the GREEN/NC color codes.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
success() {
  log "${GREEN}SUCCESS: $1${NC}"
}
# Log a message tagged "WARNING:" wrapped in the YELLOW/NC color codes.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
warning() {
  log "${YELLOW}WARNING: $1${NC}"
}
# Log a message tagged "INFO:" wrapped in the BLUE/NC color codes.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text
info() {
  log "${BLUE}INFO: $1${NC}"
}
# Help function
#
# Prints the usage text to stdout. The here-document delimiter is unquoted so
# that $0 expands to the name the script was invoked with.
show_help() {
  cat << EOF
Zero Downtime Rolling Update Script
Usage: $0 <service_name> <new_image> [options]
Arguments:
service_name Name of the service to update
new_image New Docker image to deploy
Options:
--health-url URL HTTP endpoint to check for health (default: http://localhost:8080/health)
--health-timeout SECONDS Timeout for health checks (default: 300)
--health-interval SECONDS Interval between health checks (default: 5)
--parallel COUNT Number of instances to update simultaneously (default: 1)
--no-rollback Disable automatic rollback on failure
--backup-dir PATH Directory to store backups (default: ./backups)
--dry-run Show what would be done without executing
--help Show this help message
Examples:
$0 web-app nginx:1.21.0
$0 api-server myapp:v2.1.0 --health-url http://localhost:3000/api/health
$0 worker worker-image:latest --parallel 2 --no-rollback
EOF
}
# Parse command line arguments
#
# Globals written: SERVICE_NAME, NEW_IMAGE, HEALTH_URL, DRY_RUN and, depending
#                  on the options given, HEALTH_CHECK_TIMEOUT,
#                  HEALTH_CHECK_INTERVAL, PARALLEL_INSTANCES,
#                  ROLLBACK_ON_FAILURE, BACKUP_DIR.
# Arguments:       <service_name> <new_image> [options]  (see show_help)
# Exits:           0 after printing help for --help; 1 on too few arguments,
#                  an unknown option, an option without its value, or a
#                  non-numeric value for a numeric option.
parse_args() {
  # Allow "script --help" on its own to succeed instead of being treated as
  # "too few arguments".
  if [[ "${1:-}" == "--help" ]]; then
    show_help
    exit 0
  fi

  if [[ $# -lt 2 ]]; then
    show_help
    exit 1
  fi

  SERVICE_NAME="$1"
  NEW_IMAGE="$2"
  shift 2

  HEALTH_URL="http://localhost:8080/health"
  DRY_RUN=false

  while [[ $# -gt 0 ]]; do
    # Options that take a value: make sure the value is really there, so a
    # trailing "--health-url" reports a usage error rather than tripping
    # "unbound variable" under set -u.
    case "$1" in
      --health-url | --health-timeout | --health-interval | --parallel | --backup-dir)
        if [[ $# -lt 2 ]]; then
          error "Option $1 requires a value"
          show_help
          exit 1
        fi
        ;;
    esac

    # Numeric options: a zero or non-numeric interval/timeout would break the
    # arithmetic in the health-check loop.
    case "$1" in
      --health-timeout | --health-interval | --parallel)
        if [[ ! "$2" =~ ^[1-9][0-9]*$ ]]; then
          error "Option $1 expects a positive integer, got: $2"
          exit 1
        fi
        ;;
    esac

    case "$1" in
      --health-url)
        HEALTH_URL="$2"
        shift 2
        ;;
      --health-timeout)
        HEALTH_CHECK_TIMEOUT="$2"
        shift 2
        ;;
      --health-interval)
        HEALTH_CHECK_INTERVAL="$2"
        shift 2
        ;;
      --parallel)
        PARALLEL_INSTANCES="$2"
        shift 2
        ;;
      --no-rollback)
        ROLLBACK_ON_FAILURE=false
        shift
        ;;
      --backup-dir)
        BACKUP_DIR="$2"
        shift 2
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --help)
        show_help
        exit 0
        ;;
      *)
        error "Unknown option: $1"
        show_help
        exit 1
        ;;
    esac
  done
}
# Create necessary directories
#
# Ensures the backup directory and the directory that holds the log file
# exist. `--` keeps a path that starts with "-" (BACKUP_DIR can come from the
# command line) from being parsed as an option.
#
# Globals: BACKUP_DIR, LOG_FILE (read)
setup_directories() {
  local dir
  for dir in "$BACKUP_DIR" "$(dirname -- "$LOG_FILE")"; do
    mkdir -p -- "$dir"
  done
}
# Check if Docker is running and accessible
#
# Exits the script with status 1 when the docker command cannot be found or
# when `docker info` fails; returns 0 otherwise.
check_docker() {
  # Distinguish "not installed" from "daemon unreachable" so the operator
  # knows which one to fix.
  if ! command -v docker >/dev/null 2>&1; then
    error "Docker command not found in PATH"
    exit 1
  fi

  if ! docker info >/dev/null 2>&1; then
    error "Docker is not running or not accessible"
    exit 1
  fi
}
# Get current running containers for the service
#
# Prints one container ID per line for every running container that carries
# the label service=$SERVICE_NAME. docker's stderr is discarded and a docker
# failure is turned into "no output, status 0", so callers only need to test
# for an empty result.
#
# Globals: SERVICE_NAME (read)
get_current_containers() {
  docker ps \
    --filter "label=service=${SERVICE_NAME}" \
    --format '{{.ID}}' 2>/dev/null || true
}
# Get current image of the service
#
# Prints the `.Image` field of the first container returned by
# get_current_containers, or an empty line when there is no such container or
# the inspect call fails. Always returns 0.
#
# NOTE(review): per the docker inspect documentation `.Image` is the image ID,
# not the tag (`.Config.Image`) — confirm which one callers expect.
get_current_image() {
  local container_id=""

  # Take only the first line. Reading it directly avoids a `| head -n1`
  # pipeline, which can fail under `set -o pipefail` when the producer is cut
  # short.
  IFS= read -r container_id < <(get_current_containers) || true

  if [[ -z "$container_id" ]]; then
    echo ""
    return 0
  fi

  docker inspect "$container_id" --format '{{.Image}}' 2>/dev/null || echo ""
}
# Health check function
#
# Polls a URL with curl until it answers successfully or the timeout expires.
#
# Globals:   HEALTH_CHECK_TIMEOUT, HEALTH_CHECK_INTERVAL (read, as defaults)
# Arguments: $1 - URL to probe
#            $2 - overall timeout in seconds (optional)
#            $3 - pause between attempts in seconds (optional)
# Returns:   0 as soon as one request succeeds (curl -f: HTTP errors count as
#            failure); 1 on timeout or when timeout/interval are not
#            non-negative integers.
health_check() {
  local url="$1"
  local timeout="${2:-$HEALTH_CHECK_TIMEOUT}"
  local interval="${3:-$HEALTH_CHECK_INTERVAL}"
  local integer_re='^(0|[1-9][0-9]*)$'

  # Reject values that would break the arithmetic below.
  if [[ ! "$timeout" =~ $integer_re || ! "$interval" =~ $integer_re ]]; then
    error "Invalid health check timing (timeout='${timeout}', interval='${interval}')"
    return 1
  fi

  info "Performing health check on $url"

  # Measure wall-clock time with bash's SECONDS counter. Each curl attempt may
  # itself take up to 10 s (-m 10); counting only the sleeps would let the
  # real wait run far past the configured timeout.
  local started=$SECONDS
  local elapsed=0
  while ((elapsed < timeout)); do
    if curl -f -s -m 10 "$url" > /dev/null 2>&1; then
      success "Health check passed"
      return 0
    fi
    info "Health check failed, retrying in ${interval}s... (${elapsed}s/${timeout}s)"
    sleep "$interval"
    elapsed=$((SECONDS - started))
  done

  error "Health check failed after ${timeout}s"
  return 1
}
# Create backup of current state
#
# Writes ${BACKUP_DIR}/${SERVICE_NAME}_<timestamp>.json describing every
# running container of the service (id, image tag, name without the leading
# "/") and prints the path of that file on stdout.
#
# stdout carries ONLY the path: callers capture it with $(create_backup), so
# every log call in here is sent to stderr explicitly.
#
# Globals: BACKUP_DIR, SERVICE_NAME (read)
# Returns: 0 after writing the file; 1 when the service has no running
#          containers.
create_backup() {
  local timestamp
  timestamp=$(date +"%Y%m%d_%H%M%S")
  local backup_file="${BACKUP_DIR}/${SERVICE_NAME}_${timestamp}.json"

  info "Creating backup of current state" >&2

  local containers
  containers=$(get_current_containers)
  if [[ -z "$containers" ]]; then
    warning "No containers found for service $SERVICE_NAME" >&2
    return 1
  fi

  local container_id image name
  local separator=""
  {
    printf '{\n'
    printf '  "service": "%s",\n' "$SERVICE_NAME"
    printf '  "timestamp": "%s",\n' "$timestamp"
    printf '  "containers": [\n'
    while IFS= read -r container_id; do
      # A failed inspect leaves the field empty rather than aborting the
      # whole backup.
      image=$(docker inspect "$container_id" --format '{{.Config.Image}}') || image=""
      name=$(docker inspect "$container_id" --format '{{.Name}}') || name=""
      # ${name#/} drops the leading slash docker puts in front of names.
      printf '%s    {"id": "%s", "image": "%s", "name": "%s"}' \
        "$separator" "$container_id" "$image" "${name#/}"
      separator=$',\n'
    done <<< "$containers"
    printf '\n  ]\n}\n'
  } > "$backup_file"

  success "Backup created: $backup_file" >&2
  echo "$backup_file"
}
# Pull new image
#
# Pulls $NEW_IMAGE with `docker pull`. In dry-run mode nothing is pulled and
# the function only logs what it would do.
#
# Globals: NEW_IMAGE, DRY_RUN (read)
# Returns: 0 on success or dry run, 1 when the pull fails.
pull_new_image() {
  info "Pulling new image: $NEW_IMAGE"

  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would pull: $NEW_IMAGE"
    return 0
  fi

  if ! docker pull "$NEW_IMAGE"; then
    error "Failed to pull $NEW_IMAGE"
    return 1
  fi

  success "Successfully pulled $NEW_IMAGE"
}
# Start new container with the new image
#
# Starts a detached container from $NEW_IMAGE, copying the environment
# variables, published ports and bind mounts of an existing container, and
# prints the new container's ID on stdout.
#
# stdout carries ONLY the container ID (callers capture it with $(...)), so
# all log calls in here are sent to stderr explicitly.
#
# Globals:   SERVICE_NAME, NEW_IMAGE, DRY_RUN (read)
# Arguments: $1 - ID of the container whose configuration is copied
# Returns:   0 on success (or dry run, which prints "dry_run_container_id");
#            1 when the old container cannot be inspected or the new one
#            cannot be started.
#
# Limitations: only .Config.Env, .HostConfig.PortBindings and
# .HostConfig.Binds are carried over. Values are read line by line, so an
# environment value containing a newline is not reproduced correctly.
# NOTE(review): the new container publishes the same host ports as the old
# one, which is still running when this is called — confirm how the port
# conflict is meant to be avoided.
start_new_container() {
  local old_container_id="$1"
  # The old container's ID is part of the name so that several containers
  # updated by one run ($$ is the same for all of them) do not collide.
  local new_name="${SERVICE_NAME}_new_$$_${old_container_id}"

  info "Starting new container with image: $NEW_IMAGE" >&2

  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would start new container: $new_name" >&2
    echo "dry_run_container_id"
    return 0
  fi

  # Get configuration from old container
  local old_config old_host_config
  if ! old_config=$(docker inspect "$old_container_id" --format '{{json .Config}}' 2>/dev/null) ||
    ! old_host_config=$(docker inspect "$old_container_id" --format '{{json .HostConfig}}' 2>/dev/null); then
    error "Failed to inspect container $old_container_id" >&2
    return 1
  fi

  # The docker command line is built as an array so that values containing
  # spaces (e.g. FOO=hello world) stay single arguments.
  local -a run_args=(
    -d
    --name "$new_name"
    --label "service=$SERVICE_NAME"
    --label "update_session=$$"
  )
  local entry

  # Extract environment variables
  while IFS= read -r entry; do
    [[ -n "$entry" ]] || continue
    run_args+=(-e "$entry")
  done < <(jq -r '.Env[]?' <<< "$old_config" 2>/dev/null)

  # Extract port mappings: every binding of every port, keeping the host IP
  # when one is set (IPv6 addresses are bracketed as docker expects).
  while IFS= read -r entry; do
    [[ -n "$entry" ]] || continue
    run_args+=(-p "$entry")
  done < <(jq -r '
    (.PortBindings // {}) | to_entries[] | .key as $cport
    | (.value // [])[]
    | (.HostIp // "") as $ip | (.HostPort // "") as $hp
    | if $ip != "" then
        (if ($ip | contains(":")) then "[" + $ip + "]" else $ip end)
        + ":" + $hp + ":" + $cport
      elif $hp != "" then $hp + ":" + $cport
      else $cport end
  ' <<< "$old_host_config" 2>/dev/null)

  # Extract volumes
  while IFS= read -r entry; do
    [[ -n "$entry" ]] || continue
    run_args+=(-v "$entry")
  done < <(jq -r '.Binds[]?' <<< "$old_host_config" 2>/dev/null)

  # Start new container. docker's stderr is left visible so the reason for a
  # failure is not lost.
  local new_container_id
  new_container_id=$(docker run "${run_args[@]}" "$NEW_IMAGE") || new_container_id=""

  if [[ -z "$new_container_id" ]]; then
    error "Failed to start new container" >&2
    # A failed run may leave a created-but-not-started container behind.
    docker rm -f "$new_name" > /dev/null 2>&1 || true
    return 1
  fi

  success "Started new container: $new_container_id" >&2
  echo "$new_container_id"
}
# Perform rolling update
#
# For every running container of the service, one at a time: start a
# replacement from the new image, wait for the health check, then stop and
# remove the old container. The first failure ends the loop; a replacement
# that fails its health check is stopped and removed.
#
# Globals: SERVICE_NAME, DRY_RUN, HEALTH_URL, PARALLEL_INSTANCES (read)
# Returns: 0 when every container was replaced (or dry run), 1 otherwise.
#
# NOTE(review): HEALTH_URL is one fixed URL for all containers and the old
# container is still running while it is probed — confirm that the check
# really reaches the new container.
rolling_update() {
  local containers
  containers=$(get_current_containers)
  if [[ -z "$containers" ]]; then
    error "No running containers found for service: $SERVICE_NAME"
    return 1
  fi

  info "Starting rolling update for service: $SERVICE_NAME"
  info "Current containers: $(printf '%s\n' "$containers" | wc -l)"

  # Containers are processed strictly one after another; say so instead of
  # silently ignoring a larger --parallel value.
  if [[ "${PARALLEL_INSTANCES:-1}" != "1" ]]; then
    warning "--parallel ${PARALLEL_INSTANCES} requested, but containers are updated one at a time"
  fi

  local failed=false
  local container_id new_container_id

  # The ID list is fed through fd 3 so that nothing run inside the loop can
  # consume it from stdin.
  while IFS= read -r container_id <&3; do
    info "Updating container: $container_id"

    # Start new container
    if ! new_container_id=$(start_new_container "$container_id"); then
      error "Failed to start new container for $container_id"
      failed=true
      break
    fi
    # The ID is the last line of the output; drop anything printed before it
    # (e.g. log lines that ended up on stdout).
    new_container_id="${new_container_id##*$'\n'}"

    if [[ "$DRY_RUN" == "false" ]]; then
      sleep 5 # Give container time to start

      # Wait for new container to be healthy
      if ! health_check "$HEALTH_URL"; then
        error "New container failed health check"
        # Clean up failed container
        docker stop "$new_container_id" > /dev/null 2>&1 || true
        docker rm "$new_container_id" > /dev/null 2>&1 || true
        failed=true
        break
      fi

      success "New container is healthy, stopping old container"
      # Stop old container
      docker stop "$container_id" > /dev/null 2>&1 || warning "Failed to stop container $container_id"
      docker rm "$container_id" > /dev/null 2>&1 || warning "Failed to remove container $container_id"
    fi

    info "Container update completed successfully"
  done 3<<< "$containers"

  if [[ "$failed" == "true" ]]; then
    error "Rolling update failed"
    return 1
  fi

  success "Rolling update completed successfully"
  return 0
}
# Rollback to previous state
#
# Uses a backup file written by create_backup to bring the service back to
# the recorded set of containers:
#   1. the backup is validated BEFORE anything is stopped, so a missing or
#      unreadable backup never leaves the service with no containers;
#   2. running containers that are not listed in the backup (i.e. created by
#      the update) are stopped and removed; containers that are listed are
#      still the originals and are left running;
#   3. every listed container that is no longer running is started again
#      from its recorded image.
#
# Restored containers get only the image, a "<name>_restored_<pid>" name and
# the service label — environment, ports and volumes are not recorded in the
# backup and therefore not restored.
#
# Globals:   DRY_RUN, SERVICE_NAME (read)
# Arguments: $1 - path of the backup file
# Returns:   0 on success or dry run; 1 when the backup is missing or cannot
#            be parsed, or when at least one container could not be restored.
rollback() {
  local backup_file="$1"

  error "Rolling back to previous state"
  info "Using backup: $backup_file"

  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would rollback using: $backup_file"
    return 0
  fi

  if [[ ! -f "$backup_file" ]]; then
    error "Backup file not found: $backup_file"
    return 1
  fi

  local container_count backed_up_ids
  if ! container_count=$(jq -r '.containers | length' "$backup_file" 2>/dev/null) ||
    ! backed_up_ids=$(jq -r '.containers[].id' "$backup_file" 2>/dev/null); then
    error "Backup file could not be parsed: $backup_file"
    return 1
  fi

  # Stop current containers that did not exist when the backup was taken.
  local current_containers container_id
  local survivors="" # newline-separated IDs of originals still running
  current_containers=$(get_current_containers)
  if [[ -n "$current_containers" ]]; then
    info "Stopping containers that are not part of the backup"
    while IFS= read -r container_id <&3; do
      if grep -Fxq -- "$container_id" <<< "$backed_up_ids"; then
        info "Keeping original container: $container_id"
        survivors+="${container_id}"$'\n'
        continue
      fi
      docker stop "$container_id" > /dev/null 2>&1 || warning "Failed to stop container $container_id"
      docker rm "$container_id" > /dev/null 2>&1 || warning "Failed to remove container $container_id"
    done 3<<< "$current_containers"
  fi

  # Restore from backup
  info "Restoring containers from backup"
  local i original_id container_image container_name
  local restore_failed=false
  for ((i = 0; i < container_count; i++)); do
    original_id=$(jq -r ".containers[$i].id" "$backup_file" 2>/dev/null) || original_id=""
    if [[ -n "$original_id" ]] && grep -Fxq -- "$original_id" <<< "$survivors"; then
      continue # still running, nothing to restore
    fi

    container_image=$(jq -r ".containers[$i].image" "$backup_file" 2>/dev/null) || container_image=""
    container_name=$(jq -r ".containers[$i].name" "$backup_file" 2>/dev/null) || container_name=""
    if [[ -z "$container_image" || "$container_image" == "null" ]]; then
      continue
    fi

    info "Restoring container: $container_name with image: $container_image"
    # Pull the old image if needed; best effort, it is usually still local.
    docker pull "$container_image" > /dev/null 2>&1 || true

    # Start container (simplified - you may need to restore full configuration)
    if ! docker run -d \
      --name "${container_name}_restored_$$" \
      --label "service=$SERVICE_NAME" \
      "$container_image" > /dev/null 2>&1; then
      error "Failed to restore container $container_name"
      restore_failed=true
    fi
  done

  if [[ "$restore_failed" == "true" ]]; then
    error "Rollback finished with errors"
    return 1
  fi

  success "Rollback completed"
}
# Main function
#
# Orchestrates the whole run: parse arguments, prepare directories, verify
# docker, back up the current state, pull the new image, perform the rolling
# update and, if that fails, roll back.
#
# Arguments: the script's command line ("$@")
# Exit status (via exit):
#   1 - backup failed while rollback is enabled, pull failed, or the update
#       failed (whether or not the rollback then succeeded)
#   2 - the update failed and the rollback failed as well
# Returns 0 when the update succeeded.
main() {
  parse_args "$@"
  setup_directories
  check_docker

  info "Starting rolling update process"
  info "Service: $SERVICE_NAME"
  info "New image: $NEW_IMAGE"
  info "Health URL: $HEALTH_URL"
  info "Dry run: $DRY_RUN"

  # Create backup
  local backup_file=""
  if backup_file=$(create_backup); then
    # The path is the last line of the output; drop anything printed before
    # it (e.g. log lines that ended up on stdout).
    backup_file="${backup_file##*$'\n'}"
    info "Backup created successfully"
  else
    backup_file=""
    if [[ "$ROLLBACK_ON_FAILURE" == "true" ]]; then
      error "Failed to create backup, aborting update"
      exit 1
    fi
    warning "Failed to create backup, but continuing without rollback capability"
  fi

  # Pull new image
  if ! pull_new_image; then
    error "Failed to pull new image, aborting update"
    exit 1
  fi

  # Perform rolling update
  if rolling_update; then
    success "Rolling update completed successfully!"

    # Clean up old images (optional). Pruning deletes images for real, so it
    # is skipped in dry-run mode.
    if [[ "$DRY_RUN" == "true" ]]; then
      info "[DRY RUN] Would clean up old images"
    else
      info "Cleaning up old images..."
      docker image prune -f > /dev/null 2>&1 || true
    fi
    return 0
  fi

  error "Rolling update failed!"

  if [[ "$ROLLBACK_ON_FAILURE" != "true" || -z "$backup_file" ]]; then
    error "No rollback performed (disabled or no backup available)"
    exit 1
  fi

  warning "Initiating automatic rollback..."
  if rollback "$backup_file"; then
    success "Rollback completed successfully"
    exit 1 # Still exit with error since update failed
  fi

  error "Rollback also failed!"
  exit 2
}
# Run main function with all arguments
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment