Last active
September 2, 2020 12:15
-
-
Save ndbroadbent/d394f8a6890eddcaeafe9223e8b50be5 to your computer and use it in GitHub Desktop.
A powerful CI caching tool for Google Cloud Storage
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Abort as soon as any command fails without being explicitly handled.
set -e
# CI Cache Script for Google Cloud Storage
#
# TIP: Set CI_CACHE_VERBOSE=true while testing the script
#      to show a list of all files that are compressed/extracted.
#      Note that you might see "tar: write error" if there are too many
#      files in the verbose output.
# Examples
# ----------------------------------------------------------------
#
# - A simple CI job that always downloads the cache (if present),
#   runs bundle install, and then uploads the new cache.
#   CI_COMMIT_REF_NAME is the current git branch.
#   (This is the same behavior as the GitLab CI cache.)
#
#     CACHE_KEY="gems-$CI_COMMIT_REF_NAME"
#     ci_cache download $CACHE_KEY
#     bundle install --path vendor/bundle
#     ci_cache upload $CACHE_KEY vendor/bundle .bundle/config
#
#
# - A CI job that only runs bundle install and uploads the new cache
#   if there is a change in Gemfile or Gemfile.lock.
#   CI_COMMIT_REF_NAME is the current git branch.
#   (This uses an isolated cache for each branch, so CI will need to run
#   bundle install from scratch each time you push a new branch. Look at the
#   next example to see how this can be solved with a "fallback" cache key.)
#
#     CACHE_KEY="gems-$CI_COMMIT_REF_NAME"
#     BUNDLER_SOURCE_FILES="Gemfile Gemfile.lock"
#
#     ci_cache download $CACHE_KEY
#     if ! ci_cache check_hash $CACHE_KEY $BUNDLER_SOURCE_FILES; then
#       bundle install --path vendor/bundle
#       ci_cache upload $CACHE_KEY vendor/bundle .bundle/config
#       ci_cache update_hash $CACHE_KEY $BUNDLER_SOURCE_FILES
#     fi
#
#
# - A CI job that only runs bundle install and uploads the new cache
#   if there is a change in Gemfile or Gemfile.lock.
#   If a cache doesn't already exist for the current git branch,
#   then fall back to downloading the cache for the master branch.
#
#     CACHE_KEY="gems-$CI_COMMIT_REF_NAME"
#     FALLBACK_CACHE_KEY="gems-master"
#     BUNDLER_SOURCE_FILES="Gemfile Gemfile.lock"
#
#     ci_cache download $CACHE_KEY $FALLBACK_CACHE_KEY
#     if ! ci_cache check_hash_with_fallback \
#         $CACHE_KEY $FALLBACK_CACHE_KEY $BUNDLER_SOURCE_FILES; then
#       bundle install --path vendor/bundle
#       ci_cache upload $CACHE_KEY vendor/bundle .bundle/config
#       ci_cache update_hash $CACHE_KEY $BUNDLER_SOURCE_FILES
#     fi
# Both CLIs are required. `command -v` is a shell builtin, so this check
# also works on minimal CI images where the external `which` tool is absent
# (with `which`, a missing `which` would be misreported as a missing SDK).
if ! command -v gcloud > /dev/null 2>&1 || ! command -v gsutil > /dev/null 2>&1; then
  echo "Could not find gcloud and/or gsutil." >&2
  echo "Please install the Google Cloud SDK: https://cloud.google.com/sdk/docs" >&2
  exit 1
fi
# Refuse to run unless every required environment variable is non-empty.
# ${!name} is Bash indirect expansion: the value of the variable whose
# name is stored in REQUIRED_ENV_VAR.
for REQUIRED_ENV_VAR in CI_CACHE_GCS_BUCKET CI_CACHE_GCS_AUTH; do
  if [ -z "${!REQUIRED_ENV_VAR}" ]; then
    echo "$REQUIRED_ENV_VAR environment variable is required!" >&2
    exit 1
  fi
done
COMMAND="$1"

# Argument Validation
# --------------------------------------------------
# Sets CACHE_KEY (and, where supported, FALLBACK_CACHE_KEY) from the
# positional arguments, exiting with status 1 on any invalid value.
case "$COMMAND" in
  # The following commands support a cache key as the first argument.
  remote_hash | check_hash | check_hash_with_fallback | update_hash | update_hash_manual | upload | download)
    CACHE_KEY="$2"
    if [ -z "$CACHE_KEY" ]; then
      echo "The '$COMMAND' command requires a <cache key> argument." >&2
      exit 1
    fi

    CACHE_KEY_REGEX="^[0-9A-Za-z_-]+$"
    if ! [[ $CACHE_KEY =~ $CACHE_KEY_REGEX ]]; then
      echo "Cache key '$CACHE_KEY' does not match: $CACHE_KEY_REGEX" >&2
      exit 1
    fi

    # The following commands support a fallback cache key as the second argument.
    if [[ $COMMAND =~ ^(remote_hash|check_hash_with_fallback|download)$ ]]; then
      FALLBACK_CACHE_KEY="$3"
      if [ -z "$FALLBACK_CACHE_KEY" ]; then
        # Only check_hash_with_fallback insists on a fallback key.
        if [ "$COMMAND" = "check_hash_with_fallback" ]; then
          echo "Fallback cache key is required for check_hash_with_fallback" >&2
          exit 1
        fi
      elif ! [[ $FALLBACK_CACHE_KEY =~ $CACHE_KEY_REGEX ]]; then
        echo "Fallback cache key '$FALLBACK_CACHE_KEY' does not match: $CACHE_KEY_REGEX" >&2
        exit 1
      elif [ "$CACHE_KEY" = "$FALLBACK_CACHE_KEY" ]; then
        # Always skip the fallback if it's the same as the first cache key.
        unset FALLBACK_CACHE_KEY
      fi
    fi
    ;;
esac
# Shared Functions
# --------------------------------------------------

#######################################
# Prints one combined SHA256 hash for a list of source files.
#
# Each file's content is hashed on its own; the per-file hashes are then
# joined with single spaces (in argument order) and hashed again.
#
# Only file *contents* feed the hash. `sha256sum` is read from stdin and its
# output is cut on a space: the previous `cut -f1` split on TAB, so the whole
# "hash  filename" line survived and file names leaked into the result (and
# names containing spaces were word-split).
# NOTE: hashes stored by the previous implementation will therefore not match
# the first time this version runs, which triggers one cache rebuild.
#
# Arguments: $@ - paths of the source files (at least one)
# Outputs:   the combined hash (64 hex characters) on stdout
# Returns:   0 on success; 1 if no file was given, a file is missing,
#            or a file could not be hashed
#######################################
function calculate_sha256_hash() {
  local source_file file_hash
  local -a file_hashes=()

  if [ -z "$1" ]; then
    echo "Please provide at least one source file!" >&2
    return 1
  fi

  for source_file in "$@"; do
    if ! [ -f "$source_file" ]; then
      echo "$source_file does not exist!" >&2
      return 1
    fi
    file_hash=$(sha256sum < "$source_file" | cut -d" " -f1)
    # The pipeline's status is cut's, so detect a failed sha256sum
    # (e.g. unreadable file) by its empty output instead.
    if [ -z "$file_hash" ]; then
      echo "Could not calculate hash for $source_file!" >&2
      return 1
    fi
    file_hashes+=("$file_hash")
  done

  echo "${file_hashes[@]}" | sha256sum | cut -d" " -f1
}
#######################################
# Prints the hash stored in the GCS bucket for a cache key.
#
# Copies gs://$CI_CACHE_GCS_BUCKET/<cache key>.hash to a temp file and
# prints its content. If the copy fails (e.g. no hash was uploaded yet)
# nothing is printed.
#
# All working variables are local, so calling this function does not
# overwrite the caller's GCS_HASH_PATH / GCS_HASH_KEY / GCS_LOCATION.
#
# Globals:   CI_CACHE_GCS_BUCKET (read)
# Arguments: $1 - cache key
# Outputs:   the stored hash on stdout, or nothing if none could be fetched
# Returns:   non-zero only if the temp file cannot be created or removed
#######################################
function fetch_remote_sha256_hash() {
  local cache_key="$1"
  local gcs_location="gs://$CI_CACHE_GCS_BUCKET/$cache_key.hash"
  local hash_path

  hash_path=$(mktemp "/tmp/cache-$cache_key.XXXXXXXX") || return 1

  # A failed copy is not an error here: it simply means "no remote hash".
  # Anything gsutil prints on stdout is diverted to stderr so that stdout
  # carries only the hash.
  gsutil cp "$gcs_location" "$hash_path" >&2 || true

  if [ -f "$hash_path" ]; then
    cat "$hash_path"
    rm -f "$hash_path"
  fi
}
# Commands
# --------------------------------------------------
# Dispatches on $COMMAND. CACHE_KEY / FALLBACK_CACHE_KEY are expected to have
# been set by the argument validation earlier in the script.
case "$COMMAND" in
  authenticate)
    # Decode the base64-encoded service-account key from $CI_CACHE_GCS_AUTH
    # and activate it for gcloud/gsutil.
    if [ -z "$CI_CACHE_GCS_AUTH" ]; then
      echo "CI_CACHE_GCS_AUTH environment variable is required!" >&2
      exit 1
    fi
    GCS_AUTH_BASE64_PATH=$(mktemp /tmp/gcs_auth.json.base64.XXXXXXXX)
    GCS_AUTH_PATH=$(mktemp /tmp/gcs_auth.json.XXXXXXXX)
    # Both files hold a credential: remove them on every exit path,
    # including when openssl or gcloud fails and `set -e` aborts the script.
    trap 'rm -f "$GCS_AUTH_BASE64_PATH" "$GCS_AUTH_PATH"' EXIT
    echo "$CI_CACHE_GCS_AUTH" > "$GCS_AUTH_BASE64_PATH"
    openssl base64 -d -A -in "$GCS_AUTH_BASE64_PATH" -out "$GCS_AUTH_PATH"
    gcloud auth activate-service-account --key-file "$GCS_AUTH_PATH"
    ;;

  local_hash)
    calculate_sha256_hash "${@:2}"
    ;;

  remote_hash)
    # Print the hash for CACHE_KEY, or else the one for the fallback key.
    REMOTE_HASH=$(fetch_remote_sha256_hash "$CACHE_KEY")
    if [ -n "$REMOTE_HASH" ]; then
      echo "$REMOTE_HASH"
      exit 0
    fi
    if [ -n "$FALLBACK_CACHE_KEY" ]; then
      fetch_remote_sha256_hash "$FALLBACK_CACHE_KEY"
    fi
    ;;

  check_hash | check_hash_with_fallback)
    # Source files start after the key(s): from $3 for check_hash,
    # from $4 for check_hash_with_fallback.
    if [ "$COMMAND" = "check_hash" ]; then
      LOCAL_HASH=$(calculate_sha256_hash "${@:3}")
    else
      LOCAL_HASH=$(calculate_sha256_hash "${@:4}")
    fi

    # Describes which remote hash is being compared, for the log messages.
    # Set explicitly so that nothing inherited from the environment can
    # change which message is printed.
    CACHE_LABEL="$CACHE_KEY cache"
    REMOTE_HASH=$(fetch_remote_sha256_hash "$CACHE_KEY")
    if [ -z "$REMOTE_HASH" ]; then
      echo "Could not find remote hash for cache key: $CACHE_KEY" >&2
      # Note: FALLBACK_CACHE_KEY can be unset if it's the same as CACHE_KEY.
      if [ "$COMMAND" = "check_hash_with_fallback" ] && [ -n "$FALLBACK_CACHE_KEY" ]; then
        REMOTE_HASH=$(fetch_remote_sha256_hash "$FALLBACK_CACHE_KEY")
        CACHE_LABEL="$FALLBACK_CACHE_KEY fallback cache"
        if [ -z "$REMOTE_HASH" ]; then
          echo "Could not find remote hash for fallback cache key: $FALLBACK_CACHE_KEY" >&2
        fi
      fi
    fi
    if [ -z "$REMOTE_HASH" ]; then exit 1; fi

    if [ "$REMOTE_HASH" = "$LOCAL_HASH" ]; then
      echo "Hash has not changed for the $CACHE_LABEL ($LOCAL_HASH)" >&2
      exit 0
    fi
    echo "Hash changed for the $CACHE_LABEL!" >&2
    echo "=> Previous: $REMOTE_HASH" >&2
    echo "=> Current: $LOCAL_HASH" >&2
    exit 1
    ;;

  update_hash | update_hash_manual)
    if [ "$COMMAND" = "update_hash" ]; then
      LOCAL_HASH=$(calculate_sha256_hash "${@:3}")
    else
      LOCAL_HASH="$3"
      # Without this check an empty hash would be uploaded, and later
      # checks would treat it the same as "no remote hash".
      if [ -z "$LOCAL_HASH" ]; then
        echo "The 'update_hash_manual' command requires a <hash> argument." >&2
        exit 1
      fi
    fi
    GCS_HASH_PATH=$(mktemp "/tmp/cache-$CACHE_KEY.XXXXXXXX")
    echo "$LOCAL_HASH" > "$GCS_HASH_PATH"
    GCS_HASH_KEY="$CACHE_KEY.hash"
    GCS_LOCATION="gs://$CI_CACHE_GCS_BUCKET/$GCS_HASH_KEY"
    echo "Updating hash for $CACHE_KEY cache ($GCS_LOCATION)..."
    echo "=> New hash: $LOCAL_HASH"
    gsutil cp "$GCS_HASH_PATH" "$GCS_LOCATION"
    rm "$GCS_HASH_PATH"
    ;;

  upload)
    # tar refuses to create an archive from zero paths; fail early with a
    # clear message instead of a tar error and a stray temp file.
    if [ "$#" -lt 3 ]; then
      echo "The 'upload' command requires at least one <cache path> argument." >&2
      exit 1
    fi
    CACHE_PATH=$(mktemp "/tmp/cache-$CACHE_KEY.XXXXXXXX")
    echo "Saving files to gzip archive for $CACHE_KEY cache: $CACHE_PATH"
    TAR_FLAGS="cz"
    if [ -n "$CI_CACHE_VERBOSE" ]; then TAR_FLAGS="${TAR_FLAGS}v"; fi
    tar "-$TAR_FLAGS" -f "$CACHE_PATH" "${@:3}"
    GCS_KEY="$CACHE_KEY.tar.gz"
    GCS_LOCATION="gs://$CI_CACHE_GCS_BUCKET/$GCS_KEY"
    echo "Uploading $CACHE_KEY cache to $GCS_LOCATION..."
    gsutil cp "$CACHE_PATH" "$GCS_LOCATION"
    rm -f "$CACHE_PATH"
    ;;

  download)
    # stdout is reserved for the "success" marker that callers capture;
    # every log line (and tar's verbose listing) goes to stderr.
    #
    # Try the primary key first, then the fallback key (if any). CACHE_PATH
    # ends up pointing at whichever archive was actually downloaded, so the
    # extraction below works for the fallback as well.
    CACHE_PATH=""
    DOWNLOADED_KEY=""
    KEY_LABEL=""
    for CANDIDATE_KEY in "$CACHE_KEY" "$FALLBACK_CACHE_KEY"; do
      # FALLBACK_CACHE_KEY is empty when no (distinct) fallback was given.
      if [ -z "$CANDIDATE_KEY" ]; then continue; fi

      CANDIDATE_PATH=$(mktemp "/tmp/cache-$CANDIDATE_KEY.XXXXXXXX")
      GCS_LOCATION="gs://$CI_CACHE_GCS_BUCKET/$CANDIDATE_KEY.tar.gz"
      echo "Downloading ${KEY_LABEL}$CANDIDATE_KEY cache from $GCS_LOCATION to $CANDIDATE_PATH..." >&2
      # mktemp created the file; remove it so that its existence afterwards
      # means gsutil really wrote something.
      rm -f "$CANDIDATE_PATH"
      gsutil cp "$GCS_LOCATION" "$CANDIDATE_PATH" >&2 || true

      # -s: the file exists and is not empty.
      if [ -s "$CANDIDATE_PATH" ]; then
        CACHE_PATH="$CANDIDATE_PATH"
        DOWNLOADED_KEY="$CANDIDATE_KEY"
        break
      fi
      if [ -f "$CANDIDATE_PATH" ]; then
        echo "=> $CANDIDATE_PATH is an empty file!" >&2
        rm -f "$CANDIDATE_PATH"
      fi
      echo "Could not download $CANDIDATE_KEY cache!" >&2
      KEY_LABEL="fallback "
    done

    # A missing cache is not an error: exit 0 without printing "success".
    if [ -z "$CACHE_PATH" ]; then exit 0; fi

    echo "Extracting $DOWNLOADED_KEY cache..." >&2
    TAR_FLAGS="xz"
    if [ -n "$CI_CACHE_VERBOSE" ]; then TAR_FLAGS="${TAR_FLAGS}v"; fi
    tar "-$TAR_FLAGS" -f "$CACHE_PATH" >&2
    rm "$CACHE_PATH"
    echo "success"
    ;;

  *)
    # Unknown or missing command: print usage to stderr and fail.
    cat << HELP >&2
Usage: $0 [
    authenticate |
    local_hash | remote_hash | check_hash | check_hash_with_fallback |
    update_hash | update_hash_manual |
    upload | download
]

* authenticate
    Sets up Google Cloud authentication for gsutil.
    Looks for a base64 encoded JSON key in \$CI_CACHE_GCS_AUTH.
    Generate this base64 string by running:
      $ openssl base64 -A -in your-gcs-auth-key.json

* local_hash <source files...>
    Shows the current SHA256 hash for a list of local source files.

* remote_hash <cache key> [fallback cache key]
    Fetches and prints the saved SHA256 hash from the GCS bucket.
    If a fallback cache key is specified, this cache key will be downloaded if
    the original cache key is missing.
    (This is useful for branches and pull requests, if you want to default
    to downloading the cache for the master branch.)

* check_hash <cache key> <source files...>
    Fetches the remote hash for <cache key>.
    => If no remote hash is found, exits with code 1.
    Calculates the local hash for the source files.
    => If the local and remote hashes are different, exits with code 1.
    => Otherwise, if the hashes are the same, exits with code 0.

* check_hash_with_fallback <cache key> <fallback cache key> <source files...>
    Fetch the remote hash for <cache key>. If this does not exist,
    fetch the remote hash for <fallback cache key>.
    => If no remote hashes are found, exits with code 1.
    Calculates the local hash for the source files.
    => If the local and remote hashes are different, exits with code 1.
    => Otherwise, if the hashes are the same, exits with code 0.
    NOTE: The fallback hash is only fetched if the first hash does not exist
    (but not if a non-matching hash is found.)

* update_hash <cache key> <source files...>
    Calculates the SHA256 hash for a list of source files,
    then updates the remote hash in GCS.

* update_hash_manual <cache key> <hash>
    If you calculate a hash manually, use this to set a custom hash in GCS.
    For example, I run the following to calculate a hash for all files
    in a directory that are tracked by git:
      $ git ls-files <directory> | xargs sha256sum | cut -d" " -f1 | sha256sum | cut -d" " -f1

* upload <cache key> <cache paths...>
    *You must run "$0 authenticate" before running this command.*
    Creates a gzipped tar archive for all the cached files, then uploads
    the archive to a GCS bucket (\$CI_CACHE_GCS_BUCKET).
    The file key in GCS is "<cache key>.tar.gz".

* download <cache key> [fallback cache key]
    *You must run "$0 authenticate" before running this command.*
    Downloads and extracts the cached files from "<cache key>.tar.gz".
    If a fallback cache key is specified, this will be downloaded if
    the first cache is missing.
    Note: All log messages are sent to stderr. The download command always
    exits with code 0, even if no cache is found.
    (So a failed cache download will never fail your CI build.)
    => If a cache could be downloaded, the script prints "success" to stdout.
    => If no cache could be downloaded, the script prints no output to stdout.
    This means you can do:
      DOWNLOAD_RESULT=\$($0 download my-cache)
      if [ "\$DOWNLOAD_RESULT" = "success" ]; then
        ...
      fi
HELP
    exit 1
    ;;
esac
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ...
# In a multistage CI build, you don't always need to download
# the cache during the setup stage.
# (You just need to see if the file hashes match.)
stages:
  - setup
  - test
install_dependencies:
  stage: setup
  script:
    # Rebuild only when the stored hash does not match the source files:
    # download the existing cache, run bundle install, then upload the new
    # cache and update its hash. The quoted scalar is folded into a single
    # shell line, so every statement is joined with `&&` or ended with `;`.
    - '(
        CACHE_PREFIX="gems" &&
        SOURCE_FILES="Gemfile Gemfile.lock" &&
        CACHE_FILES="vendor/bundle .bundle/config" &&
        CACHE_KEY="$CACHE_PREFIX-$CI_COMMIT_REF_NAME" &&
        FALLBACK_CACHE_KEY="$CACHE_PREFIX-master" &&
        if ! ./scripts/ci_cache check_hash_with_fallback
          "$CACHE_KEY" "$FALLBACK_CACHE_KEY" $SOURCE_FILES; then
          ./scripts/ci_cache download "$CACHE_KEY" "$FALLBACK_CACHE_KEY" &&
          bundle install -j $(nproc)
            --path "$BUNDLE_PATH"
            --binstubs "$BUNDLE_BIN" &&
          ./scripts/ci_cache upload "$CACHE_KEY" $CACHE_FILES &&
          ./scripts/ci_cache update_hash "$CACHE_KEY" $SOURCE_FILES;
        fi
      )'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ...
rspec:
  before_script:
    # Always try to download the cache. Reinstall (and re-upload) only when
    # no cache could be downloaded or the stored hash does not match the
    # source files. $SOURCE_FILES and $CACHE_FILES are left unquoted on
    # purpose: each holds a space-separated list of paths.
    - '(
        CACHE_PREFIX="gems" &&
        SOURCE_FILES="Gemfile Gemfile.lock" &&
        CACHE_FILES="vendor/bundle .bundle/config" &&
        CACHE_KEY="$CACHE_PREFIX-$CI_COMMIT_REF_NAME" &&
        FALLBACK_CACHE_KEY="$CACHE_PREFIX-master" &&
        DOWNLOAD_RESULT="$(./scripts/ci_cache download "$CACHE_KEY" "$FALLBACK_CACHE_KEY")" &&
        if [ "$DOWNLOAD_RESULT" != "success" ] ||
          ! ./scripts/ci_cache check_hash_with_fallback
            "$CACHE_KEY" "$FALLBACK_CACHE_KEY" $SOURCE_FILES; then
          bundle install -j $(nproc)
            --path "$BUNDLE_PATH"
            --binstubs "$BUNDLE_BIN" &&
          ./scripts/ci_cache upload "$CACHE_KEY" $CACHE_FILES &&
          ./scripts/ci_cache update_hash "$CACHE_KEY" $SOURCE_FILES;
        fi
      )'
  script:
    - rspec
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment