Skip to content

Instantly share code, notes, and snippets.

@manodeep
Last active June 20, 2025 05:22
Show Gist options
  • Save manodeep/84f625f4e6d6d266508159d09ede3dab to your computer and use it in GitHub Desktop.
Save manodeep/84f625f4e6d6d266508159d09ede3dab to your computer and use it in GitHub Desktop.
Checking for reproducible atmosphere and ocean results between two or more experiments
#!/usr/bin/env bash
## Written by Manodeep Sinha (ACCESS-NRI, ANU) March 2025
## Uses https://www.shellcheck.net/ to identify potential errors in the
## script. Code fragments taken from Stack Overflow and similar sites
## are noted inline.
## LICENSE: MIT
echo "Setting up colors and font-styles ..."
## Setup colors and font-styles used when printing the comparison results.
## `tput` exits with an error (and writes a message to stderr) when TERM is
## unset or the requested capability is unavailable. In that case its stderr
## is discarded and the variable is left empty, so the report is simply
## printed without any styling instead of being preceded by error noise.
# BLK=$(tput setaf 0 2>/dev/null || true)
RED=$(tput setaf 1 2>/dev/null || true)
GRN=$(tput setaf 2 2>/dev/null || true)
PURPLE=$(tput setaf 5 2>/dev/null || true)
BLD=$(tput bold 2>/dev/null || true)
# DEF resets every attribute set by the variables above
DEF=$(tput sgr0 2>/dev/null || true)
echo "Setting up colors and font-styles ...done"
## Print a one-line, colourised PASSED/FAILED summary for a single comparison.
##
## Globals (read): RED, GRN, PURPLE, BLD, DEF - terminal escape sequences
##                 (empty strings simply give unstyled output)
## Arguments:
##   $1 - first directory that was compared
##   $2 - second directory that was compared (must differ from $1 as a string)
##   $3 - label of the component that was compared (e.g. "Atmosphere")
##   $4 - integer result of the comparison; 0 is reported as PASSED,
##        anything else as FAILED
## Outputs: the summary line (or a usage message) on stdout
## Returns: 1 when the arguments are invalid, otherwise the status of the
##          final printf
_pretty_print_test_result() {
  # Explicit `if` blocks (rather than `test && printf && return`) guarantee
  # that the function returns even if printing the message fails.
  if [ $# -ne 4 ]; then
    printf "Please give *exactly* four arguments (got %d)\n" $#
    return 1
  fi
  if [ "$1" == "$2" ]; then
    printf "Please provide two *different* directories as the first two arguments (got two identical args with value = '%s' and '%s')\n" "$1" "$2"
    return 1
  fi

  local row_dir col_dir status status_color
  # Only the last path component of each directory is shown
  row_dir=$(basename -- "${1}")
  col_dir=$(basename -- "${2}")

  status="FAILED"
  status_color="${RED}"
  if [ "$4" -eq 0 ]; then
    status="PASSED"
    status_color="${GRN}"
  fi

  # All variable text is passed as %s arguments so that backslashes or '%'
  # characters in directory names/labels are printed literally.
  printf 'Comparing %s%s%s in dirs = (%s%s,%s%s): Status - %s%s%s\n' \
    "$PURPLE" "${3}" "$DEF" \
    "$BLD" "$row_dir" "$col_dir" "$DEF" \
    "$status_color" "$status" "$DEF"
}
## Extract every match of a Perl-style regex from two files and check that
## the two sequences of matches are byte-for-byte identical.
##
## Arguments:
##   $1 - first file
##   $2 - second file (must differ from $1 as a string)
##   $3 - pattern handed to `grep -oP`
## Outputs: usage/diagnostic messages on stdout
## Returns: 0 when the extracted matches are identical (this includes the
##          case where neither file contains a match), 1 when they differ,
##          when the arguments are invalid, when an input file is missing,
##          or when a temporary file cannot be created or grep itself fails
_grep_pattern_and_diff_two_files() {
  # Explicit `if` blocks (rather than `test && printf && return`) guarantee
  # that the function returns even if printing the message fails.
  if [ $# -ne 3 ]; then
    printf "Please give *exactly* three arguments -- first two should be unique filenames and the last the pattern to 'grep' for (got %d)\n" $#
    return 1
  fi
  if [ "$1" == "$2" ]; then
    printf "Please provide two *different* files as the first two arguments (got two identical args with value = '%s' and '%s')\n" "$1" "$2"
    return 1
  fi

  local fname
  # Validate both inputs *before* creating any temporary file, so that a
  # missing second file cannot leave the first temporary file behind.
  for fname in "$1" "$2"; do
    if [ ! -f "$fname" ]; then
      printf "Did not find the expected output file for the component (tried %s) ...returning\n" "$fname"
      return 1
    fi
  done

  local -a tmp_files=()
  local tmp_file grep_status
  local res=0
  for fname in "$1" "$2"; do
    if ! tmp_file="$(mktemp --suffix=.diff)"; then
      printf "Could not create a temporary file ...returning\n"
      res=1
      break
    fi
    tmp_files+=("${tmp_file}")

    # Match the known string but don't print the matching bit (uses Perl syntax)
    # Taken from this: https://askubuntu.com/a/530194
    #
    # The `-a` flag tells grep to process binary files as if they were text.
    # It is required because (Spencer discovered this) grep was not returning
    # all the matches to `[chksum]` (needed for the ocean): the log file
    # (access-esm1.6.out in that case) contained a NUL byte half-way through,
    # after which grep treated the **remainder** of the file as *binary* and
    # only reported "Binary file access-esm1.6.out matches". An earlier `-I`
    # flag suppressed that message but also suppressed all further matches.
    # With `-a` the file is treated as text regardless of the bytes in it.
    # Manodeep Sinha (3rd April, 2025)
    #
    # grep exits with 0 (matches), 1 (no match) or >1 (real error such as an
    # invalid pattern); only the last case is treated as a failure here.
    grep_status=0
    grep -a -oP -e "$3" -- "$fname" > "$tmp_file" || grep_status=$?
    if [ "$grep_status" -gt 1 ]; then
      printf "grep failed with exit status %d while processing %s ...returning\n" "$grep_status" "$fname"
      res=1
      break
    fi
  done

  if [ "$res" -eq 0 ]; then
    cmp --silent -- "${tmp_files[0]}" "${tmp_files[1]}" || res=1
  fi

  # cleanup and remove whatever temporary files were created
  if [ "${#tmp_files[@]}" -gt 0 ]; then
    rm -f -- "${tmp_files[@]}"
  fi
  return "$res"
}
## Compare the ocean checksums recorded in two log files.
##
## Arguments:
##   $1 - first log file
##   $2 - second log file
## Returns: the status of _grep_pattern_and_diff_two_files for the text that
##          follows every "[chksum] " marker in the two files
_test_ocean() {
  # \K drops the "[chksum] " prefix from the match, so only the text after
  # the marker is compared. `local` keeps the pattern from leaking into (or
  # clobbering) a global variable of the same name.
  local pattern='(\[chksum\] )\K.*'
  _grep_pattern_and_diff_two_files "$1" "$2" "$pattern"
}
## Compare the atmosphere solver norms recorded in two output files.
##
## Arguments:
##   $1 - first output file
##   $2 - second output file
## Returns: the status of _grep_pattern_and_diff_two_files for the text that
##          follows every "Final Absolute Norm :" marker in the two files
_test_atmosphere() {
  # \K drops the "Final Absolute Norm :" prefix from the match, so only the
  # text after the marker is compared. `local` keeps the pattern from leaking
  # into (or clobbering) a global variable of the same name.
  local pattern='(Final Absolute Norm :)\K.*'
  _grep_pattern_and_diff_two_files "$1" "$2" "$pattern"
}
## Run the atmosphere and ocean comparisons for one pair of experiment
## directories and print one summary line per comparison that was run.
##
## Arguments:
##   $1 - first experiment directory
##   $2 - second experiment directory (must differ from $1 as a string)
## Outputs: summary lines, warnings and usage messages on stdout
## Returns: 1 when the arguments are invalid; otherwise the sum of the
##          results of the comparisons that were run. A comparison whose
##          input files are missing is skipped and contributes 0, so the
##          function also returns 0 when nothing could be compared.
_check_and_print_results_in_two_dirs() {
  # Explicit `if` blocks (rather than `test && printf && return`) guarantee
  # that the function returns even if printing the message fails.
  if [ $# -ne 2 ]; then
    printf "Please give *exactly* two arguments (got %d)\n" $#
    return 1
  fi
  if [ "$1" == "$2" ]; then
    printf "Please provide two *different* directories (got two identical args with value = '%s' and '%s')\n" "$1" "$2"
    return 1
  fi

  # Everything is local so that nothing here can clobber variables (loop
  # counters in particular) of the caller.
  local dir fname
  local full_result=0

  ## ---- Atmosphere ----
  ## This bit could have been in a separate function but that would need two
  ## fully generated filenames to pass to "_test_atmosphere" - which would
  ## mean having to duplicate the "rel_atm_filename"
  local rel_atm_filename="archive/output000/atmosphere/atm.fort6.pe0"
  local need_to_check_atmosphere=1
  local atm_result
  local -a filenames=()
  for dir in "$1" "$2"; do
    fname="${dir}/${rel_atm_filename}"
    if [ ! -f "$fname" ]; then
      # No atmosphere output in this directory - skip the atmosphere check
      need_to_check_atmosphere=0
      break
    fi
    filenames+=("${fname}")
  done

  if [ "$need_to_check_atmosphere" -gt 0 ]; then
    _test_atmosphere "${filenames[0]}" "${filenames[1]}"
    atm_result=$?
    _pretty_print_test_result "${1}" "${2}" "Atmosphere" "$atm_result"
    full_result=$(( full_result + atm_result ))
  fi

  ## ---- Ocean ----
  # Need to parse the model-name from `config.yaml` - which usually
  # looks like ``model: access-esm1.6``
  local rel_config_fname="config.yaml"
  local need_to_check_ocean=1
  local model_name model_log_fname ocean_result
  local -a model_output_log_filenames=()
  for dir in "$1" "$2"; do
    fname="${dir}/${rel_config_fname}"
    if [ ! -f "$fname" ]; then
      need_to_check_ocean=0
      break
    fi

    # Check if there is an ocean component (under sub-models -> looking for
    # text '-name: ocean' with arbitrary white-space between the characters)
    if ! grep -q -e '-\s*name\s*:\s*ocean' -- "$fname" 2>/dev/null; then
      need_to_check_ocean=0
      break
    fi

    # The exit status of this pipeline is that of `awk`, not `grep`, so a
    # missing `model:` entry has to be detected through the empty result.
    model_name=$(grep -e '^model:' -- "${fname}" | cut -d: -f2 | awk '{$1=$1};1')
    if [ -z "$model_name" ]; then
      model_name="access-esm1.6"
      echo "Warning: grep failed. Assuming default name = " "${model_name}"
    fi

    model_log_fname="${dir}/archive/output000/${model_name}.out"
    if [ ! -f "$model_log_fname" ]; then
      echo "Warning: Could not find output log file = " "${model_log_fname} - skipping the ocean-check"
      need_to_check_ocean=0
      break
    fi
    model_output_log_filenames+=("${model_log_fname}")
  done

  if [ "$need_to_check_ocean" -gt 0 ]; then
    _test_ocean "${model_output_log_filenames[0]}" "${model_output_log_filenames[1]}"
    ocean_result=$?
    _pretty_print_test_result "${1}" "${2}" "Ocean" "$ocean_result"
    full_result=$(( full_result + ocean_result ))
  fi

  return "$full_result"
}
## Compare every pair of the given experiment directories.
##
## Arguments: two or more directories; after resolving sym-links, relative
##            paths and trailing slashes they must all be distinct
## Outputs: progress, summary and error messages on stdout
## Returns: the number of directory pairs for which
##          _check_and_print_results_in_two_dirs reported a failure, capped
##          at 255 (0 means every pair passed).
##          Calls `exit 1` when the arguments are invalid.
main() {
  if [ $# -lt 2 ]; then
    printf "Please give at least two arguments\n"
    exit 1
  fi

  local arg fullpath

  # Check that all arguments are directories
  echo "Checking that all arguments are directories ..."
  for arg in "$@"; do
    if [ ! -d "$arg" ]; then
      printf "%s is not a directory.\nPlease supply a directory name\n" "$arg"
      exit 1
    fi
  done
  echo "Checking that all arguments are directories ...done"

  echo "Checking that all arguments are unique directories ..."
  # Check that none of the directories are duplicated
  # Needs to be resilient against i) sym-links, ii) relative paths and iii) trailing slashes
  # Adapted from https://stackoverflow.com/a/58662338
  # The -A modifier makes the "unique" variable a dictionary
  local -A unique=()
  for arg in "$@"; do
    if ! fullpath="$(realpath -- "${arg}")"; then
      printf "Could not resolve the full path of %s\n" "$arg"
      exit 1
    fi
    # "${unique[key]+set}" expands to "set" only when the key exists. Unlike
    # `[[ -v unique[key] ]]` it does not re-evaluate the key, so it is safe
    # for paths that contain shell-special characters.
    if [[ -n "${unique[$fullpath]+set}" ]]; then
      echo "Duplicate arg '${fullpath}'"
      exit 1
    fi
    unique["${fullpath}"]=1 # The value doesn't matter
  done
  echo "Checking that all arguments are unique directories ...done"

  local ndirs=$#
  local i j
  local any_fail=0
  # Visit every unordered pair (i, j) with i < j exactly once
  for ((i = 1; i < ndirs; i++)); do
    for ((j = i + 1; j <= ndirs; j++)); do
      if ! _check_and_print_results_in_two_dirs "${!i}" "${!j}"; then
        any_fail=$(( any_fail + 1 ))
      fi
    done
  done

  # An exit status is truncated to 8 bits, so e.g. 256 failures would
  # otherwise be reported as 0 (success). Cap the value instead.
  return $(( any_fail > 255 ? 255 : any_fail ))
}
# Entry point: forward all command-line arguments, unmodified, to main
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment