Last active
June 20, 2025 05:22
-
-
Save manodeep/84f625f4e6d6d266508159d09ede3dab to your computer and use it in GitHub Desktop.
Checking for reproducible atmosphere results between two experiments
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
## Written by Manodeep Sinha (ACCESS-NRI, ANU) March 2025
## Has made use of https://www.shellcheck.net/ to identify potential errors
## in the script. Other code fragments taken from Stack Overflow and similar
## sites are noted inline.
## LICENSE: MIT
#######################################
# Set up the colour / font-style escape sequences used when reporting results.
# Globals:   RED, GRN, PURPLE, BLD, DEF (written)
# Outputs:   two progress messages on stdout
#
# The escape sequences are only generated when stdout is a terminal and `tput`
# can resolve the terminal type. Otherwise (output redirected to a file or a
# pipe, TERM unset, ...) every variable is the empty string, so the report
# stays plain text and no `tput` error message is printed.
#######################################
_setup_colors() {
  echo "Setting up colors and font-styles ..."
  # Start from "no styling" so the variables are always defined.
  RED=""
  GRN=""
  PURPLE=""
  BLD=""
  DEF=""
  if [ -t 1 ] && tput sgr0 > /dev/null 2>&1; then
    # BLK=$(tput setaf 0)   # black - currently unused
    # A terminal may support only some capabilities; fall back per variable.
    RED=$(tput setaf 1 2> /dev/null) || RED=""
    GRN=$(tput setaf 2 2> /dev/null) || GRN=""
    PURPLE=$(tput setaf 5 2> /dev/null) || PURPLE=""
    BLD=$(tput bold 2> /dev/null) || BLD=""
    DEF=$(tput sgr0 2> /dev/null) || DEF=""
  fi
  echo "Setting up colors and font-styles ...done"
}
_setup_colors
#######################################
# Print a one-line PASSED/FAILED summary for one compared component.
# Globals:   RED, GRN, PURPLE, BLD, DEF (read) - styling escape sequences
# Arguments: $1 - first experiment directory
#            $2 - second experiment directory (must differ from $1)
#            $3 - name of the component that was compared
#            $4 - integer status of the comparison; 0 is reported as PASSED,
#                 anything else as FAILED
# Outputs:   the summary line on stdout; usage errors on stderr
# Returns:   1 on a usage error, otherwise the status of the final printf
#######################################
_pretty_print_test_result() {
  # Explicit if-blocks: with `test && printf && return 1` a failing printf
  # silently skips the return and the function carries on.
  if [ $# -ne 4 ]; then
    printf "Please give *exactly* four arguments (got %d)\n" "$#" >&2
    return 1
  fi
  if [ "$1" == "$2" ]; then
    printf "Please provide two *different* directories as the first two arguments (got two identical args with value = '%s' and '%s')\n" "$1" "$2" >&2
    return 1
  fi

  local row_dir col_dir status status_color
  row_dir=$(basename -- "${1}")
  col_dir=$(basename -- "${2}")

  status="FAILED"
  status_color="${RED}"
  if [ "$4" -eq 0 ]; then
    status="PASSED"
    status_color="${GRN}"
  fi

  # Every value goes through %s so that backslashes or '%' characters in a
  # directory or component name are printed literally.
  printf 'Comparing %s%s%s in dirs = (%s%s,%s%s): Status - %s%s%s\n' \
    "$PURPLE" "${3}" "$DEF" \
    "$BLD" "$row_dir" "$col_dir" "$DEF" \
    "$status_color" "$status" "$DEF"
}
#######################################
# Extract every match of a Perl-style regex from two files and report whether
# the two sequences of matches are identical.
# Arguments: $1 - first file
#            $2 - second file (must differ from $1)
#            $3 - pattern handed to `grep -oP`
# Outputs:   diagnostics on stderr
# Returns:   0 if both files yield the same matches in the same order,
#            1 on any difference, usage error, missing file or grep error
# NOTE(review): two files that both contain *no* match still compare equal
#               and return 0 - confirm that this is the intended behaviour.
#######################################
_grep_pattern_and_diff_two_files() {
  if [ $# -ne 3 ]; then
    printf "Please give *exactly* three arguments -- first two should be unique filenames and the last the pattern to 'grep' for (got %d)\n" "$#" >&2
    return 1
  fi
  if [ "$1" == "$2" ]; then
    printf "Please provide two *different* files as the first two arguments (got two identical args with value = '%s' and '%s')\n" "$1" "$2" >&2
    return 1
  fi

  # Validate both inputs before creating anything on disk, so an early return
  # can not leave temporary files behind.
  local fname
  for fname in "$1" "$2"; do
    if [ ! -f "$fname" ]; then
      printf "Did not find the expected output file for the component (tried %s) ...returning\n" "$fname" >&2
      return 1
    fi
  done

  local tmp_dir
  if ! tmp_dir=$(mktemp -d); then
    printf "Could not create a temporary directory ...returning\n" >&2
    return 1
  fi

  local res=0 idx=0 grep_status
  for fname in "$1" "$2"; do
    idx=$((idx + 1))
    # -o/-P: print only the matched text (Perl syntax, so that \K can drop
    #        the known prefix); taken from https://askubuntu.com/a/530194
    # -a   : treat the file as text. The model logs can contain NUL bytes,
    #        after which grep would otherwise regard the remainder of the
    #        file as binary and stop reporting the individual matches.
    grep -a -oP -e "$3" -- "$fname" > "${tmp_dir}/matches.${idx}"
    grep_status=$?
    # grep exits with 0 (matches), 1 (no match) or >1 (error). An error must
    # not be mistaken for "no match in either file, hence identical".
    if [ "$grep_status" -gt 1 ]; then
      printf "grep failed (exit status %d) while processing %s\n" "$grep_status" "$fname" >&2
      res=1
    fi
  done

  if [ "$res" -eq 0 ]; then
    # Comparing the raw match lists is equivalent to comparing line-numbered
    # copies of them, so no numbering step is needed.
    cmp --silent "${tmp_dir}/matches.1" "${tmp_dir}/matches.2" || res=1
  fi

  # Cleanup happens on every path that created the directory.
  rm -rf -- "${tmp_dir:?}"
  return "$res"
}
#######################################
# Compare the text following every "[chksum] " marker in two log files.
# Arguments: $1 - first log file
#            $2 - second log file
# Returns:   the status of _grep_pattern_and_diff_two_files
#######################################
_test_ocean() {
  # \K drops the "[chksum] " prefix from the match, so only the text after
  # the marker is compared. `local` keeps the pattern out of the global scope.
  local pattern='(\[chksum\] )\K.*'
  _grep_pattern_and_diff_two_files "$1" "$2" "$pattern"
}
#######################################
# Compare the text following every "Final Absolute Norm :" marker in two
# atmosphere output files.
# Arguments: $1 - first output file
#            $2 - second output file
# Returns:   the status of _grep_pattern_and_diff_two_files
#######################################
_test_atmosphere() {
  # \K drops the "Final Absolute Norm :" prefix from the match, so only the
  # text after the marker is compared. `local` keeps the pattern out of the
  # global scope.
  local pattern='(Final Absolute Norm :)\K.*'
  _grep_pattern_and_diff_two_files "$1" "$2" "$pattern"
}
#######################################
# Run the atmosphere and ocean reproducibility checks for one pair of
# experiment directories and print one result line per component checked.
#
# A component is skipped when its inputs are missing in either directory:
#   atmosphere - needs <dir>/archive/output000/atmosphere/atm.fort6.pe0
#   ocean      - needs <dir>/config.yaml mentioning "- name: ocean" and
#                <dir>/archive/output000/<model>.out, where <model> is the
#                value of the top-level "model:" entry of config.yaml
#                (default "access-esm1.6" when there is no such entry)
#
# Arguments: $1 - first experiment directory
#            $2 - second experiment directory (must differ from $1)
# Outputs:   result lines on stdout (via _pretty_print_test_result);
#            warnings and usage errors on stderr
# Returns:   1 on a usage error, otherwise the sum of the statuses returned
#            by _test_atmosphere and _test_ocean (0 when every check that
#            ran passed, or when nothing could be checked)
#######################################
_check_and_print_results_in_two_dirs() {
  if [ $# -ne 2 ]; then
    printf "Please give *exactly* two arguments (got %d)\n" "$#" >&2
    return 1
  fi
  if [ "$1" == "$2" ]; then
    printf "Please provide two *different* directories (got two identical args with value = '%s' and '%s')\n" "$1" "$2" >&2
    return 1
  fi

  # Everything is local so that no state leaks into (or is clobbered in)
  # the caller, e.g. the loop counters in main.
  local full_result=0 nchecked=0
  local dir fname

  ## ---------------------------- Atmosphere ----------------------------
  local rel_atm_filename="archive/output000/atmosphere/atm.fort6.pe0"
  local need_to_check_atmosphere=1
  local atm_result
  local -a filenames=()
  for dir in "$1" "$2"; do
    fname="${dir}/${rel_atm_filename}"
    if [ ! -f "$fname" ]; then
      need_to_check_atmosphere=0
      break
    fi
    filenames+=("${fname}")
  done

  if [ "$need_to_check_atmosphere" -gt 0 ]; then
    _test_atmosphere "${filenames[0]}" "${filenames[1]}"
    atm_result=$?
    _pretty_print_test_result "${1}" "${2}" "Atmosphere" "$atm_result"
    full_result=$((full_result + atm_result))
    nchecked=$((nchecked + 1))
  fi

  ## ------------------------------ Ocean -------------------------------
  # The name of the model log comes from `config.yaml`, which usually
  # contains a line like ``model: access-esm1.6``.
  local rel_config_fname="config.yaml"
  local default_model_name="access-esm1.6"
  local need_to_check_ocean=1
  local model_name model_log_fname ocean_result
  local -a model_output_log_filenames=()
  for dir in "$1" "$2"; do
    fname="${dir}/${rel_config_fname}"
    if [ ! -f "$fname" ]; then
      need_to_check_ocean=0
      break
    fi

    # Is there an ocean component? Looks for the text '-name: ocean' with
    # arbitrary white-space between the tokens.
    if ! grep -q -E -e '-[[:space:]]*name[[:space:]]*:[[:space:]]*ocean' -- "$fname" 2> /dev/null; then
      need_to_check_ocean=0
      break
    fi

    # First top-level "model:" entry, second ':'-separated field, trimmed.
    # The emptiness of the result is tested rather than an exit status: the
    # status of a `grep | cut | awk` pipeline is that of its last stage and
    # can not reveal that no "model:" line was found.
    model_name=$(awk -F: '/^model:/ { v = $2; gsub(/^[ \t\r]+|[ \t\r]+$/, "", v); print v; exit }' < "$fname")
    if [ -z "$model_name" ]; then
      model_name="$default_model_name"
      printf "Warning: no 'model:' entry found in %s. Assuming default name = %s\n" "$fname" "$model_name" >&2
    fi

    model_log_fname="${dir}/archive/output000/${model_name}.out"
    if [ ! -f "$model_log_fname" ]; then
      printf "Warning: Could not find output log file = %s - skipping the ocean-check\n" "$model_log_fname" >&2
      need_to_check_ocean=0
      break
    fi
    model_output_log_filenames+=("${model_log_fname}")
  done

  if [ "$need_to_check_ocean" -gt 0 ]; then
    _test_ocean "${model_output_log_filenames[0]}" "${model_output_log_filenames[1]}"
    ocean_result=$?
    _pretty_print_test_result "${1}" "${2}" "Ocean" "$ocean_result"
    full_result=$((full_result + ocean_result))
    nchecked=$((nchecked + 1))
  fi

  # The return value is unchanged (0) when nothing was compared, but say so:
  # otherwise such a pair is indistinguishable from one that passed.
  if [ "$nchecked" -eq 0 ]; then
    printf "Warning: no component could be compared for dirs = (%s,%s)\n" "$1" "$2" >&2
  fi

  return "$full_result"
}
#######################################
# Compare every pair of the given experiment directories.
# Arguments: two or more distinct directories
# Outputs:   progress and result lines on stdout; errors on stderr
# Exits:     with status 1 when fewer than two arguments are given, an
#            argument is not a directory, a path can not be resolved, or two
#            arguments refer to the same directory
# Returns:   the number of directory pairs for which
#            _check_and_print_results_in_two_dirs returned non-zero,
#            capped at 255
#######################################
main() {
  if [ $# -lt 2 ]; then
    printf "Please give at least two arguments\n" >&2
    exit 1
  fi

  local dir
  echo "Checking that all arguments are directories ..."
  for dir in "$@"; do
    if [ ! -d "$dir" ]; then
      printf "%s is not a directory.\nPlease supply a directory name\n" "$dir" >&2
      exit 1
    fi
  done
  echo "Checking that all arguments are directories ...done"

  echo "Checking that all arguments are unique directories ..."
  # Needs to be resilient against i) sym-links, ii) relative paths and
  # iii) trailing slashes, hence the comparison of resolved paths.
  # Adapted from https://stackoverflow.com/a/58662338
  local -A unique=() # associative array keyed by the resolved path
  local fullpath
  for dir in "$@"; do
    if ! fullpath=$(realpath -- "$dir"); then
      printf "Could not resolve the path of '%s'\n" "$dir" >&2
      exit 1
    fi
    # ${array[key]+x} tests for the key without evaluating the path a second
    # time, which `[[ -v array[$key] ]]` does.
    if [[ -n "${unique[$fullpath]+x}" ]]; then
      echo "Duplicate arg '${fullpath}'" >&2
      exit 1
    fi
    unique["$fullpath"]=1 # the value does not matter, only the key
  done
  echo "Checking that all arguments are unique directories ...done"

  # Loop counters are local so that no callee can clobber them.
  local ndirs=$# nfailed=0
  local i j
  for ((i = 1; i < ndirs; i++)); do
    for ((j = i + 1; j <= ndirs; j++)); do
      if ! _check_and_print_results_in_two_dirs "${!i}" "${!j}"; then
        nfailed=$((nfailed + 1))
      fi
    done
  done

  # An exit status is only 8 bits wide: without the cap, 256 failing pairs
  # would be reported as 0, i.e. as success.
  if ((nfailed > 255)); then
    nfailed=255
  fi
  return "$nfailed"
}
# Script entry point: hand every command-line argument to main. As the last
# command of the script, main's return value becomes the exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment