Created
February 13, 2019 04:18
-
-
Save legatoo/3f7e91a9c79d9171401c481a957b2d8f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
####################################################################################################################
#
# 说明:
# 无论单机版本还是分布式版本,请使用本脚本启动您的程序。本脚本默认项目具有一个run.py的主入口。
# 脚本通过用户输入的参数确认执行本地版还是AFO版本。脚本默认从conf文件夹内加载配置。
# * 对于local(默认)模式,从local_settings中以及hyper_params文件夹中抽取程序运行参数,抽取后输送给run.py执行
# * 对于afo模式,加载afo_settings.xml文件,并将hyper_params中指定的超参配置添加入afo_settings.xml,传递给
#   afo进行执行
# * (注)配置文件中可以使用顶头的#添加注释
#
# 用法:
# bash run.sh [ --mode local | afo ] [ --hparam <超参文件名> ] [ --tag <标签> ]
# (本脚本使用了数组、[[ ]] 等bash特性,请使用bash而不是sh执行)
#
# 参数说明:
# mode 选填参数,默认为local。有local(本地版)和afo(分布式)两种模式。
# hparam 必传参数。为了方便大家超参调试,我们允许用户将每一轮的超参放入独立的文件,并将文件名传入供脚本加载
# tag 选填参数。为了方便大家比较多轮的训练,例如在tensorboard中展示多轮曲线。我们会在用户指定的checkpoint文件夹下保存各轮的结果
# 每轮的结果保存在以tag名命名的文件夹内。tag默认为yyyyMMddHHmm格式,用户也可以通过tag参数手动指定
#
####################################################################################################################
# Parse the script options.
# Globals written here: MODE, HYPER_PARAM, TAG_ID, TIME_STAMP.
MODE="local"
HYPER_PARAM=""
# Read the clock once so that the default tag (yyyyMMddHHmm) and the timestamp
# (yyyyMMddHHmmSS) can never disagree across a minute boundary.
TIME_STAMP=$(date +"%Y%m%d%H%M%S")
TAG_ID=${TIME_STAMP:0:12}

#######################################
# Parse command-line options into the globals MODE, HYPER_PARAM and TAG_ID.
# Arguments:
#   the script's arguments: [--mode local|afo] [--hparam FILE] [--tag TAG]
# Outputs:
#   an error message on stderr for an unknown option or a missing value
# Exits:
#   3 on an unknown option or when an option is given without a value
#######################################
function parse_options() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --mode | --hparam | --tag)
        # Every supported option takes exactly one value; without this check
        # a trailing "--tag" would silently set an empty tag.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value. Supported options are [--mode local|afo] [--hparam] [--tag]" >&2
          exit 3
        fi
        case "$1" in
          --mode) MODE="$2" ;;
          --hparam) HYPER_PARAM="$2" ;;
          --tag) TAG_ID="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown option $1. Supported options are [--mode local|afo] [--hparam] [--tag]" >&2
        exit 3
        ;;
    esac
  done
}

parse_options "$@"
# MIS_ID is not set anywhere in this script; presumably it comes from the
# caller's environment (prints empty when unset).
echo "Running in [$MODE] mode with hyper param file [$HYPER_PARAM] and tag [${TAG_ID}] by [${MIS_ID:-}]"
## Config files may contain comments; a comment starts with '#' at the
## beginning of the line.
#######################################
# Tell whether a config line is a comment.
# Arguments:
#   $1 - the line to inspect
# Outputs:
#   "skip annotation <line>" on stdout when the line is a comment
# Returns:
#   0 when $1 starts with '#', 1 otherwise (including an empty/missing $1)
#######################################
function skip_notation() {
  # A plain prefix match is enough; no regex needed.
  if [[ "${1-}" == '#'* ]]; then
    echo "skip annotation $1"
    return 0
  fi
  return 1
}
# Parameters in local_settings and in the hyper_params files use the
# name=value format, one parameter per line.
#######################################
# Validate that a config line has the form name=value.
# Arguments:
#   $1 - the line to validate
# Outputs:
#   an explanation on stderr when the line is malformed
# Returns:
#   0 when $1 contains '=' preceded by at least one character (the value may
#   be empty), 1 otherwise
#######################################
function format_check() {
  # "?*=*" requires a non-empty name, so "=value" is rejected instead of
  # being turned into a nameless "--=value" argument later on.
  if [[ "${1-}" != ?*=* ]]; then
    echo "illegal param format ${1-}" >&2
    echo "param has to defined in format name=value. one param one line." >&2
    return 1
  fi
  return 0
}
#######################################
# Join strings with a separator and store the result in a named variable.
# Arguments:
#   $1    - name of the variable that receives the joined string
#   $2    - separator (used verbatim; may contain '&', backslashes, newlines)
#   $3... - elements to join (none yields an empty string)
# Returns:
#   status of the final assignment
#######################################
function join() {
  # Prefixed local names so they are unlikely to shadow the caller's
  # result variable.
  local __join_target=$1 __join_sep=${2-} __join_out=${3-} __join_item
  # Drop name, separator and first element; with fewer than three arguments
  # just drop whatever is there.
  shift 3 2>/dev/null || shift "$#"
  # Explicit loop instead of "${@/#/$sep}": in pattern substitution bash 5.2+
  # treats '&' in the replacement as "the matched text", which would mangle
  # separators containing '&'.
  for __join_item in "$@"; do
    __join_out+="${__join_sep}${__join_item}"
  done
  printf -v "$__join_target" '%s' "$__join_out"
}
#######################################
# Run the project locally: collect name=value settings and pass them to
# run.py as --name=value arguments.
# Globals:
#   TAG_ID      (read) - substituted for the first "@run_id@" placeholder of a
#                        local_settings line
#   HYPER_PARAM (read) - file name under conf/hyper_params/ (optional)
# Reads:
#   conf/local_settings, conf/hyper_params/$HYPER_PARAM
# Outputs:
#   the number of collected parameters and the parameters, one per line
# Returns:
#   exit status of "python run.py"
# Exits:
#   3 when a line is not in name=value format
#######################################
function run_local() {
  local -a params=("--script_mode=local")
  local line

  # "read" without IFS= trims surrounding whitespace, so indented settings
  # and whitespace-only lines are normalised before the checks below.
  while read -r line || [[ -n "$line" ]]; do
    # Blank lines carry no setting; skip them instead of failing the format check.
    if [[ -z "$line" ]]; then
      continue
    fi
    # Skip comments. The line is quoted so that it is neither word-split nor
    # glob-expanded (a '*' in a comment must not expand to file names).
    if skip_notation "$line"; then
      continue
    fi
    # Enforce the name=value format.
    if ! format_check "$line"; then
      exit 3
    fi
    if [[ "$line" == *"@run_id@"* ]]; then
      # Replace the first placeholder by splitting around it; this keeps the
      # tag literal whatever characters it contains.
      line="${line%%@run_id@*}${TAG_ID}${line#*@run_id@}"
    fi
    params+=("--$line")
  done < conf/local_settings

  # -f (not -e): with an empty HYPER_PARAM the path is the directory itself.
  if [[ -f "conf/hyper_params/${HYPER_PARAM}" ]]; then
    while read -r line || [[ -n "$line" ]]; do
      if [[ -z "$line" ]]; then
        continue
      fi
      # Comments are allowed in hyper-param files as well.
      if skip_notation "$line"; then
        continue
      fi
      if ! format_check "$line"; then
        exit 3
      fi
      params+=("--$line")
    done < "conf/hyper_params/${HYPER_PARAM}"
  elif [[ -n "$HYPER_PARAM" ]]; then
    echo "warning: hyper param file conf/hyper_params/${HYPER_PARAM} not found, running without it." >&2
  fi

  echo "collected ${#params[@]} params."
  printf '%s\n' "${params[@]}"
  python run.py "${params[@]}"
}
#######################################
# Escape the XML special characters &, < and > in a string.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
function _afo_xml_escape() {
  local raw=${1-} escaped='' ch i
  # Character loop rather than ${var//…}: '&' in a pattern-substitution
  # replacement is special on bash 5.2+.
  for ((i = 0; i < ${#raw}; i++)); do
    ch=${raw:i:1}
    case "$ch" in
      '&') escaped+='&amp;' ;;
      '<') escaped+='&lt;' ;;
      '>') escaped+='&gt;' ;;
      *) escaped+=$ch ;;
    esac
  done
  printf '%s' "$escaped"
}

#######################################
# Prepare an AFO run: insert the run's parameters as <property> entries just
# before </configuration> in conf/afo_settings.xml.
# The inserted block is wrapped in two "<!--TIME_STAMP-->" marker lines so
# that recovery_afo_settings can remove it again.
# Globals:
#   TIME_STAMP  (read) - marker text
#   HYPER_PARAM (read) - file name under conf/hyper_params/ (optional)
# Reads:
#   conf/hyper_params/$HYPER_PARAM, conf/afo_settings.xml
# Writes:
#   conf/afo_settings.xml (only after the new content was built successfully)
# Outputs:
#   the inserted block, one entry per line
# Returns:
#   0 on success, 1 when the settings file could not be rewritten
# Exits:
#   3 when a hyper-param line is not in name=value format
#######################################
function run_afo() {
  local closing='</configuration>'
  local -a props=()
  local line name value xml_line tmp_file

  props+=("<!--${TIME_STAMP}-->")
  props+=("<property><name>args.script_mode</name><value>afo</value></property>")
  # -f (not -e): with an empty HYPER_PARAM the path is the directory itself.
  if [[ -f "conf/hyper_params/${HYPER_PARAM}" ]]; then
    while read -r line || [[ -n "$line" ]]; do
      # Blank lines and comments carry no parameter.
      if [[ -z "$line" ]]; then
        continue
      fi
      if skip_notation "$line"; then
        continue
      fi
      if ! format_check "$line"; then
        exit 3
      fi
      # Split at the FIRST '=' only, so values may themselves contain '='.
      name=${line%%=*}
      value=${line#*=}
      # One XML property per parameter.
      props+=("<property><name>args.$(_afo_xml_escape "$name")</name><value>$(_afo_xml_escape "$value")</value></property>")
    done < "conf/hyper_params/${HYPER_PARAM}"
  elif [[ -n "$HYPER_PARAM" ]]; then
    echo "warning: hyper param file conf/hyper_params/${HYPER_PARAM} not found, running without it." >&2
  fi
  props+=("<!--${TIME_STAMP}-->")

  # Build the new file in a private temp file. The block is spliced in with
  # plain string operations (no sed replacement), so '/', '&' and '\' in
  # parameter values need no escaping.
  tmp_file=$(mktemp) || {
    echo "Unable to create a temporary file." >&2
    return 1
  }
  while IFS= read -r xml_line || [[ -n "$xml_line" ]]; do
    if [[ "$xml_line" == *"$closing"* ]]; then
      # Text before the closing tag stays on the first marker's line.
      printf '%s' "${xml_line%%"$closing"*}"
      printf '%s\n' "${props[@]}"
      printf '%s\n' "${closing}${xml_line#*"$closing"}"
    else
      printf '%s\n' "$xml_line"
    fi
  done < conf/afo_settings.xml > "$tmp_file" || {
    echo "Unable to rewrite conf/afo_settings.xml." >&2
    rm -f -- "$tmp_file"
    return 1
  }
  # Overwrite in place (keeps the settings file's inode and permissions).
  if ! cat "$tmp_file" > conf/afo_settings.xml; then
    rm -f -- "$tmp_file"
    return 1
  fi
  rm -f -- "$tmp_file"

  printf 'Params about to pass to AFO:\n'
  printf '%s\n' "${props[@]}" "$closing"
  # The actual submission is currently disabled:
  #/opt/meituan/tensorflow-release/bin/tensorflow-submit.sh -conf ctr_hw_submit.xml -files ctr_main.py,ctr_estimator.py,run_setting.py,ctr_dataset.py,analysis.txt
}
#######################################
# Remove this run's parameter block from conf/afo_settings.xml, i.e. every
# line from the first "<!--TIME_STAMP-->" marker line through the next one.
# Globals:
#   TIME_STAMP (read) - marker text
# Writes:
#   conf/afo_settings.xml (only when it exists and filtering succeeded)
# Returns:
#   0 on success or when there is nothing to restore, 1 on failure
#######################################
function recovery_afo_settings() {
  local settings=conf/afo_settings.xml
  local tmp_file status=0

  # Nothing to restore; in particular do not create an empty settings file.
  if [[ ! -f "$settings" ]]; then
    return 0
  fi

  tmp_file=$(mktemp) || {
    echo "Unable to create a temporary file." >&2
    return 1
  }
  # Only overwrite the settings when sed produced the filtered copy.
  if sed "/<!--${TIME_STAMP}-->/,/<!--${TIME_STAMP}-->/d" "$settings" > "$tmp_file"; then
    # Overwrite in place (keeps the settings file's inode and permissions).
    cat "$tmp_file" > "$settings" || status=1
  else
    echo "Unable to restore $settings." >&2
    status=1
  fi
  rm -f -- "$tmp_file"
  return "$status"
}
#######################################
# Interrupt handler: undo the afo_settings.xml modification and quit.
# Globals:
#   MODE (read) - the settings file is only restored in afo mode
# Outputs:
#   "bye." on stdout
# Exits:
#   always, with status 1
#######################################
function finish() {
  # Only afo mode modifies afo_settings.xml; in any other mode there is
  # nothing to restore and the settings file must not be touched.
  if [[ "${MODE-}" == "afo" ]]; then
    recovery_afo_settings
  fi
  echo "bye."
  exit 1
}
# Catch the user's Ctrl-C so the script can restore what it changed.
trap finish SIGINT

# Dispatch on the requested mode. A failing run is reported through the
# script's exit status, and an unsupported mode is an error rather than a
# silent no-op.
case "$MODE" in
  local)
    if [[ ! -e conf/local_settings ]]; then
      echo "Unable to find file local_settings under conf directory. " >&2
      exit 3
    fi
    run_local || exit $?
    ;;
  afo)
    if [[ ! -e conf/afo_settings.xml ]]; then
      echo "Unable to find file afo_settings.xml under conf directory. " >&2
      exit 3
    fi
    run_afo
    afo_status=$?
    # Always restore the settings file, even when run_afo failed.
    recovery_afo_settings
    if [[ $afo_status -ne 0 ]]; then
      exit "$afo_status"
    fi
    ;;
  *)
    echo "Unsupported mode [$MODE]. Supported modes are local and afo." >&2
    exit 3
    ;;
esac
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment