Skip to content

Instantly share code, notes, and snippets.

@codez0mb1e
Last active February 22, 2021 06:02
Show Gist options
  • Save codez0mb1e/352e274356cf3326bbfb198383365ca5 to your computer and use it in GitHub Desktop.
Save codez0mb1e/352e274356cf3326bbfb198383365ca5 to your computer and use it in GitHub Desktop.
ML flow config
# ML flow configuration.
#
# Everything lives under the `default` profile: shared directory settings
# first, then one sub-section per pipeline stage under `stages`.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened (which made every key top-level and produced
# duplicate keys such as `data_version` and `metadata`). Confirm the hierarchy
# against the code that reads this file.
default:
  # Common ----
  experiment_dir: '.'  # relative from project directory as root
  data_dir: '../data'  # relative from @experiment_dir as root
  cache_dir: 'cache'  # relative from @experiment_dir as root
  output_dir: 'output'  # relative from @experiment_dir as root
  # Quoted so the value stays a string instead of being parsed as a date.
  experiment_version: '2020-01-01'

  stages:
    # Downloading data stage config ----
    downloading:
      data_version: '2020-01-01'
      metadata:
        target_columns: []
        hashing_columns: []

    # Preprocessing data stage config ----
    preprocessing:
      data_version: '2020-01-01'
      metadata:
        unique_id: 'id'
        binary: []
        datetime: []
        timestamp: []
        extra: []

    # Feature engineering stage config ----
    feature_engineering:
      data_version: '2020-01-01'
      metadata:
        unique_id: 'id'
        factors: []
        extra: []

    # Train model stage config ----
    # NOTE(review): the sub-sections below are assumed to be direct children
    # of `train_model`; confirm none of them belongs under `auto_ml`.
    train_model:
      data_version: '2020-02-02'
      metadata:
        unique_id: 'id'
        extra: ['f2']
        label: 'target'
        weight: 'value'
        timestamp: 'time'
      factors_encoding:
        type: 'TargetEncoding'
        # NOTE(review): key is spelled `min_leveles_n` (sic); kept unchanged
        # because consumers look it up by this exact name.
        min_leveles_n: 3
        folds_n: 8
        fold_column: 'te_fold_n'
      early_stopping:
        rounds_n: 3
        metric: 'logloss'
      folding:
        # NOTE(review): the two constraints below say `fold_column` must be
        # null when `folds_n` is set, yet both carry values here. Verify which
        # one the consumer actually honours.
        folds_n: 8
        fold_column: 'fold_n'  # must be null when folds_n has value
        fold_assignment: null  # must be null when fold_column has value
      auto_ml:
        # See available algos:
        # https://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/algo-params/include_algos.html
        include_algos: ['GBM', 'DRF']
        max_duration_secs: 3600  # limit the runtime in secs
        max_models_n: 1000  # max trained models number
        # Search will stop after not improving much over the best N random
        # models.
        rounds_n: 5
      cross_validation:
        top_n_models: 5
      final_model:
        model_version: 'auto'
      # NOTE(review): `seed` is placed at the `train_model` level; it may
      # instead belong under `final_model` - confirm against the reader.
      seed: 314

    # New data predictions stage config ----
    predict_new_data:
      data_version: '2020-02-02'
      # Quoted so the identifier is not parsed as a float.
      model_version: '1601976544.59666'
      metadata:
        unique_id: 'id'
        weight: 'W'
        label: 'label'
      threshold_searching_strategy:
        name: 'quantile'
        value: 0.8

  # Misc ----
  # NOTE(review): assumed to belong to the `default` profile, parallel to the
  # "Common" settings above.
  verbose: 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment