Last active
February 22, 2021 06:02
-
-
Save codez0mb1e/352e274356cf3326bbfb198383365ca5 to your computer and use it in GitHub Desktop.
ML flow config
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ML flow config: one `default` profile holding the common paths, the
# per-stage settings of the pipeline, and misc options.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file. The placement of `seed` (directly under `train_model`) and of
# `timestamp` (inside `train_model.metadata`) is inferred — confirm against
# the code that reads this config.
default:
  # Common ----
  experiment_dir: '.'  # relative from project directory as root
  data_dir: '../data'  # relative from @experiment_dir as root
  cache_dir: 'cache'  # relative from @experiment_dir as root
  output_dir: 'output'  # relative from @experiment_dir as root

  # Quoted so it stays a string instead of being parsed as a date.
  experiment_version: '2020-01-01'

  stages:
    # Downloading data stage config ----
    downloading:
      data_version: '2020-01-01'
      metadata:
        target_columns: []
        hashing_columns: []

    # Preprocessing data stage config ----
    preprocessing:
      data_version: '2020-01-01'
      metadata:
        unique_id: 'id'
        binary: []
        datetime: []
        timestamp: []
        extra: []

    # Feature engineering stage config ----
    feature_engineering:
      data_version: '2020-01-01'
      metadata:
        unique_id: 'id'
        factors: []
        extra: []

    # Train model stage config ----
    train_model:
      data_version: '2020-02-02'
      metadata:
        unique_id: 'id'
        extra: ['f2']
        label: 'target'
        weight: 'value'
        timestamp: 'time'
      factors_encoding:
        type: 'TargetEncoding'
        # NOTE(review): key is spelled 'min_leveles_n' (sic); kept unchanged
        # because consumers look it up by this exact name.
        min_leveles_n: 3
        folds_n: 8
        fold_column: 'te_fold_n'
      early_stopping:
        rounds_n: 3
        metric: 'logloss'
      folding:
        folds_n: 8
        # NOTE(review): the comment below says fold_column must be null when
        # folds_n has a value, yet both are set here — confirm which of the
        # two the pipeline actually honours.
        fold_column: 'fold_n'  # must be null when folds_n has value
        fold_assignment: null  # must be null when fold_column has value
      auto_ml:
        # see available algos
        # https://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/algo-params/include_algos.html
        include_algos: ['GBM', 'DRF']
        max_duration_secs: 3600  # limit the runtime in secs
        max_models_n: 1000  # max trained models number
        # search will stop after not improving much over the best N random models
        rounds_n: 5
      cross_validation:
        top_n_models: 5
      final_model:
        model_version: 'auto'
      seed: 314

    # New data predictions stage config ----
    predict_new_data:
      data_version: '2020-02-02'
      # Quoted so the version id is not parsed as a float.
      model_version: '1601976544.59666'
      metadata:
        unique_id: 'id'
        weight: 'W'
        label: 'label'
      threshold_searching_strategy:
        name: 'quantile'
        value: 0.8

  # Misc ----
  verbose: 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment