Created
January 7, 2023 18:44
-
-
Save geraldstanje/97223d34e5a95fa649cb377492be9410 to your computer and use it in GitHub Desktop.
Databricks `dbx` deployment configuration file (YAML)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reusable anchors for cluster definitions. Everything under `custom` is
# ignored by dbx itself and only referenced via YAML aliases below.
custom:
  # Resource tags for production clusters (cost attribution / ownership).
  mm_tags: &mm-tags
    "Application Type": "Back End"
    Product: Optimization
    "Sub Department": "Data Science"
    Department: Engineering
    "Service Name": cmv
    "Repo Name": cmv-st
    "Purpose": prod
    "Category": Production
  # Same tag set, marking resources as development.
  mm_dev_tags: &mm-dev-tags
    "Application Type": "Back End"
    Product: Optimization
    "Sub Department": "Data Science"
    Department: Engineering
    "Service Name": cmv
    "Repo Name": cmv-st
    "Purpose": dev
    "Category": Development
  # Single-node ML cluster used for model training/inference tasks (prod tags).
  model-cluster-props: &model-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "i3.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    spark_conf:
      spark.master: "local[*, 4]"
      spark.databricks.cluster.profile: "singleNode"
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
    custom_tags:
      <<: *mm-tags
  # Same single-node ML cluster, tagged as dev.
  model-dev-cluster-props: &model-dev-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "i3.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    spark_conf:
      spark.master: "local[*, 4]"
      spark.databricks.cluster.profile: "singleNode"
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
    custom_tags:
      <<: *mm-dev-tags
  # Memory-optimized ETL cluster; uses spot instances with on-demand fallback.
  etl-cluster-props: &etl-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    aws_attributes:
      "first_on_demand": 1
      "availability": "SPOT_WITH_FALLBACK"
      "zone_id": "us-east-1e"
      "spot_bid_price_percent": 100
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-tags
  # Same ETL cluster, tagged as dev.
  etl-dev-cluster-props: &etl-dev-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    aws_attributes:
      "first_on_demand": 1
      "availability": "SPOT_WITH_FALLBACK"
      "zone_id": "us-east-1e"
      "spot_bid_price_percent": 100
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-tags
  # Curve-building cluster: larger driver; unlimited driver result size
  # (maxResultSize: 0 removes the collect() cap).
  curve-cluster-props: &curve-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.2xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    driver_node_type_id: "r4.8xlarge"
    spark_conf:
      spark.driver.maxResultSize: 0
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-tags
  # Same curve cluster, tagged as dev.
  curve-dev-cluster-props: &curve-dev-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.2xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    driver_node_type_id: "r4.8xlarge"
    spark_conf:
      spark.driver.maxResultSize: 0
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-dev-tags
  # Autoscaling ranges, kept separate so they can be merged into any cluster.
  etl-auto-scale-props: &etl-auto-scale-props
    autoscale:
      min_workers: 2
      max_workers: 8
  curve-auto-scale-props: &curve-auto-scale-props
    autoscale:
      min_workers: 2
      max_workers: 8
  # Complete `new_cluster` templates, referenced from the workflows below.
  etl-static-cluster: &etl-static-cluster
    new_cluster:
      <<: *etl-cluster-props
      num_workers: 2
  model-static-cluster: &model-static-cluster
    new_cluster:
      <<: *model-cluster-props
      num_workers: 0
  model-dev-static-cluster: &model-dev-static-cluster
    new_cluster:
      <<: *model-dev-cluster-props
      num_workers: 0
  etl-autoscale-cluster: &etl-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *etl-cluster-props
        - *etl-auto-scale-props
  etl-dev-autoscale-cluster: &etl-dev-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *etl-dev-cluster-props
        - *etl-auto-scale-props
  curve-autoscale-cluster: &curve-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *curve-cluster-props
        - *curve-auto-scale-props
  curve-dev-autoscale-cluster: &curve-dev-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *curve-dev-cluster-props
        - *curve-auto-scale-props
# dbx build settings: package the project with Poetry before deployment.
build:
  python: "poetry"
environments:
  # `default` exposes each pipeline stage as an individually runnable dev workflow.
  default:
    workflows:
      - name: "bidstat-reader"
        <<: *etl-dev-autoscale-cluster
        email_notifications:
          on_start: [ "[email protected]", "[email protected]", "[email protected]" ]
          on_success: [ "[email protected]", "[email protected]", "[email protected]" ]
          on_failure: [ "[email protected]", "[email protected]", "[email protected]" ]
          no_alert_for_skipped_runs: false
        spark_python_task:
          python_file: "file://src/bidstat_reader.py"
      - name: "experiment"
        <<: *model-dev-static-cluster
        max_concurrent_runs: 3
        spark_python_task:
          python_file: "file://src/experiment_tf.py"
      - name: "curve"
        <<: *curve-dev-autoscale-cluster
        max_concurrent_runs: 3
        spark_python_task:
          python_file: "file://src/curve.py"
      - name: "evaluate"
        <<: *curve-dev-autoscale-cluster
        spark_python_task:
          python_file: "file://src/evaluate_tf.py"
      - name: "predict"
        <<: *model-dev-static-cluster
        spark_python_task:
          python_file: "file://src/predict.py"
      - name: "model_plot"
        <<: *model-dev-static-cluster
        spark_python_task:
          python_file: "file://src/model_plot.py"
  # Full end-to-end multi-task pipeline on dev clusters:
  # bidstat-reader -> model-generation -> curve-building -> {evaluate-results, copy-artifacts -> model-plot}
  dev:
    workflows:
      - name: "cmv3-dev"
        format: MULTI_TASK
        email_notifications:
          on_start: [ "[email protected]", "[email protected]", "[email protected]" ]
          on_success: [ "[email protected]", "[email protected]", "[email protected]" ]
          on_failure: [ "[email protected]", "[email protected]", "[email protected]" ]
          no_alert_for_skipped_runs: false
        job_clusters:
          - job_cluster_key: "etl-cluster"
            <<: *etl-dev-autoscale-cluster
          - job_cluster_key: "model-cluster"
            <<: *model-dev-static-cluster
          - job_cluster_key: "curve-cluster"
            <<: *curve-dev-autoscale-cluster
        tasks:
          - task_key: "bidstat-reader"
            job_cluster_key: "etl-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/bidstat_reader.py"
              parameters: [ "--config-file", "generate_data_dev.yaml"]
          - task_key: "model-generation"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/experiment_tf.py"
              parameters: [ "--config-file", "model_dev.yaml"]
            depends_on:
              - task_key: "bidstat-reader"
          - task_key: "curve-building"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/curve.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "model-generation"
          - task_key: "evaluate-results"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/evaluate_tf.py"
              parameters: [ "--config-file", "model_dev.yaml"]
            depends_on:
              - task_key: "curve-building"
          - task_key: "copy-artifacts"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/serving_artifacts.py"
              parameters: [ "--config-file", "model_dev.yaml"]
            depends_on:
              - task_key: "curve-building"
          - task_key: "model-plot"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/model_plot.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "copy-artifacts"
  # Production pipeline, model variant A (prod_310 configs, data from T-4).
  prod_model_a:
    workflows:
      - name: "cmv3-prod-model-a"
        format: MULTI_TASK
        email_notifications:
          on_start: [ "[email protected]", "[email protected]", "[email protected]" ]
          on_success: [ "[email protected]", "[email protected]", "[email protected]" ]
          on_failure: [ "[email protected]", "[email protected]", "[email protected]" ]
          no_alert_for_skipped_runs: false
        job_clusters:
          - job_cluster_key: "etl-cluster"
            <<: *etl-autoscale-cluster
          - job_cluster_key: "model-cluster"
            <<: *model-static-cluster
          - job_cluster_key: "curve-cluster"
            <<: *curve-autoscale-cluster
        max_concurrent_runs: 2
        tasks:
          - task_key: "bidstat-reader"
            job_cluster_key: "etl-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/bidstat_reader.py"
              parameters: ["--config-file", "generate_data_cb_prod_310.yaml", "--start_date", "T-4"]
          - task_key: "model-generation"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/experiment_tf.py"
              parameters: ["--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1"]
            depends_on:
              - task_key: "bidstat-reader"
          - task_key: "curve-building"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/curve.py"
              parameters: ["--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1"]
            depends_on:
              - task_key: "model-generation"
          - task_key: "evaluate-results"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/evaluate_tf.py"
              parameters: ["--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1"]
            depends_on:
              - task_key: "curve-building"
          - task_key: "copy-artifacts"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/serving_artifacts.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "model-plot"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/model_plot.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "copy-artifacts"
  # Production pipeline, model variant B (prod_320 configs, data from T-7).
  prod_model_b:
    workflows:
      - name: "cmv3-prod-model-b"
        format: MULTI_TASK
        email_notifications:
          on_start: [ "[email protected]", "[email protected]", "[email protected]" ]
          on_success: [ "[email protected]", "[email protected]", "[email protected]" ]
          on_failure: [ "[email protected]", "[email protected]", "[email protected]" ]
          no_alert_for_skipped_runs: false
        job_clusters:
          - job_cluster_key: "etl-cluster"
            <<: *etl-autoscale-cluster
          - job_cluster_key: "model-cluster"
            <<: *model-static-cluster
          - job_cluster_key: "curve-cluster"
            <<: *curve-autoscale-cluster
        max_concurrent_runs: 2
        tasks:
          - task_key: "bidstat-reader"
            job_cluster_key: "etl-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/bidstat_reader.py"
              parameters: [ "--config-file", "generate_data_cb_prod_320.yaml", "--start_date", "T-7" ]
          - task_key: "model-generation"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/experiment_tf.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "bidstat-reader"
          - task_key: "curve-building"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/curve.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "model-generation"
          - task_key: "evaluate-results"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/evaluate_tf.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "copy-artifacts"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/serving_artifacts.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "model-plot"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/model_plot.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "copy-artifacts"
  # Minimal single-stage environments (no cluster template: dbx defaults apply).
  data:
    workflows:
      - name: "bidstat-reader"
        spark_python_task:
          python_file: "file://src/bidstat_reader.py"
  experiment:
    workflows:
      - name: "experiment"
        spark_python_task:
          python_file: "file://src/experiment_tf.py"
  curve:
    workflows:
      - name: "curve"
        spark_python_task:
          python_file: "file://src/curve.py"
  evaluate:
    workflows:
      - name: "evaluate"
        spark_python_task:
          python_file: "file://src/evaluate_tf.py"
  predict:
    workflows:
      - name: "predict"
        spark_python_task:
          python_file: "file://src/predict.py"
  model_plot:
    workflows:
      - name: "model_plot"
        spark_python_task:
          python_file: "file://src/model_plot.py"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment