Created
March 28, 2024 19:03
-
-
Save gregsheremeta/7203bd8a3d1fbb5b8b354b961b12c293 to your computer and use it in GitHub Desktop.
iris-training-pipeline.yaml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PIPELINE DEFINITION
# Name: iris-training-pipeline
# Inputs:
#    min_max_scaler: bool
#    neighbors: int
#    standard_scaler: bool
# Component interfaces. Each entry names the executor (under deploymentSpec)
# that runs the step and declares the artifacts/parameters it consumes and
# produces. Task wiring between these components lives under root.dag.tasks.
components:
  # Declares no inputs; emits one Dataset artifact named iris_dataset.
  comp-create-dataset:
    executorLabel: exec-create-dataset
    outputDefinitions:
      artifacts:
        iris_dataset:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
  # Takes a Dataset plus two boolean scaler switches; emits a Dataset.
  comp-normalize-dataset:
    executorLabel: exec-normalize-dataset
    inputDefinitions:
      artifacts:
        input_iris_dataset:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
      parameters:
        min_max_scaler:
          parameterType: BOOLEAN
        standard_scaler:
          parameterType: BOOLEAN
    outputDefinitions:
      artifacts:
        normalized_iris_dataset:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
  # Takes the normalized Dataset and an integer n_neighbors; emits a Model.
  comp-train-model:
    executorLabel: exec-train-model
    inputDefinitions:
      artifacts:
        normalized_iris_dataset:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
      parameters:
        n_neighbors:
          parameterType: NUMBER_INTEGER
    outputDefinitions:
      artifacts:
        model:
          artifactType:
            schemaTitle: system.Model
            schemaVersion: 0.0.1
# Container executors, one per component. Every executor uses the same
# three-stage command:
#   1. `sh -c`  : make sure pip exists, install kfp and the step's Python
#                 dependencies, then exec the remaining command items
#                 ("$0" "$@").
#   2. `sh -ec` : write the Python source (passed as "$0") to a temp file and
#                 run it through kfp.dsl.executor_main with the container args.
#   3. The Python source of the component function itself.
#
# Both dataset-writing steps pass index=False to DataFrame.to_csv. Without it
# pandas writes the row index as an extra unnamed CSV column, which the next
# step's pd.read_csv loads as an ordinary feature column, so row position
# would be scaled and fed to the classifier alongside the real measurements.
# NOTE(review): this file looks like compiler output (see sdkVersion); apply
# the same index=False change to the Python pipeline source before recompiling.
deploymentSpec:
  executors:
    # Downloads the iris CSV and stores it as the iris_dataset artifact.
    exec-create-dataset:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - create_dataset
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.6.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def create_dataset(iris_dataset: Output[Dataset]):
              import pandas as pd

              csv_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
              col_names = [
                  'Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Labels'
              ]
              df = pd.read_csv(csv_url, names=col_names)

              with open(iris_dataset.path, 'w') as f:
                  # index=False: do not write the row index as a CSV column.
                  df.to_csv(f, index=False)
        image: docker.io/python:3.9.17
    # Scales every non-label column with StandardScaler or MinMaxScaler and
    # re-attaches the Labels column. Raises ValueError unless exactly one of
    # the two scaler flags is true.
    exec-normalize-dataset:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - normalize_dataset
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.6.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0' 'scikit-learn==1.4.0' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def normalize_dataset(
              input_iris_dataset: Input[Dataset],
              normalized_iris_dataset: Output[Dataset],
              standard_scaler: bool,
              min_max_scaler: bool,
          ):
              if standard_scaler is min_max_scaler:
                  raise ValueError(
                      'Exactly one of standard_scaler or min_max_scaler must be True.')

              import pandas as pd
              from sklearn.preprocessing import MinMaxScaler
              from sklearn.preprocessing import StandardScaler

              with open(input_iris_dataset.path) as f:
                  df = pd.read_csv(f)
              labels = df.pop('Labels')

              if standard_scaler:
                  scaler = StandardScaler()
              if min_max_scaler:
                  scaler = MinMaxScaler()

              df = pd.DataFrame(scaler.fit_transform(df))
              df['Labels'] = labels
              normalized_iris_dataset.metadata['state'] = "Normalized"
              with open(normalized_iris_dataset.path, 'w') as f:
                  # index=False: do not write the row index as a CSV column.
                  df.to_csv(f, index=False)
        image: docker.io/python:3.9.17
    # Fits a KNeighborsClassifier on a train split of the normalized dataset
    # and pickles the fitted classifier into the model artifact.
    exec-train-model:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - train_model
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.6.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0' 'scikit-learn==1.4.0' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def train_model(
              normalized_iris_dataset: Input[Dataset],
              model: Output[Model],
              n_neighbors: int,
          ):
              import pickle

              import pandas as pd
              from sklearn.model_selection import train_test_split
              from sklearn.neighbors import KNeighborsClassifier

              with open(normalized_iris_dataset.path) as f:
                  df = pd.read_csv(f)

              y = df.pop('Labels')
              X = df

              X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

              clf = KNeighborsClassifier(n_neighbors=n_neighbors)
              clf.fit(X_train, y_train)

              model.metadata['framework'] = 'scikit-learn'
              with open(model.path, 'wb') as f:
                  pickle.dump(clf, f)
        image: docker.io/python:3.9.17
# Pipeline identity; same name as the "Name:" line in the file header.
pipelineInfo:
  name: iris-training-pipeline
# Root DAG: create-dataset -> normalize-dataset -> train-model, plus the
# pipeline-level input parameters supplied by the caller at run time.
root:
  dag:
    tasks:
      # First step; has no inputs or upstream dependencies.
      create-dataset:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-create-dataset
        taskInfo:
          name: create-dataset
      # Consumes create-dataset's iris_dataset artifact.
      normalize-dataset:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-normalize-dataset
        dependentTasks:
        - create-dataset
        inputs:
          artifacts:
            input_iris_dataset:
              taskOutputArtifact:
                outputArtifactKey: iris_dataset
                producerTask: create-dataset
          parameters:
            # Forward the pipeline-level scaler flags declared under
            # inputDefinitions instead of hard-coding constants, so the
            # values a caller supplies are actually honoured. The
            # normalize_dataset function raises ValueError unless exactly
            # one of the two flags is true.
            min_max_scaler:
              componentInputParameter: min_max_scaler
            standard_scaler:
              componentInputParameter: standard_scaler
        taskInfo:
          name: normalize-dataset
      # Consumes normalize-dataset's output artifact; the pipeline input
      # `neighbors` is passed to the component as `n_neighbors`.
      train-model:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-train-model
        dependentTasks:
        - normalize-dataset
        inputs:
          artifacts:
            normalized_iris_dataset:
              taskOutputArtifact:
                outputArtifactKey: normalized_iris_dataset
                producerTask: normalize-dataset
          parameters:
            n_neighbors:
              componentInputParameter: neighbors
        taskInfo:
          name: train-model
  # Pipeline-level inputs; none declares a default value here.
  inputDefinitions:
    parameters:
      min_max_scaler:
        parameterType: BOOLEAN
      neighbors:
        parameterType: NUMBER_INTEGER
      standard_scaler:
        parameterType: BOOLEAN
# Spec schema version and SDK version recorded in this file; the executors
# above install the matching 'kfp==2.6.0' package.
schemaVersion: 2.1.0
sdkVersion: kfp-2.6.0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment