Last active
October 20, 2024 18:51
-
-
Save kaunjovi/d3c6ce88fe75bd3c4779d8bb1ce75f28 to your computer and use it in GitHub Desktop.
ML tutorial series. Investigating underfitting and overfitting using sklearn.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
from sklearn.tree import DecisionTreeRegressor | |
from sklearn.metrics import mean_absolute_error | |
if __name__ == "__main__":
    # Script entry point: fit a decision-tree regressor on the Iowa housing
    # data and report its mean absolute error on a held-out validation split.
    # Companion Kaggle exercise:
    # https://www.kaggle.com/code/kaunjovi/exercise-underfitting-and-overfitting/edit
    print(f"Hello world. Pandas version [{pd.__version__}]")

    # Load the training data; the path is relative to the working directory.
    iowa_file_path = './data/home-data-for-ml-course/train.csv'
    home_data = pd.read_csv(iowa_file_path)
    print(f"home_data.shape [{home_data.shape}]")

    # The target that we want to predict.
    y = home_data.SalePrice
    print("The price that we would like to be able to predict")
    print(f"{y.head()}")

    # The features.
    print("The features that we feel should allow us to predict our Sales price")
    features = [
        'LotArea',
        'YearBuilt',
        '1stFlrSF',
        '2ndFlrSF',
        'FullBath',
        'BedroomAbvGr',
        'TotRmsAbvGrd',
    ]
    X = home_data[features]
    print(f"{X.head()}")

    # Split into validation and training data. The fixed random_state keeps
    # the split the same from run to run.
    train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)

    # Specify the model (seeded the same way) and fit it on the training split.
    iowa_model = DecisionTreeRegressor(random_state=1)
    iowa_model.fit(train_X, train_y)

    # Make validation predictions and calculate mean absolute error.
    # Arguments follow scikit-learn's documented (y_true, y_pred) order.
    val_predictions = iowa_model.predict(val_X)
    val_mae = mean_absolute_error(val_y, val_predictions)
    print(f"Validation MAE: {val_mae:,.0f}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment