Created
November 13, 2019 22:44
-
-
Save Per48edjes/f211dc1e1b65c12c47a3d6055a43b3de to your computer and use it in GitHub Desktop.
Get column names from ColumnTransformer with embedded pipelines (SKLearn)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Split into holdout for purposes of imputation and encoding
# 80/20 train/test partition with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2019,
)
# Category values of the target column, kept for later labelling.
class_labels = df['segment_label'].cat.categories
## Preprocessing pipeline

# Numeric columns: fill gaps with the column median, then standardize.
numeric_features = X.select_dtypes(include=np.number).columns
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)

# Categorical columns: fill gaps with the literal 'missing', then one-hot
# encode; categories unseen during fit are ignored at transform time.
categorical_features = X.select_dtypes(
    include=['object', 'bool', 'category']
).columns
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# Route each column group through its own pipeline; columns matched by
# neither selector are dropped.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ],
    remainder='drop',
    verbose=True,
)
# Visual inspection of transformed dataframe before CV
# Fit once and keep the result rather than transforming X_train a second time.
transformed_train = preprocessor.fit_transform(X_train, y_train)

# Recover the one-hot output names. Passing the original categorical column
# names yields labels of the form "<column>_<category>" instead of the
# anonymous "x0_<category>" defaults.
onehot_encoder = preprocessor.named_transformers_['cat']['onehot']
if hasattr(onehot_encoder, 'get_feature_names_out'):
    enc_cat_features = onehot_encoder.get_feature_names_out(categorical_features)
else:
    # Older scikit-learn releases only provide get_feature_names.
    enc_cat_features = onehot_encoder.get_feature_names(categorical_features)

# Output column order follows the transformer order: numeric, then categorical.
labels = np.concatenate([numeric_features, enc_cat_features])

# ColumnTransformer may return either a sparse matrix or a dense ndarray
# (governed by its sparse_threshold); only densify when needed.
if hasattr(transformed_train, 'toarray'):
    transformed_train = transformed_train.toarray()

transformed_df_columns = pd.DataFrame(transformed_train, columns=labels).columns
pp.pprint(transformed_df_columns)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment