Created
August 10, 2022 22:31
-
-
Save jthomasmock/2456fc15207d1f40f922dcd78d044bb0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "Intro to Grammar of Graphics"
subtitle: "seaborn vs plotnine"
author: "Credit to @ChelseaParlett"
jupyter: venv-prez-kernel
format:
  revealjs:
    height: 1400
    width: 1920
execute:
  echo: true
  eval: true
---
## | |
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Started translating my 📊plotnine data viz lectures into 📈seaborn (so students can see both). <br><br>💭and I have some thoughts.<br><br>Here's the notebook with plotnine (commented out) and seaborn plots: <a href="https://t.co/Z36rodGFUU">https://t.co/Z36rodGFUU</a> <a href="https://t.co/Hw39RxbVS9">pic.twitter.com/Hw39RxbVS9</a></p>— Chelsea Parlett-Pelleriti (@ChelseaParlett) <a href="https://twitter.com/ChelseaParlett/status/1557482309565177859?ref_src=twsrc%5Etfw">August 10, 2022</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script> | |
## Library and Prep | |
```{python} | |
import warnings | |
warnings.filterwarnings('ignore') | |
import pandas as pd | |
import numpy as np | |
from plotnine import * | |
# from plotnine.data import mtcars | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
mtcars = pd.read_csv("https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv") | |
figure_size = (6.4, 4.8) | |
%matplotlib inline | |
``` | |
## Scatterplot | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# Scatterplot of mpg against wt: map the axes, then add a point layer.
(
    ggplot(mtcars, aes(x="wt", y="mpg"))
    + geom_point()
)
``` | |
## seaborn | |
```{python} | |
# Scatterplot of mpg against wt in a single seaborn call.
sns.scatterplot(data=mtcars, x="wt", y="mpg")
``` | |
::: | |
## Smooth | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# Scatterplot of mpg against wt with an "lm" smoother layered on top.
(
    ggplot(mtcars, aes(x="wt", y="mpg"))
    + geom_point()
    + stat_smooth(method="lm")
)
``` | |
## seaborn | |
```{python} | |
# regplot draws the points and a regression fit in one call.
sns.regplot(data=mtcars, x="wt", y="mpg")
``` | |
::: | |
## Facets | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# Points plus an "lm" smoother, split into one panel per value of gear.
(
    ggplot(mtcars, aes(x="wt", y="mpg"))
    + geom_point()
    + stat_smooth(method="lm")
    + facet_wrap("~gear")
)
``` | |
## seaborn
```{python} | |
# Build a grid with one column per value of gear, then draw a regplot of
# mpg against wt in every panel.
facet = sns.FacetGrid(data=mtcars, col="gear")
facet.map(sns.regplot, "wt", "mpg")
``` | |
::: | |
## Smooth + Facet | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# Faceted by gear as before; colour is mapped to factor(gear) so gear is
# treated as a category rather than a number.
(
    ggplot(mtcars, aes(x="wt", y="mpg", color="factor(gear)"))
    + geom_point()
    + stat_smooth(method="lm")
    + facet_wrap("~gear")
)
``` | |
## seaborn
```{python} | |
# One column per value of gear, with hue also set to gear, then a regplot of
# mpg against wt in every panel.
facet = sns.FacetGrid(data=mtcars, col="gear", hue="gear")
facet.map(sns.regplot, "wt", "mpg")
``` | |
::: | |
## Bring on the penguins! | |
```{python} | |
# Load the penguins data set from GitHub and preview the first rows.
penguin = pd.read_csv(
    "https://raw.githubusercontent.com/cmparlettpelleriti/CPSC392ParlettPelleriti/master/Data/penguins.csv"
)
penguin.head()
``` | |
# SNS Basic Steps | |
1. Tell SNS what kind of plot you want (using `sns.*`) | |
2. Tell SNS what you want to plot (inside the `sns.*()` function) | |
## Scatterplot | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# Bill depth against bill length, with point colour mapped to species.
(
    ggplot(
        penguin,
        aes(x="bill_length_mm", y="bill_depth_mm", color="species"),
    )
    + geom_point()
)
``` | |
## seaborn | |
```{python} | |
# Bill depth against bill length; hue colours the points by species.
sns.scatterplot(
    data=penguin,
    x="bill_length_mm",
    y="bill_depth_mm",
    hue="species",
)
``` | |
::: | |
## Histogram | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# Histogram of body_mass_g using geom_histogram's default binning.
(
    ggplot(penguin, aes(x="body_mass_g"))
    + geom_histogram()
)
``` | |
## seaborn | |
```{python} | |
# Histogram of body_mass_g using histplot's default binning.
sns.histplot(data=penguin, x="body_mass_g")
``` | |
::: | |
## Bar | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# With only x mapped, geom_bar counts the rows in each species.
(
    ggplot(penguin, aes(x="species"))
    + geom_bar()
)
``` | |
## seaborn
```{python} | |
# countplot draws one bar per species with the number of rows in it.
sns.countplot(data=penguin, x="species")
``` | |
::: | |
## Boxplot | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# One box per species summarising bill_length_mm.
(
    ggplot(penguin, aes(x="species", y="bill_length_mm"))
    + geom_boxplot()
)
``` | |
## seaborn | |
```{python} | |
# One box per species summarising bill_length_mm.
sns.boxplot(data=penguin, x="species", y="bill_length_mm")
``` | |
::: | |
## Boxplot + Point | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# fill is mapped to species for the whole plot; the point layer is added
# first and the boxplot layer second.
(
    ggplot(penguin, aes(x="species", y="bill_length_mm", fill="species"))
    + geom_point()
    + geom_boxplot()
    + theme_minimal()
)
``` | |
## seaborn | |
```{python} | |
# Two calls in one chunk: the boxes first, then the raw observations as
# black points.
sns.boxplot(data=penguin, x="species", y="bill_length_mm")
sns.scatterplot(data=penguin, x="species", y="bill_length_mm", color="black")
``` | |
::: | |
## Color by species | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# fill is mapped inside geom_boxplot only, so it applies to the boxes and
# not to the point layer.
(
    ggplot(penguin, aes(x="species", y="bill_length_mm"))
    + geom_point()
    + geom_boxplot(aes(fill="species"))
    + theme_minimal()
)
``` | |
## seaborn | |
```{python} | |
# Switch seaborn to its "white" style, then draw the boxes followed by the
# raw observations as black points.
sns.set_style("white")
sns.boxplot(data=penguin, x="species", y="bill_length_mm")
sns.scatterplot(data=penguin, x="species", y="bill_length_mm", color="black")
``` | |
::: | |
## Prepping Data for Barcharts | |
```{python} | |
# Mean bill_length_mm per species. as_index=False keeps species as an
# ordinary column, so the result can be passed straight to the plot calls.
penguin_bill = (
    penguin
    .groupby(["species"], as_index=False)["bill_length_mm"]
    .mean()
)
penguin_bill
``` | |
## Plotting bar charts | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# stat_summary reduces body_mass_g within each species (fun_data="mean_sdl")
# and draws the result with the bar geom.
(
    ggplot(penguin, aes(x="species", y="body_mass_g", fill="species"))
    + stat_summary(fun_data="mean_sdl", geom="bar")
)
``` | |
## seaborn | |
```{python} | |
# barplot aggregates body_mass_g per species itself. hue colours the bars by
# species, and dodge=False stops them being shifted apart by hue level.
sns.barplot(
    data=penguin,
    x="species",
    y="body_mass_g",
    hue="species",
    dodge=False,
)
``` | |
::: | |
## Better bar chart | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# The same summary bars with axis labels, a title, and a pared-back theme:
# x-axis grid lines and all minor grid lines blanked, legend hidden.
(
    ggplot(penguin, aes(x="species", y="body_mass_g", fill="species"))
    + stat_summary(fun_data="mean_sdl", geom="bar")
    + labs(x="Species", y="Body Mass (g)")
    + ggtitle("Penguin Body Mass by Species")
    + theme_minimal()
    + theme(
        panel_grid_major_x=element_blank(),
        panel_grid_minor_x=element_blank(),
        panel_grid_minor_y=element_blank(),
        legend_position="none",
    )
)
``` | |
## seaborn | |
```{python} | |
# Keep the object returned by barplot so the title, labels and grid can be
# adjusted on it afterwards.
bp = sns.barplot(
    data=penguin,
    x="species",
    y="body_mass_g",
    hue="species",
    dodge=False,
)
bp.set(
    title="Penguin Body Mass by Species",
    xlabel="Species",
    ylabel="Body Mass (g)",
)
bp.grid(False)
# Swap the hue legend for an empty, frameless one, which hides it.
plt.legend([], [], frameon=False)
``` | |
::: | |
## Better bar chart | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# penguin_bill already holds one mean per species, so stat="identity" draws
# the values as given instead of counting rows.
(
    ggplot(penguin_bill, aes(x="species", y="bill_length_mm", fill="species"))
    + geom_bar(stat="identity")
    + theme_minimal()
    + labs(x="Species", y="Average Bill Length (mm)")
)
``` | |
## seaborn | |
```{python} | |
# Bars from the pre-aggregated penguin_bill frame (one row per species).
bp2 = sns.barplot(
    data=penguin_bill,
    x="species",
    y="bill_length_mm",
    hue="species",
    dodge=False,
)
bp2.set(xlabel="Species", ylabel="Average Bill Length (mm)")
# Swap the hue legend for an empty, frameless one, which hides it.
plt.legend([], [], frameon=False)
``` | |
::: | |
# Non-Count Bar Charts
```{python} | |
# Small hand-entered table: six book titles and one rating for each,
# paired by position.
books = [
    "Home Before Dark",
    "The Wives",
    "You",
    "The Last Mrs. Parrish",
    "The Guest List",
    "Invisible Girl",
]
ratings = [4.08, 3.63, 3.93, 3.93, 3.85, 3.81]

book_df = pd.DataFrame({"books": books, "ratings": ratings})
book_df
``` | |
## Non-count bar charts
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# One bar per book at its stored rating (stat="identity"), filled by title,
# with x tick labels rotated 45 degrees and the legend hidden.
(
    ggplot(book_df, aes(x="books", y="ratings"))
    + geom_bar(aes(fill="books"), stat="identity")
    + theme_minimal()
    + labs(
        title="Book Ratings",
        x="Book Titles",
        y="Average GoodRead Ratings",
    )
    + theme(
        axis_text_x=element_text(angle=45),
        legend_position="none",
    )
)
``` | |
## seaborn | |
```{python} | |
# One bar per book, coloured by title; the title and axis labels are set on
# the object returned by barplot.
bp3 = sns.barplot(
    data=book_df,
    x="books",
    y="ratings",
    hue="books",
    dodge=False,
)
bp3.set(
    title="Book Ratings",
    xlabel="Book Titles",
    ylabel="Average GoodRead Ratings",
)
plt.xticks(rotation=45)
# Swap the hue legend for an empty, frameless one, which hides it.
plt.legend([], [], frameon=False)
``` | |
::: | |
# Let's try to explore a new data set using visualization!
```{python} | |
# Load the cereals data set from GitHub and preview the first rows.
cereal = pd.read_csv(
    "https://raw.githubusercontent.com/reisanar/datasets/master/Cereals.csv"
)
cereal.head()
``` | |
## countplot | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# Row count per mfr value, with the bars filled by the same column.
(
    ggplot(cereal, aes(x="mfr", fill="mfr"))
    + geom_bar()
    + theme_bw()
)
``` | |
## seaborn | |
```{python} | |
# Row count per mfr value; hue colours the bars by mfr, and dodge=False
# stops them being shifted apart by hue level.
sns.countplot(data=cereal, x="mfr", hue="mfr", dodge=False)
``` | |
::: | |
## Facet wrap/grid | |
::: {.panel-tabset} | |
## plotnine | |
```{python} | |
# Facet wrap: sugars against protein, one panel per mfr value.
(
    ggplot(cereal, aes(x="protein", y="sugars"))
    + geom_point()
    + theme_bw()
    + facet_wrap("~mfr")
)
``` | |
## seaborn | |
```{python} | |
# One panel per mfr value, wrapping to a new row after every 3 panels,
# with a scatterplot of sugars against protein in each.
f = sns.FacetGrid(data=cereal, col="mfr", col_wrap=3)
f.map(sns.scatterplot, "protein", "sugars")
``` | |
```{python} | |
# plotnine version, kept for reference and left disabled:
# (ggplot(cereal, aes(x = "mfr", y = "sugars")) + geom_boxplot() + theme_minimal())

# seaborn version: one box per mfr value summarising sugars.
sns.boxplot(data=cereal, x="mfr", y="sugars")
``` | |
::: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment