Last active
December 3, 2020 00:24
-
-
Save ateucher/e7584923784da6ae8af4e393438dd216 to your computer and use it in GitHub Desktop.
covid-canada.R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(gghighlight) | |
library(ggrepel) | |
library(scales) | |
# ---- Load and reshape the JHU CSSE confirmed-cases time series ----

# Wide table as published: one row per country/province, one column per date.
data <- read_csv(
  "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
)

# Long form: one row per location and date. The date columns run from
# `1/22/20` through the last column and are parsed as month/day/2-digit-year.
data_long <- data %>%
  pivot_longer(
    cols = `1/22/20`:last_col(),
    names_to = "date",
    values_to = "cases"
  ) %>%
  mutate(date = as.Date(date, format = "%m/%d/%y")) %>%
  filter(
    !is.na(cases),
    # A missing province must be kept explicitly: `NA != "..."` is NA and
    # filter() would otherwise drop those rows.
    is.na(`Province/State`) | `Province/State` != "Grand Princess",
    `Country/Region` != "Cruise Ship"
  )

# Total cases per country and date (provinces summed). The result is left
# grouped by country, which is what summarise() does by default here.
data_by_country <- data_long %>%
  group_by(country = `Country/Region`, date) %>%
  summarise(cases = sum(cases, na.rm = TRUE))
# Countries that are emphasised and labelled in the country-level plots.
highlight_countries <- c(
  "Canada", "US", "Italy", "United Kingdom", "Spain", "Australia"
)

# ---- Confirmed cases by calendar date (log scale, China excluded) ----
plot_countries_by_date <- data_by_country %>%
  filter(country != "China") %>%
  ggplot(aes(date, cases, colour = country)) +
  scale_x_date(date_breaks = "1 week", date_labels = "%b %d") +
  scale_y_log10(
    expand = c(0, 0.5),
    labels = label_comma(accuracy = 1)
  ) +
  geom_line(size = 1.5) +
  gghighlight(
    country %in% highlight_countries,
    use_direct_label = FALSE,
    unhighlighted_params = list(size = 0.5)
  ) +
  # One label per highlighted country, taken from its most recent row and
  # confined to the 14 days past the last date on the x axis.
  geom_text_repel(
    aes(label = country),
    data = data_by_country %>%
      filter(country %in% highlight_countries & date == max(date)),
    nudge_x = 1,
    direction = "y",
    segment.size = 0,
    xlim = max(data_by_country$date) + c(0, 14)
  ) +
  scale_color_brewer(palette = 2, type = "qual", guide = "none") +
  # Clipping is off and the right margin widened so the labels, which sit
  # beyond the last date, have somewhere to be drawn.
  coord_cartesian(clip = "off") +
  theme_minimal() +
  theme(plot.margin = margin(0.5, 3, 0.5, 0.5, unit = "cm")) +
  labs(
    title = "COVID-19 Confirmed Cases",
    x = "Date",
    y = "Total number of cases (log scale)"
  )

# Auto-print at top level, as the parenthesised assignment did.
plot_countries_by_date
# ---- Confirmed cases aligned on days since a case threshold was reached ----

# Keep only the rows of `df` with at least `threshold` cases and add
# `days_since_nth_case`: days elapsed since each country's earliest such row.
#
# df:        data frame with `country`, `date` and `cases` columns.
# threshold: minimum case count a row must have to be kept.
# Returns an ungrouped data frame sorted by country, then date.
add_days_since_threshold <- function(df, threshold) {
  df %>%
    filter(cases >= threshold) %>%
    group_by(country) %>%
    arrange(country, date) %>%
    mutate(days_since_nth_case = as.numeric(date - min(date, na.rm = TRUE))) %>%
    ungroup()
}

# Log-scale line plot of cases against `days_since_nth_case`, one line per
# country. China is left out of the lines; countries in `highlight` are drawn
# thick and labelled at their latest date, all others are drawn thin.
#
# df:        output of add_days_since_threshold().
# highlight: character vector of country names to emphasise and label.
# nudge_x:   horizontal nudge passed to geom_text_repel() for the labels.
# x_label:   x-axis label.
# title:     plot title.
# Returns a ggplot object.
plot_cases_since <- function(df, highlight, nudge_x, x_label, title) {
  # Labels come from the full `df` (not the China-free subset), so max(date)
  # is the latest date across every country.
  latest <- filter(df, country %in% highlight, date == max(date))

  filter(df, country != "China") %>%
    ggplot(aes(x = days_since_nth_case, y = cases, colour = country)) +
    scale_y_log10() +
    geom_line(size = 1.5) +
    gghighlight(country %in% highlight,
                use_direct_label = FALSE,
                unhighlighted_params = list(size = 0.5)) +
    geom_text_repel(aes(label = country),
                    data = latest,
                    nudge_x = nudge_x, direction = "both",
                    segment.size = 0) +
    scale_color_brewer(palette = 2, type = "qual", guide = "none") +
    theme_minimal() +
    labs(x = x_label,
         y = "Total number of cases (log scale)",
         title = title)
}

# Threshold used for the "days since N cases" plot and its labels.
n_cases <- 100

data_by_country_n_days <- add_days_since_threshold(data_by_country, n_cases)

(plot_100_cases <- plot_cases_since(
  data_by_country_n_days,
  highlight = highlight_countries,
  nudge_x = 1,
  x_label = glue::glue("Days since reaching {n_cases} confirmed cases"),
  title = glue::glue("COVID-19 confirmed cases since reaching {n_cases} confirmed cases")
))

data_by_country_1st_case <- add_days_since_threshold(data_by_country, 1)

(plot_1st_cases <- plot_cases_since(
  data_by_country_1st_case,
  highlight = highlight_countries,
  nudge_x = 2,
  x_label = "Days since first confirmed case",
  title = "COVID-19 confirmed cases since first confirmed case"
))
# ---- Canada: confirmed cases by province ----

# Province-level rows for Canada, with the two location columns renamed to
# syntactic names.
canada <- rename(data_long,
                 country = `Country/Region`,
                 province = `Province/State`) %>%
  filter(country == "Canada")

# Linear-scale line plot, one line per province, each labelled at its latest
# date. The parentheses make the assignment print the plot at top level.
(plot_canada <- ggplot(canada, aes(x = date, y = cases, colour = province)) +
  geom_line(size = 1) +
  scale_x_date(date_breaks = "1 week", date_labels = "%b %d") +
  scale_y_continuous(expand = c(0, 1)) +
  # guide = "none" replaces the deprecated guide = FALSE and matches the other
  # plots in this script.
  scale_color_brewer(palette = 3, type = "qual", guide = "none") +
  # Clipping is off and the right margin widened so the labels, which sit
  # beyond the last date, have somewhere to be drawn.
  coord_cartesian(clip = "off") +
  geom_text_repel(aes(label = province),
                  segment.size = 0,
                  data = filter(canada, date == max(date)),
                  nudge_x = 1, direction = "y",
                  xlim = c(max(canada$date), max(canada$date) + 14)) +
  theme_minimal() +
  theme(plot.margin = unit(c(0.5, 3, 0.5, 0.5), "cm")) +
  labs(x = "Date",
       y = "Total number of cases",
       title = "COVID-19 Confirmed Cases in Canada"))
# ---- Save every plot as a PNG stamped with the latest data date ----

# Most recent date in the data; used as the file-name suffix.
max_date <- max(data_long$date)

out_dir <- "~/Desktop/covid-plots"
# recursive = TRUE also creates missing parent directories (e.g. no ~/Desktop),
# so the ggsave() calls below do not fail on a missing folder. Warnings stay
# suppressed so re-running with an existing directory is silent.
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

ggsave(glue::glue("{out_dir}/countries_by_date-{max_date}.png"),
       plot = plot_countries_by_date)
ggsave(glue::glue("{out_dir}/100_cases-{max_date}.png"),
       plot = plot_100_cases)
ggsave(glue::glue("{out_dir}/1st_cases-{max_date}.png"),
       plot = plot_1st_cases)
ggsave(glue::glue("{out_dir}/canada-{max_date}.png"),
       plot = plot_canada)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment