Last active
March 25, 2020 05:41
-
-
Save ateucher/c930dffde79c07b36726d4a9c019b18c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*.png | |
.DS_Store | |
.Rhistory |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(gghighlight) | |
library(ggrepel) | |
library(scales) | |
#' Turn "Last, First"-style names into "First Last".
#'
#' Each element of `x` is split on a comma (optionally followed by one
#' whitespace character). Elements that split into exactly two parts are
#' swapped and joined with a space, e.g. "Korea, South" -> "South Korea".
#'
#' @param x A character vector of names.
#' @return A character vector the same length as `x`. Elements without a
#'   comma, `NA`s, and empty strings are returned as-is; elements with more
#'   than one comma are returned unchanged rather than silently dropping the
#'   trailing parts.
fix_name_comma <- function(x) {
  name_split <- strsplit(x, ",\\s?")
  fixed <- vapply(seq_along(name_split), function(i) {
    parts <- name_split[[i]]
    n_parts <- length(parts)
    # strsplit("") yields character(0); indexing it would produce "NA NA".
    # With three or more parts there is no unambiguous reordering, so keep
    # the input untouched instead of discarding everything after part two.
    if (n_parts == 0L || n_parts > 2L) return(x[[i]])
    if (n_parts == 1L) return(parts)
    paste(parts[2], parts[1])
  }, FUN.VALUE = character(1))
  # Iterating over indices drops names; carry over any names strsplit() kept.
  names(fixed) <- names(name_split)
  fixed
}
# Load and prepare data ----

# Wide time series of confirmed cases from the CSSEGISandData/COVID-19
# repository: one row per country/province, one column per date.
data <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")

# Reshape to one row per location per date, drop cruise-ship and "Recovered"
# entries, parse the date column names, and reorder "Last, First" country
# names via fix_name_comma().
data_long <- data %>%
  pivot_longer(`1/22/20`:last_col(), names_to = "date", values_to = "cases") %>%
  filter(
    !is.na(cases),
    # grepl() returns FALSE for NA input, so rows without a province are
    # kept by this condition without a separate is.na() check.
    !grepl("[Pp]rincess|[Rr]ecovered", `Province/State`),
    `Country/Region` != "Cruise Ship",
    `Country/Region` != "Diamond Princess"
  ) %>%
  mutate(
    date = as.Date(date, format = "%m/%d/%y"),
    `Country/Region` = fix_name_comma(`Country/Region`)
  )

# Most recent date in the data; used in the plot caption and output file names.
max_date <- max(data_long$date)
cap <- glue::glue("Data from https://github.com/CSSEGISandData/COVID-19, {max_date}")

# Total cases per country per date (sums over provinces).
data_by_country <- data_long %>%
  group_by(country = `Country/Region`, date) %>%
  summarise(cases = sum(cases, na.rm = TRUE)) %>%
  # summarise() only peels off the last grouping variable; ungroup explicitly
  # so later filter()/mutate() calls do not silently operate per country.
  ungroup()

# Countries drawn in colour and labelled in the plots below.
highlight_countries <- c("Canada", "US", "Italy", "United Kingdom", "Spain", "Australia")
# Confirmed cases by calendar date, log scale, all countries except China.
# Countries in `highlight_countries` are coloured and labelled at their
# latest data point; the rest are drawn as thin unhighlighted lines.
# The outer parentheses print the plot as well as assigning it.
(plot_countries_by_date <- data_by_country %>%
  # log10(0) is -Inf, so zero counts cannot be placed on a log axis.
  filter(country != "China", cases > 0) %>%
  ggplot(aes(x = date, y = cases, colour = country)) +
  scale_x_date(date_breaks = "1 week", date_labels = "%b %d") +
  scale_y_log10(
    expand = c(0, 0.5),
    labels = label_comma(accuracy = 1)
  ) +
  geom_line(size = 1.5) +
  gghighlight(
    country %in% highlight_countries,
    use_direct_label = FALSE,
    unhighlighted_params = list(size = 0.5)
  ) +
  geom_text_repel(
    # Format counts with label_comma() so large values never render in
    # scientific notation (paste0(1e5) gives "1e+05") and match the axis.
    aes(label = paste0(country, " (", label_comma(accuracy = 1)(cases), ")")),
    data = filter(
      data_by_country,
      country %in% highlight_countries,
      date == max(date)
    ),
    nudge_x = 1, direction = "y",
    segment.size = 0,
    # Keep the labels to the right of the last data point.
    xlim = c(
      max(data_by_country$date),
      max(data_by_country$date) + 21
    )
  ) +
  scale_color_brewer(palette = 2, type = "qual", guide = "none") +
  # Labels sit outside the panel, so disable clipping and widen the
  # right-hand margin to make room for them.
  coord_cartesian(clip = "off") +
  theme_minimal() +
  theme(plot.margin = unit(c(0.5, 4, 0.5, 0.5), "cm")) +
  labs(
    x = "Date",
    y = "Total number of cases (log scale)",
    title = "COVID-19 Confirmed Cases",
    caption = cap
  ))
# Case-count threshold that marks "day 0" for each country.
n_cases <- 100

# Country totals restricted to dates at or above the threshold, with the
# number of days elapsed since each country first reached it.
data_by_country_n_days <- data_by_country %>%
  filter(cases >= n_cases) %>%
  arrange(country, date) %>%
  group_by(country) %>%
  mutate(
    days_since_nth_case = as.numeric(
      difftime(date, min(date, na.rm = TRUE), units = "days")
    )
  ) %>%
  ungroup()
# Countries coloured and labelled in the "days since n cases" plot.
highlighted_since_n_cases <- c(
  highlight_countries, "South Korea", "Singapore", "Japan", "Iran"
)

# Confirmed cases against days since each country reached `n_cases`,
# log scale, all countries except China. The outer parentheses print the
# plot as well as assigning it.
(plot_100_cases <- data_by_country_n_days %>%
  filter(country != "China") %>%
  ggplot(aes(x = days_since_nth_case, y = cases, colour = country)) +
  scale_y_log10(labels = label_comma(accuracy = 1)) +
  geom_line(size = 1.5) +
  gghighlight(
    country %in% highlighted_since_n_cases,
    use_direct_label = FALSE,
    unhighlighted_params = list(size = 0.5)
  ) +
  geom_text_repel(
    # Format counts with label_comma() so large values never render in
    # scientific notation (paste0(1e5) gives "1e+05") and match the axis.
    aes(label = paste0(country, " (", label_comma(accuracy = 1)(cases), ")")),
    data = filter(
      data_by_country_n_days,
      country %in% highlighted_since_n_cases,
      date == max(date)
    ),
    nudge_x = 2, direction = "both",
    segment.size = 0
  ) +
  coord_cartesian(clip = "off") +
  scale_color_brewer(palette = 3, type = "qual", guide = "none") +
  theme_minimal() +
  theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")) +
  labs(
    x = glue::glue("Days since reaching {n_cases} confirmed cases"),
    y = "Total number of cases (log scale)",
    title = glue::glue("COVID-19 confirmed cases since reaching {n_cases} confirmed cases"),
    caption = cap
  ))
# Country totals restricted to dates with at least one confirmed case, with
# the number of days elapsed since each country's first such date.
data_by_country_1st_case <- data_by_country %>%
  filter(cases >= 1) %>%
  arrange(country, date) %>%
  group_by(country) %>%
  mutate(
    days_since_nth_case = as.numeric(
      difftime(date, min(date, na.rm = TRUE), units = "days")
    )
  ) %>%
  ungroup()
# Countries coloured and labelled in the "days since first case" plot.
highlighted_since_1st_case <- c(
  highlight_countries, "South Korea", "Singapore", "Japan", "Iran"
)

# Confirmed cases against days since each country's first confirmed case,
# log scale, all countries except China. The outer parentheses print the
# plot as well as assigning it.
(plot_1st_cases <- data_by_country_1st_case %>%
  filter(country != "China") %>%
  ggplot(aes(x = days_since_nth_case, y = cases, colour = country)) +
  scale_y_log10(labels = label_comma(accuracy = 1)) +
  geom_line(size = 1.5) +
  gghighlight(
    country %in% highlighted_since_1st_case,
    use_direct_label = FALSE,
    unhighlighted_params = list(size = 0.5)
  ) +
  geom_text_repel(
    # Format counts with label_comma() so large values never render in
    # scientific notation (paste0(1e5) gives "1e+05") and match the axis.
    aes(label = paste0(country, " (", label_comma(accuracy = 1)(cases), ")")),
    data = filter(
      data_by_country_1st_case,
      country %in% highlighted_since_1st_case,
      date == max(date)
    ),
    nudge_x = 6, direction = "both",
    segment.size = 0,
    # Allow labels to extend past the last day on the x axis.
    xlim = c(
      min(data_by_country_1st_case$days_since_nth_case),
      max(data_by_country_1st_case$days_since_nth_case) + 20
    )
  ) +
  scale_color_brewer(palette = 3, type = "qual", guide = "none") +
  # Labels sit outside the panel, so disable clipping and widen the
  # right-hand margin to make room for them.
  coord_cartesian(clip = "off") +
  theme_minimal() +
  theme(plot.margin = unit(c(0.5, 4, 0.5, 0.5), "cm")) +
  labs(
    # Plain string: there is nothing to interpolate, so glue() is not needed.
    x = "Days since first confirmed case",
    y = "Total number of cases (log scale)",
    title = "COVID-19 confirmed cases since first confirmed case",
    caption = cap
  ))
# Province-level rows for Canada, with shorter column names for plotting.
canada <- data_long %>%
  filter(`Country/Region` == "Canada") %>%
  rename(
    country = `Country/Region`,
    province = `Province/State`
  )
# Confirmed cases by date for each Canadian province, log scale, with each
# province labelled at its latest data point. The outer parentheses print
# the plot as well as assigning it.
(plot_canada <- canada %>%
  # log10(0) is -Inf, so zero counts cannot be placed on a log axis.
  filter(cases > 0) %>%
  ggplot(aes(x = date, y = cases, colour = province)) +
  geom_line(size = 1) +
  scale_x_date(date_breaks = "1 week", date_labels = "%b %d") +
  scale_y_log10(
    expand = c(0, 1),
    labels = label_comma(accuracy = 1)
  ) +
  # guide = "none" replaces the deprecated guide = FALSE and matches the
  # other plots in this script.
  scale_color_brewer(palette = 3, type = "qual", guide = "none") +
  coord_cartesian(clip = "off") +
  geom_text_repel(
    # Format counts with label_comma() so large values never render in
    # scientific notation (paste0(1e5) gives "1e+05") and match the axis.
    aes(label = paste0(province, " (", label_comma(accuracy = 1)(cases), ")")),
    segment.size = 0,
    data = filter(canada, date == max(date)),
    nudge_x = 1, direction = "y",
    # Keep the labels to the right of the last data point.
    xlim = c(max(canada$date) + 1, max(canada$date) + 30)
  ) +
  theme_minimal() +
  # Wide right-hand margin leaves room for the labels outside the panel.
  theme(plot.margin = unit(c(0.5, 6, 0.5, 0.5), "cm")) +
  labs(
    x = "Date",
    y = "Total number of cases (log scale)",
    title = "COVID-19 Confirmed Cases in Canada",
    caption = cap
  ))
# Write each plot to a 9 x 6 PNG whose file name ends with the latest data date.
plots_to_save <- list(
  "countries_by_date-{max_date}.png" = plot_countries_by_date,
  "100_cases-{max_date}.png" = plot_100_cases,
  "1st_cases-{max_date}.png" = plot_1st_cases,
  "canada-{max_date}.png" = plot_canada
)
for (file_template in names(plots_to_save)) {
  ggsave(
    glue::glue(file_template),
    plots_to_save[[file_template]],
    width = 9,
    height = 6
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for sharing - I just downloaded it and it runs fine. I see you changed a couple of hard-coded things - thanks. I just tried the log axis for the provinces, and it looks like exponential growth (though noisy).