Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active June 8, 2025 20:47
Show Gist options
  • Save USMortality/a4b712792a80d5bf0ffb9816f62eb23a to your computer and use it in GitHub Desktop.
Save USMortality/a4b712792a80d5bf0ffb9816f62eb23a to your computer and use it in GitHub Desktop.
Vaccine Related Deaths [USA]
library(data.table)
library(ggplot2)
sf <- 2
width <- 600 * sf
height <- 335 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
# All potential vaccine related ICD10 codes
elevated_icd10 <- c(
# Complications following immunization
"T880", # Infection following immunization
"T881", # Other complications following immunization, not elsewhere classified
# Anaphylactic shock due to adverse effect of correct drug or medicament
# properly administered
"T886",
"T887", # Unspecified adverse effect of drug or medicament
# Other specified complications of surgical and medical care, not elsewhere
# classified
"T888",
"T889", # Complication of surgical and medical care, unspecified
# Poisoning by and exposure to drugs, medicaments, and biological substances
# Accidental poisoning by and exposure to other and unspecified drugs,
# medicaments, and biological substances
"X44",
# Intentional self-poisoning by and exposure to other and unspecified drugs,
# medicaments, and biological substances
"X64",
# Poisoning by and exposure to other and unspecified drugs, medicaments, and
# biological substances, undetermined intent
"Y14",
# Bacterial vaccines
"Y580", # BCG vaccine
"Y581", # Typhoid and paratyphoid vaccine
"Y582", # Cholera vaccine
"Y583", # Plague vaccine
"Y584", # Tetanus vaccine
"Y585", # Diphtheria vaccine
"Y586", # Pertussis vaccine, including combinations with a pertussis component
# Mixed bacterial vaccines, except combinations with a pertussis component
"Y588",
"Y589", # Other and unspecified bacterial vaccines
# Other and unspecified vaccines and biological substances
"Y590", # Viral vaccines
"Y591", # Rickettsial vaccines
"Y592", # Protozoal vaccines
"Y598", # Other specified vaccines and biological substances
"Y599", # Vaccine or biological substance, unspecified
# Adverse effects and complications from vaccination
"U12", # COVID-19 vaccines causing adverse effects in therapeutic use
"Y603", # During injection or immunization
"Y623", # During injection or immunization
"Y613", # During injection or immunization
# Contaminated medical or biological substance, injected or used for
# immunization
"Y641",
# Specific conditions associated with immunization
"A800", # Acute paralytic poliomyelitis, vaccine-associated
"R291", # Meningismus
"Z887", # Personal history of allergy to serum and vaccine
"M022", # Postimmunization arthropathy
"G040" # Acute disseminated encephalitis (Encephalomyelitis postimmunization)
)
# Load and filter data
dt <- fread("curl -Ls sars2.net/f/vital.csv.xz | xz -dc")[
cause %in% elevated_icd10 & year %in% 2014:2023
]
# Aggregate deaths by year and cause (count rows since each row is a death)
dt_yearly <- dt[, .(deaths = sum(mcd)), by = .(year, cause)]
# Create complete grid of all year-cause combinations and fill missing with 0
all_years <- 2014:2023
all_causes <- unique(dt_yearly$cause)
complete_grid <- CJ(year = all_years, cause = all_causes)
dt_yearly <- merge(complete_grid, dt_yearly, by = c("year", "cause"), all.x = TRUE)
dt_yearly[is.na(deaths), deaths := 0]
# Calculate baseline (2017-2019 average) by cause
baseline_data <- dt_yearly[year %in% 2017:2019, .(
baseline_mean = mean(deaths, na.rm = TRUE),
baseline_sd = sd(deaths, na.rm = TRUE)
), by = cause]
# Replace NA baseline_sd with 0
baseline_data[is.na(baseline_sd), baseline_sd := 0]
# Merge with yearly data
dt_analysis <- merge(dt_yearly, baseline_data, by = "cause", all.x = TRUE)
# Calculate excess deaths and elevation status
dt_analysis[, `:=`(
expected_deaths = baseline_mean,
excess_deaths = deaths - baseline_mean,
is_elevated = deaths > baseline_mean
)]
# Check elevation patterns by cause - only for causes that have data in 2020-2023
elevation_patterns <- dt_analysis[year %in% 2020:2023, .(
max_excess_2020_2022 = {
excess_vals <- excess_deaths[year %in% 2020:2022]
if(length(excess_vals) > 0 && any(!is.na(excess_vals))) {
max(excess_vals, na.rm = TRUE)
} else {
-Inf
}
},
excess_2023 = {
val_2023 <- excess_deaths[year == 2023]
if(length(val_2023) > 0 && !is.na(val_2023[1])) val_2023[1] else 0
},
total_excess_2020_2022 = sum(pmax(excess_deaths[year %in% 2020:2022], 0), na.rm = TRUE)
), by = cause]
# Calculate signal-to-noise ratio
elevation_patterns[, signal_ratio := ifelse(excess_2023 == 0,
ifelse(max_excess_2020_2022 > 0, Inf, 0),
max_excess_2020_2022 / abs(excess_2023))]
# Define "returned to baseline" as high signal ratio
elevation_patterns[, returned_to_baseline := signal_ratio > 5 | is.infinite(signal_ratio)]
# Identify potential vaccine signal causes
vaccine_signal_causes <- elevation_patterns[
max_excess_2020_2022 > 0 & returned_to_baseline & is.finite(max_excess_2020_2022)
]$cause
print("Potential Vaccine Signal Causes (Elevated 2020-2022, Normal 2023):")
print(vaccine_signal_causes)
# Prepare data for plotting (ALL YEARS) - remove rows with missing values
plot_data <- dt_analysis[!is.na(excess_deaths)]
plot_data[, vaccine_signal := ifelse(cause %in% vaccine_signal_causes,
"Potential Vaccine Signal",
"Unlikely Vaccine Related"
)]
# Only include positive excess deaths for stacking
plot_data[, excess_positive := pmax(excess_deaths, 0)]
# Create stacked bar chart split by vaccine signal classification
chart <- ggplot(
plot_data,
aes(x = factor(year), y = excess_positive, fill = cause)
) +
geom_bar(stat = "identity", position = "stack") +
facet_wrap(~vaccine_signal, scales = "free_y") +
labs(
title = "Excess Deaths by ICD-10 Cause (2014-2023)",
subtitle = "Split by potential vaccine signal vs. unlikely vaccine-related",
x = "Year",
y = "Excess Deaths (Above 2017-2019 Baseline)",
fill = "ICD-10 Cause"
) +
theme_minimal() +
theme(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
axis.text.x = element_text(angle = 45, hjust = 1)
)
ggplot2::ggsave(
filename = "chart1.png",
plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# Summary statistics
vaccine_signal_summary <- plot_data[
cause %in% vaccine_signal_causes,
.(
total_excess_2020_2022 = sum(ifelse(year %in% 2020:2022,
excess_positive, 0
), na.rm = TRUE),
peak_excess = max(excess_positive, na.rm = TRUE),
peak_year = year[which.max(excess_positive)]
),
by = cause
][order(-total_excess_2020_2022)]
print("\nSummary of Potential Vaccine-Related Excess Deaths:")
print(vaccine_signal_summary)
# Total potential vaccine-related excess deaths
total_vaccine_excess <- sum(vaccine_signal_summary$total_excess_2020_2022,
na.rm = TRUE
)
print(paste(
"\nTotal Potential Vaccine-Related Excess Deaths (2020-2022):",
round(total_vaccine_excess)
))
# Alternative view: All causes side by side
chart <- ggplot(
plot_data,
aes(x = factor(year), y = excess_positive, fill = vaccine_signal)
) +
geom_bar(stat = "identity", position = "stack") +
facet_wrap(~cause, scales = "free_y", labeller = label_both) +
labs(
title = "Excess Deaths by Individual ICD-10 Cause (2014-2023)",
subtitle = "Red = Potential vaccine signal, Blue = Unlikely vaccine-related",
x = "Year",
y = "Excess Deaths",
fill = "Classification"
) +
scale_fill_manual(values = c(
"Potential Vaccine Signal" = "red",
"Unlikely Vaccine Related" = "blue"
)) +
theme_minimal() +
theme(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
axis.text.x = element_text(angle = 45, hjust = 1)
)
ggplot2::ggsave(
filename = "chart2.png",
plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# Create final dataset with only vaccine signal causes - use ALL YEARS from dt_analysis
final_data <- dt_analysis[cause %in% vaccine_signal_causes, .(
deaths = sum(deaths, na.rm = TRUE),
baseline_mean = sum(baseline_mean, na.rm = TRUE)
), by = year]
# Calculate excess deaths (can be negative for years below baseline)
final_data[, excess := deaths - baseline_mean]
# For confidence intervals, we need to properly aggregate the variance
# Get the pooled standard deviation for the aggregated causes
pooled_baseline_data <- dt_analysis[
cause %in% vaccine_signal_causes & year %in% 2017:2019,
.(total_deaths = sum(deaths, na.rm = TRUE)),
by = year
]
# Calculate the standard deviation of the aggregated baseline
baseline_sd_pooled <- sd(pooled_baseline_data$total_deaths, na.rm = TRUE)
# Add confidence intervals using the pooled SD
final_data[, `:=`(
hl_lower = baseline_mean - 1.96 * baseline_sd_pooled,
hl_upper = baseline_mean + 1.96 * baseline_sd_pooled
)]
# Create the final plot
chart <- ggplot(final_data, aes(x = year)) +
geom_col(aes(y = deaths),
fill = "red",
alpha = 0.8
) +
geom_line(aes(y = baseline_mean),
linewidth = 1.2,
color = "black",
linetype = "solid"
) +
geom_ribbon(aes(ymin = hl_lower, ymax = hl_upper),
fill = "black",
alpha = 0.2
) +
# Add excess death labels
geom_text(
aes(
y = deaths + max(deaths) * 0.05,
label = ifelse(abs(excess) > 10,
paste0(ifelse(excess > 0, "+", ""), round(excess)),
""
)
),
size = 3,
color = "black",
fontface = "bold"
) +
scale_x_continuous(
breaks = scales::pretty_breaks(n = 10)
) +
scale_y_continuous(labels = scales::comma_format()) +
labs(
title = "Potential Vaccine-Related Deaths [USA]",
subtitle = "Baseline: Mean 2017-2019 · Elevated 2020-2022, Normal 2023 · Source: CDC NVSS",
x = "Year",
y = "Number of Deaths",
caption = paste("ICD10 Codes:", paste(vaccine_signal_causes, collapse = ", "))
) +
theme_minimal() +
theme(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray60"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
axis.text.y = element_text(size = 10),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(size = 9, color = "gray50", hjust = 0),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "gray90", linewidth = 0.5),
panel.grid.major.y = element_line(color = "gray90", linewidth = 0.5)
)
ggplot2::ggsave(
filename = "chart3.png",
plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# Print summary of what codes were included
print(paste("Vaccine signal causes included:", paste(vaccine_signal_causes, collapse = ", ")))
print(paste(
"Total potential vaccine-related excess deaths (2020-2022):",
sum(final_data[year %in% 2020:2022, pmax(excess, 0)])
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment