Skip to content

Instantly share code, notes, and snippets.

@nanxstats
Last active March 25, 2025 15:53
Show Gist options
  • Save nanxstats/07442aba90a5bc2f4b7d821338f9eede to your computer and use it in GitHub Desktop.
Save nanxstats/07442aba90a5bc2f4b7d821338f9eede to your computer and use it in GitHub Desktop.
Benchmark code comparing the performance of .combine = "rbind" vs. manual aggregation using data.table::rbindlist() in parallel foreach loops in R. Demonstrates significant speed improvements with manual aggregation.
library(doFuture)
# Start the multisession backend used by %dofuture%. Up to 32 workers are
# requested, but never more than the number of cores reported for this
# machine, so the script does not oversubscribe smaller hosts. On machines
# with 32 or more cores this is identical to `workers = 32`.
plan(multisession, workers = min(32L, future::availableCores()))
# Strongly prefer fixed over scientific notation so large `nsim` values such
# as 100000 are printed in full in messages, the table, and the plot axis.
options(scipen = 999)
#' Simulate repeated SVDs of random Gram matrices
#'
#' For each of `nrep` replicates, draws a `dim` x `dim` matrix `X` of standard
#' normal values, forms `A = crossprod(X)`, and records the three leading
#' singular values of `A` together with its Frobenius norm and its trace.
#'
#' @param id Identifier of the simulation run; repeated in the `id` column of
#'   every row.
#' @param dim Number of rows and columns of the random matrix. For `dim < 3`
#'   the singular-value columns that do not exist are `NA`.
#' @param nrep Number of replicates, i.e. rows in the result. `nrep = 0`
#'   yields a zero-row data frame with the full set of columns.
#' @return A data frame with one row per replicate and the columns `id`,
#'   `sub_id`, `sv1`, `sv2`, `sv3`, `matrix_norm`, and `matrix_trace`.
anysvd <- function(id, dim = 10, nrep = 300) {
  # Preallocate one numeric vector per output column instead of creating a
  # one-row data frame in every iteration and binding them afterwards.
  sv1 <- numeric(nrep)
  sv2 <- numeric(nrep)
  sv3 <- numeric(nrep)
  matrix_norm <- numeric(nrep)
  matrix_trace <- numeric(nrep)

  # seq_len() (not 1:nrep) so that nrep = 0 performs no iterations at all.
  for (j in seq_len(nrep)) {
    X <- matrix(rnorm(dim^2), dim, dim)
    A <- crossprod(X)
    d <- svd(A)$d
    sv1[j] <- d[1]
    sv2[j] <- d[2]
    sv3[j] <- d[3]
    matrix_norm[j] <- norm(A, type = "F")
    matrix_trace[j] <- sum(diag(A))
  }

  data.frame(
    id = rep_len(id, nrep),
    sub_id = seq_len(nrep),
    sv1 = sv1,
    sv2 = sv2,
    sv3 = sv3,
    matrix_norm = matrix_norm,
    matrix_trace = matrix_trace
  )
}
# Numbers of simulated runs at which the two aggregation strategies are timed.
nsim_grid <- c(1000, 10000, 50000, 100000)

# Timing results; two rows (one per method) are appended for every grid value.
df_bench <- data.frame(nsim = integer(), method = character(), time = numeric())

for (nsim in nsim_grid) {
  message("Running benchmark with nsim=", nsim)

  # Begin each grid value with an empty tictoc log, so entry 1 is always the
  # rbind approach and entry 2 is always the rbindlist approach.
  tictoc::tic.clearlog()

  # Approach 1: foreach itself combines the per-task data frames with rbind.
  set.seed(42)
  tictoc::tic(paste0("method1_nsim", nsim))
  df_rbind <- foreach(
    sim_id = seq_len(nsim),
    .combine = "rbind",
    .options.future = list(seed = TRUE)
  ) %dofuture% {
    anysvd(sim_id)
  }
  tictoc::toc(log = TRUE, quiet = TRUE)

  # Approach 2: foreach returns a plain list, which is then bound in a single
  # rbindlist() call. The binding step is part of the timed region.
  set.seed(42)
  tictoc::tic(paste0("method2_nsim", nsim))
  lst_rbindlist <- foreach(
    sim_id = seq_len(nsim),
    .options.future = list(seed = TRUE)
  ) %dofuture% {
    anysvd(sim_id)
  }
  df_rbindlist <- as.data.frame(data.table::rbindlist(lst_rbindlist))
  tictoc::toc(log = TRUE, quiet = TRUE)

  # Both approaches were seeded identically, so the outputs are expected to
  # match exactly; flag it if they do not.
  if (!identical(df_rbind, df_rbindlist)) {
    warning("Discrepant results for nsim=", nsim)
  }

  # Turn the two raw log entries into elapsed seconds and append them.
  lst_log <- tictoc::tic.log(format = FALSE)
  df_bench <- rbind(
    df_bench,
    data.frame(
      nsim = rep(nsim, 2),
      method = c("foreach(.combine=\"rbind\")", "rbindlist"),
      time = unname(vapply(
        lst_log[1:2],
        function(entry) entry$toc - entry$tic,
        numeric(1)
      ))
    ),
    make.row.names = FALSE
  )
  tictoc::tic.clearlog()
}
# Report the timings as a table. print() is called explicitly so the table
# also appears when the script is run through source(), where top-level
# values are not auto-printed.
print(knitr::kable(df_bench))

# Build the plot before opening the graphics device, so an error while
# constructing it cannot leave a half-written PNG device open.
bench_plot <- ggplot2::ggplot(
  df_bench,
  ggplot2::aes(x = nsim, y = time, color = method, group = method)
) +
  ggplot2::geom_line(linewidth = 1) +
  ggplot2::geom_point(size = 3) +
  ggplot2::labs(
    title = "Speed Comparison",
    subtitle = "foreach(.combine=\"rbind\") vs. rbindlist",
    x = "Number of Simulations (nsim)",
    y = "Time (seconds)",
    color = "Method"
  ) +
  cowplot::theme_cowplot() +
  ggsci::scale_color_observable() +
  # Place the legend inside the panel, near the upper-left corner.
  ggplot2::guides(colour = ggplot2::guide_legend(position = "inside")) +
  ggplot2::theme(legend.position.inside = c(0.05, 0.8))

# Write the figure with a golden-ratio aspect (width / height = 1.618).
ragg::agg_png(
  "foreach-rbind-benchmark.png",
  width = 2000,
  height = 2000 / 1.618,
  res = 300,
  scaling = 1.25
)
# A ggplot object is only drawn when printed; without the explicit print()
# the PNG would be blank under source().
print(bench_plot)
dev.off()

# Set scipen back to 0 (R's default) now that reporting is done.
options(scipen = 0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment