Last active
March 25, 2025 15:53
-
-
Save nanxstats/07442aba90a5bc2f4b7d821338f9eede to your computer and use it in GitHub Desktop.
Benchmark code comparing the performance of .combine = "rbind" vs. manual aggregation using data.table::rbindlist() in parallel foreach loops in R. Demonstrates significant speed improvements with manual aggregation.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(doFuture) | |
# Run the foreach loops below on 32 background R sessions.
plan(strategy = multisession, workers = 32)

# Strongly penalise scientific notation so that large simulation counts are
# formatted in fixed notation (e.g. 100000 instead of 1e+05).
options(scipen = 999)
#' Simulate spectral summaries of random Gram matrices
#'
#' For each of `nrep` replicates, draws a `dim` x `dim` matrix `X` of
#' standard normal values, forms `A = crossprod(X)`, and records the first
#' three singular values of `A` together with its Frobenius norm and trace.
#'
#' @param id Identifier copied into the `id` column of every output row.
#' @param dim Number of rows and columns of the simulated matrix.
#' @param nrep Number of replicates to simulate; one output row each.
#'   With `nrep = 0` no replicate is simulated and an empty data frame is
#'   returned.
#' @return A data frame with one row per replicate and the columns `id`,
#'   `sub_id`, `sv1`, `sv2`, `sv3`, `matrix_norm`, and `matrix_trace`.
anysvd <- function(id, dim = 10, nrep = 300) {
  results <- vector("list", nrep)
  # seq_len() instead of 1:nrep: 1:0 would iterate over c(1, 0) and fail on
  # `results[[0]] <- ...` when nrep is 0.
  for (j in seq_len(nrep)) {
    X <- matrix(rnorm(dim^2), dim, dim)
    A <- crossprod(X)
    s <- svd(A)
    results[[j]] <- data.frame(
      id = id,
      sub_id = j,
      sv1 = s$d[1],
      sv2 = s$d[2],
      sv3 = s$d[3],
      matrix_norm = norm(A, type = "F"),
      matrix_trace = sum(diag(A))
    )
  }
  # Bind the per-replicate rows in one pass and hand back a plain data frame.
  as.data.frame(data.table::rbindlist(results))
}
# Simulation counts to benchmark, from smallest to largest.
nsim_grid <- c(1e3, 1e4, 5e4, 1e5)

# Zero-row accumulator for the timings: one row per (nsim, method) pair is
# appended by the benchmark loop.
df_bench <- data.frame(nsim = integer(0), method = character(0), time = numeric(0))
# Time both aggregation strategies once for every simulation count in the grid.
for (nsim in nsim_grid) {
  message("Running benchmark with nsim=", nsim)

  # Start from an empty tictoc log so that entries 1 and 2 read back below
  # belong to method 1 and method 2 of this iteration.
  tictoc::tic.clearlog()

  # Method 1: let foreach combine the per-task data frames with rbind().
  set.seed(42)
  tictoc::tic(paste0("method1_nsim", nsim))
  df_rbind <- foreach(
    i = seq_len(nsim),
    .combine = "rbind",
    .options.future = list(seed = TRUE)
  ) %dofuture% {
    anysvd(i)
  }
  tictoc::toc(log = TRUE, quiet = TRUE)

  # Method 2: collect the per-task data frames in a list, then bind them all
  # at once with data.table::rbindlist(). The binding step is timed as well.
  set.seed(42)
  tictoc::tic(paste0("method2_nsim", nsim))
  lst_rbindlist <- foreach(
    i = seq_len(nsim),
    .options.future = list(seed = TRUE)
  ) %dofuture% {
    anysvd(i)
  }
  df_rbindlist <- as.data.frame(data.table::rbindlist(lst_rbindlist))
  tictoc::toc(log = TRUE, quiet = TRUE)

  # Both methods start from the same seed, so their results are compared for
  # exact equality.
  if (!identical(df_rbind, df_rbindlist)) {
    warning("Discrepant results for nsim=", nsim)
  }

  # Elapsed time of each method, taken from the raw (unformatted) tictoc log.
  lst_log <- tictoc::tic.log(format = FALSE)
  elapsed_rbind <- lst_log[[1]]$toc - lst_log[[1]]$tic
  elapsed_rbindlist <- lst_log[[2]]$toc - lst_log[[2]]$tic

  # Append one row per method for this simulation count.
  df_bench <- rbind(
    df_bench,
    data.frame(
      nsim = nsim,
      method = c("foreach(.combine=\"rbind\")", "rbindlist"),
      time = unname(c(elapsed_rbind, elapsed_rbindlist))
    ),
    make.row.names = FALSE
  )

  tictoc::tic.clearlog()
}
# Show the timing table. The explicit print() keeps the table visible when the
# script is run through source(), where top-level values are not auto-printed.
print(knitr::kable(df_bench))

# Build the comparison plot before opening the graphics device, so a failure
# while constructing the plot cannot leave a device open.
# NOTE: guide_legend(position = "inside") and legend.position.inside need
# ggplot2 >= 3.5.0.
plt_bench <- ggplot2::ggplot(
  df_bench,
  ggplot2::aes(x = nsim, y = time, color = method, group = method)
) +
  ggplot2::geom_line(linewidth = 1) +
  ggplot2::geom_point(size = 3) +
  ggplot2::labs(
    title = "Speed Comparison",
    subtitle = "foreach(.combine=\"rbind\") vs. rbindlist",
    x = "Number of Simulations (nsim)",
    y = "Time (seconds)",
    color = "Method"
  ) +
  cowplot::theme_cowplot() +
  ggsci::scale_color_observable() +
  ggplot2::guides(colour = ggplot2::guide_legend(position = "inside")) +
  ggplot2::theme(legend.position.inside = c(0.05, 0.8))

# Render to a PNG with a golden-ratio aspect. A ggplot object is only drawn
# when it is printed; without the explicit print() the file would be empty
# whenever the script is source()'d rather than run at top level.
ragg::agg_png("foreach-rbind-benchmark.png", width = 2000, height = 2000 / 1.618, res = 300, scaling = 1.25)
print(plt_bench)
dev.off()

# Set scipen back to 0, R's documented default.
options(scipen = 0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment