Skip to content

Instantly share code, notes, and snippets.

@youkaichao
Created February 6, 2025 04:34
Show Gist options
  • Save youkaichao/9d92875e584f3161001944c3348d1ce2 to your computer and use it in GitHub Desktop.
Save youkaichao/9d92875e584f3161001944c3348d1ce2 to your computer and use it in GitHub Desktop.
gloo vs. nccl
import torch
import torch.distributed as dist
# Backend switch for the benchmark: True measures NCCL (CUDA tensors),
# False measures Gloo (CPU tensors).
use_nccl = False

# No init_method/rank/world_size is passed, so the default env-based rendezvous
# is used; the script is expected to be started by a launcher such as torchrun.
dist.init_process_group(backend="nccl" if use_nccl else "gloo")
rank = dist.get_rank()

# Bind this process to one GPU. The modulus uses the number of GPUs actually
# visible on this host instead of a hard-coded 8, so the script also works on
# machines with fewer (or more) devices. The Gloo path only touches CPU
# tensors, so skipping this on a CUDA-less host lets that benchmark still run.
if torch.cuda.is_available():
    torch.cuda.set_device(rank % torch.cuda.device_count())
def try_nccl():
    """Run one MAX all-reduce on a scalar CUDA tensor and return the result.

    Each caller contributes the value 0 as an int64 scalar on the current
    CUDA device; the tensor is reduced in place over the default process
    group.

    Returns:
        The reduced value as a Python int.
    """
    flag = torch.tensor(0, dtype=torch.long, device="cuda")
    dist.all_reduce(flag, op=dist.ReduceOp.MAX)
    # .item() reads the value back into Python, so the call does not return
    # until the reduced result is actually available.
    return flag.item()
def try_gloo():
    """Run one MAX all-reduce on a scalar CPU tensor and return the result.

    Each caller contributes the value 0 as an int64 scalar in host memory;
    the tensor is reduced in place over the default process group.

    Returns:
        The reduced value as a Python int.
    """
    flag = torch.tensor(0, dtype=torch.long, device="cpu")
    dist.all_reduce(flag, op=dist.ReduceOp.MAX)
    # Convert the reduced 0-dim tensor to a plain Python int for the caller.
    return flag.item()
# Pick the collective that matches the backend selected by `use_nccl`.
func = try_nccl if use_nccl else try_gloo

# Warm up: the first few collectives include one-time setup cost that should
# not be counted in the steady-state measurement.
for _ in range(10):
    func()

# Measure the time.
import time

num_iters = 1000
# perf_counter() is monotonic and high-resolution; time.time() is a wall clock
# that can be adjusted while the loop runs and skew the result.
start = time.perf_counter()
for _ in range(num_iters):
    func()
end = time.perf_counter()

# Average latency per call, converted from seconds to milliseconds.
per_iter = (end - start) / num_iters * 1000
print(f"per iteration time: {per_iter:.2f} ms")
@youkaichao
Copy link
Author

test command:

torchrun --nproc-per-node=8 test.py

gloo:

per iteration time: 0.27 ms
per iteration time: 0.27 ms
per iteration time: 0.27 ms
per iteration time: 0.27 ms
per iteration time: 0.27 ms
per iteration time: 0.27 ms
per iteration time: 0.27 ms
per iteration time: 0.27 ms

nccl:

per iteration time: 0.07 ms
per iteration time: 0.07 ms
per iteration time: 0.07 ms
per iteration time: 0.07 ms
per iteration time: 0.07 ms
per iteration time: 0.07 ms
per iteration time: 0.07 ms
per iteration time: 0.07 ms

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment