Created
June 16, 2025 03:38
-
-
Save bugparty/7123923d24b809fe5e3699ee686635b5 to your computer and use it in GitHub Desktop.
Steam Deck ROCm HIP GPU bandwidth test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <hip/hip_runtime.h> | |
#include <iostream> | |
#include <chrono> | |
#include <hip/hip_fp16.h> | |
#include "utils.hpp" | |
// Number of int elements in the test buffer. This is an element count, not a
// size in MB: 2047 * 1024 * 1024 elements, i.e. just under 8 GiB when int is
// 4 bytes. The factor is 2047 rather than 2048 because the product is evaluated
// in int arithmetic and 2048 * 1024 * 1024 (2^31) would overflow a 32-bit int.
#define N (2047 * 1024 * 1024)
// Block-size constant; not referenced by the code visible in this file.
#define THREADS_PER_BLOCK 256
/**
 * Warm-up read kernel: each thread walks `data` in a grid-stride loop and
 * accumulates every element it loads.
 *
 * @param data     Read-only input buffer holding `count` ints.
 * @param sum_out  Sink for a thread's total. It is written only in the
 *                 (practically unreachable) case described in the body, so it
 *                 does NOT hold the sum of the buffer after the kernel returns.
 * @param count    Number of elements in `data`.
 */
__global__ void read_kernel(const int* __restrict__ data, long long* __restrict__ sum_out, size_t count) {
    // A total that realistic inputs cannot produce; used only to make the
    // loads observable without paying for a store per thread.
    constexpr long long kUnreachableSum = 0x7fffffffffffffffLL;

    // Widen before multiplying: the built-in coordinates are 32-bit, so
    // gridDim.x * blockDim.x can wrap for large launches (a wrapped stride of
    // 0 would make the loop below spin forever).
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    long long sum = 0;
    for (size_t i = first; i < count; i += stride) {
        sum += data[i];
    }

    // Without a data-dependent use of `sum`, the compiler may drop the loads
    // (and the whole loop) as dead code, and the kernel would read nothing.
    // The comparison keeps them alive; the store itself is not expected to run.
    if (sum == kUnreachableSum) {
        *sum_out = sum;
    }
}
/**
 * Read kernel used for the timed bandwidth runs: each thread sums the elements
 * of `data` it visits in a grid-stride loop.
 *
 * @param data     Read-only input buffer holding `count` ints.
 * @param sum_out  Sink that keeps the loads observable. It is written only if
 *                 a thread's total equals a sentinel that realistic inputs do
 *                 not produce, so callers must not read a result from it.
 * @param count    Number of elements in `data`.
 */
__global__ void read_kernel2(const int* __restrict__ data, long long* __restrict__ sum_out, size_t count) {
    // Sentinel total; comparing against it costs one branch per thread
    // instead of one global store or atomic per thread.
    constexpr long long kUnreachableSum = 0x7fffffffffffffffLL;

    // Do the index arithmetic in size_t: blockIdx.x, blockDim.x and gridDim.x
    // are 32-bit and their products can wrap for large launches.
    const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;

    long long sum = 0;
    for (size_t i = first; i < count; i += stride) {
        sum += data[i];
    }

    // The accumulated value has to feed something observable; otherwise the
    // compiler is free to remove the loads and the benchmark measures only
    // launch overhead.
    if (sum == kUnreachableSum) {
        *sum_out = sum;
    }
}
int main() { | |
int* data = nullptr; | |
long long* sum = nullptr; | |
hipError_t err = hipSuccess; | |
std::cout << "Allocating managed memory..." << std::endl; | |
HIP_CHECK(hipMallocManaged(&data, N * sizeof(int))); | |
HIP_CHECK(hipMallocManaged(&sum, sizeof(long long))); | |
*sum = 0; | |
std::cout << "Initializing data..." << std::endl; | |
for (size_t i = 0; i < N; ++i) data[i] = i % 13; | |
hipDeviceSynchronize(); // ensure init done | |
std::cout << "Prefetching to GPU..." << std::endl; | |
int device = 0; | |
hipGetDevice(&device); | |
hipMemPrefetchAsync(data, N * sizeof(int), device); | |
hipDeviceSynchronize(); | |
std::cout << "Running warm-up bandwitdh read." << std::endl; | |
read_kernel<<<(N + 32 - 1) / 32, 32>>>(data, sum, N); | |
hipDeviceSynchronize(); | |
std::vector<int> block_sizes = {8,16, 32,64,128}; | |
for (int block_size : block_sizes) { | |
std::cout << "Launching kernel..." << std::endl; | |
auto start = std::chrono::high_resolution_clock::now(); | |
int blocks = (N + block_size - 1) / block_size; | |
read_kernel2<<<blocks, block_size>>>(data, sum, N); | |
hipDeviceSynchronize(); | |
auto end = std::chrono::high_resolution_clock::now(); | |
double duration = std::chrono::duration<double>(end - start).count(); | |
double gb = (N * sizeof(int)) / 1e9; | |
std::cout << "wrap size: " << block_size << std::endl; | |
std::cout << "Read BW: " << (gb / duration) << " GB/s" << std::endl; | |
} | |
hipFree(data); | |
hipFree(sum); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment