Skip to content

Instantly share code, notes, and snippets.

#include <cuda_runtime.h>
#include <iostream>
#include <chrono>
// CHECK_CUDA(call): evaluates `call` exactly once. If the result is not
// cudaSuccess, prints the source location and the CUDA runtime's description
// of the error to stderr, then terminates the process with exit status 1.
//
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement: `if (cond) CHECK_CUDA(f()); else ...` parses as intended instead
// of having the `else` bind to the macro's internal `if`.
// Invoke it with a trailing semicolon, like a function call.
#define CHECK_CUDA(call) \
    do { \
        cudaError_t check_cuda_err_ = (call); \
        if (check_cuda_err_ != cudaSuccess) { \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__ << ": " \
                      << cudaGetErrorString(check_cuda_err_) << std::endl; \
            std::exit(1); \
        } \
    } while (0)
#include <cuda_runtime.h>
#include <iostream>
#include <chrono>
#define CHECK_CUDA(call) \
do { \
cudaError_t err = call; \
if (err != cudaSuccess) { \
std::cerr << "CUDA Error: " << cudaGetErrorString(err) << "\n"; \
exit(EXIT_FAILURE); \
#include <cuda_runtime.h>
#include <iostream>
#include <chrono>
#define CHECK_CUDA(call) \
do { \
cudaError_t err = call; \
if (err != cudaSuccess) { \
std::cerr << "CUDA Error: " << cudaGetErrorString(err) << "\n"; \
exit(EXIT_FAILURE); \
12,13c12,15
< #include "ggml-cpu/unary-ops.h"
< #include "ggml-cpu/binary-ops.h"
---
> #include "unary-ops.h"
> #include "binary-ops.h"
> #include "vec.h"
> #include "ops.h"
86,109d87
< #if defined(GGML_USE_ACCELERATE)
@cmdr2
cmdr2 / ops.cpp.patch
Last active March 29, 2025 13:07
ops.cpp.patch
diff --git a/ops.cpp b/ops.cpp
index 6190d0d..c44157b 100644
--- a/ops.cpp
+++ b/ops.cpp
@@ -2347,7 +2347,7 @@ static void ggml_compute_forward_repeat_back_f32(
GGML_ASSERT(nb00 == sizeof(float));
if (ggml_is_contiguous(dst)) {
- ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0);
+ ggml_vec_set_f32(ne0*ne1*ne2*ne3, (float *)dst->data, 0);
11a12,13
> #include "ggml-cpu/unary-ops.h"
> #include "ggml-cpu/binary-ops.h"
4292,4625d4293
< static void ggml_compute_forward_add_f32(
< const struct ggml_compute_params * params,
< struct ggml_tensor * dst) {
<
< const struct ggml_tensor * src0 = dst->src[0];
< const struct ggml_tensor * src1 = dst->src[1];
@cmdr2
cmdr2 / simple_addition_fp16.cpp
Created February 24, 2025 07:44
Add two float16 tensors using ggml. Each tensor takes 1 GB of memory.
#include "ggml.h"
#include "ggml-cpu.h"
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#include <vector>
#include <iostream>
#include <chrono>
@cmdr2
cmdr2 / simple_addition_fp32.cpp
Created February 24, 2025 07:44
Add two float32 tensors using ggml. Each tensor takes 1 GB of memory.
#include "ggml.h"
#include "ggml-cpu.h"
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#include <vector>
#include <iostream>
#include <chrono>
@cmdr2
cmdr2 / simple_ggml_addition.cpp
Last active February 17, 2025 11:43
A simple program that adds two vectors using ggml and can be compiled for either CPU or CUDA.
#include "ggml.h"
#include "ggml-cpu.h"
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#include <vector>
#include <iostream>
@cmdr2
cmdr2 / hexdump.py
Created November 17, 2024 16:38
Simple Python-based hex dumper for analysing DLL and executable files. Uses pefile (https://github.com/erocarrera/pefile).
import pefile
import sys
if len(sys.argv) < 2:
print("Error: No file path specified. Usage: python hexdump.py <file_path>")
sys.exit(1)
file_path = sys.argv[1]
try: