Skip to content

Instantly share code, notes, and snippets.

@Validark
Last active June 17, 2025 21:15
Show Gist options
  • Save Validark/5c5d6bde9ac2f969bdff085c77a95e2c to your computer and use it in GitHub Desktop.
Save Validark/5c5d6bde9ac2f969bdff085c77a95e2c to your computer and use it in GitHub Desktop.
prefix_xor u512
const std = @import("std");
/// Carry-less (polynomial over GF(2)) multiply of two 64-bit operands using
/// the x86 PCLMULQDQ intrinsic, returning the low 64 bits of the product.
///
/// `a` is the first operand. `b` is passed as a full 128-bit register, but the
/// immediate `0` selects only the low quadword of each input, so only the low
/// 8 bytes of `b` participate.
fn carryless_multiply_x86(a: @Vector(8, u8), b: @Vector(16, u8)) @Vector(8, u8) {
    const intrinsics = struct {
        extern fn @"llvm.x86.pclmulqdq"(@Vector(2, u64), @Vector(2, u64), i8) @Vector(2, u64);
    };

    // Widen `a` to 128 bits. The upper half is left undefined because the
    // instruction does not read it with this immediate.
    const padding: @Vector(8, u8) = undefined;
    const lhs: @Vector(2, u64) = @bitCast(std.simd.join(a, padding));
    const rhs: @Vector(2, u64) = @bitCast(b);

    const product = intrinsics.@"llvm.x86.pclmulqdq"(lhs, rhs, 0);

    // View the 128-bit product as two 8-byte halves and keep the first one.
    const halves: [2]@Vector(8, u8) = @bitCast(product);
    return halves[0];
}
/// Thin wrapper around the 512-bit PCLMULQDQ intrinsic (VPCLMULQDQ).
///
/// The instruction performs one 64x64 -> 128-bit carry-less multiply in each
/// 128-bit lane. `imm8` picks, for every lane, whether the low or the high
/// quadword of `a` and of `b` is used as the operand. The result holds the
/// four 128-bit products as consecutive (low, high) `u64` pairs.
fn carryless_multiply_avx512(a: @Vector(8, u64), b: @Vector(8, u64), comptime imm8: enum(u8) {
    low_a_low_b = 0b0000_0000,
    high_a_low_b = 0b0000_0001,
    low_a_high_b = 0b0001_0000,
    high_a_high_b = 0b0001_0001,
}) @Vector(8, u64) {
    const intrinsics = struct {
        extern fn @"llvm.x86.pclmulqdq.512"(@Vector(8, u64), @Vector(8, u64), i8) @Vector(8, u64);
    };
    // The enum's backing integer is the instruction's immediate operand.
    const selector: i8 = @intFromEnum(imm8);
    return intrinsics.@"llvm.x86.pclmulqdq.512"(a, b, selector);
}
/// Inclusive prefix XOR over the 512-bit string held in `a`.
///
/// Bit `k` of lane `i` is treated as bit `64 * i + k` of the string. Each
/// output bit is the XOR of that input bit and every lower input bit.
///
/// Fix: the previous body returned only `low_half ^ high_half` of each
/// product, which is every lane's parity broadcast to 64 bits, not a prefix
/// XOR. That mask is now used as the carry source: its exclusive scan across
/// lanes is XORed into the per-lane prefix.
export fn prefix_xor_512(a: @Vector(8, u64)) @Vector(8, u64) {
    const b: @Vector(8, u64) = @splat(0xFFFFFFFFFFFFFFFF);
    // Even lanes are multiplied in `x`, odd lanes in `y`; each product is a
    // (low, high) pair of u64s inside its 128-bit lane.
    const x = carryless_multiply_avx512(a, b, .low_a_low_b);
    const y = carryless_multiply_avx512(a, b, .high_a_high_b);

    // Low product halves, re-interleaved into lane order: the prefix XOR of
    // each 64-bit lane on its own.
    const within_lane = @shuffle(u64, x, y, [_]i32{ 0, -1, 2, -3, 4, -5, 6, -7 });
    // High product halves: bit j is the XOR of the lane's bits j+1..63.
    const spill = @shuffle(u64, x, y, [_]i32{ 1, -2, 3, -4, 5, -6, 7, -8 });
    // Low ^ high is therefore all-ones when the lane has odd parity, else 0.
    const lane_parity = within_lane ^ spill;

    // Exclusive scan of the parity masks: lane i receives the combined parity
    // of lanes 0..i-1, which is exactly the carry entering lane i.
    var carry = std.simd.shiftElementsRight(lane_parity, 1, 0);
    carry ^= std.simd.shiftElementsRight(carry, 1, 0);
    carry ^= std.simd.shiftElementsRight(carry, 2, 0);
    carry ^= std.simd.shiftElementsRight(carry, 4, 0);

    return within_lane ^ carry;
}
/// Inclusive prefix XOR over the 512-bit string held in `a`, computing the
/// cross-lane carry with one extra carry-less multiply.
///
/// Bit `k` of lane `i` is treated as bit `64 * i + k` of the string.
///
/// Fix: the carry mask used to be the parity of lanes 0..i (inclusive of the
/// lane itself), which cancels the lane's own contribution; for example
/// `a[0] = 1` produced 0 in lane 0. The carry into lane i must only cover
/// lanes 0..i-1, so the mask bytes are now shifted up by one lane.
export fn prefix_xor_512_2(a: @Vector(8, u64)) @Vector(8, u64) {
    const b: @Vector(8, u64) = @splat(0xFFFFFFFFFFFFFFFF);

    // One byte per lane holding that lane's parity (0 or 1).
    const lane_parity = @popCount(a) & @as(@Vector(8, u8), @splat(1));

    // Multiplying the packed parity bytes by all-ones smears each set bit
    // upwards, so byte j becomes 0xFF when lanes 0..j have odd combined
    // parity. Only element 0 of the product is meaningful; the padding
    // elements are undefined and discarded.
    const inclusive: u64 = carryless_multiply_avx512(
        @bitCast(std.simd.join(lane_parity, @as(@Vector(56, u8), undefined))),
        b,
        .low_a_low_b,
    )[0];

    // Move every byte up one position so byte j describes lanes 0..j-1 and
    // byte 0 (no lower lanes) is zero.
    const exclusive: u64 = inclusive << 8;

    // Sign-extend each 0x00 / 0xFF byte into a full 64-bit lane mask.
    const cross_xor: @Vector(8, i64) = @as(@Vector(8, i8), @bitCast(exclusive));

    const x = carryless_multiply_avx512(a, b, .low_a_low_b);
    const y = carryless_multiply_avx512(a, b, .high_a_high_b);
    // Low product halves in lane order: the prefix XOR within each lane.
    const within_lane = @shuffle(u64, x, y, [_]i32{ 0, -1, 2, -3, 4, -5, 6, -7 });
    return within_lane ^ @as(@Vector(8, u64), @bitCast(cross_xor));
}
/// Inclusive prefix XOR over the 512-bit string held in `a`, deriving the
/// cross-lane carry from a 128-bit carry-less multiply of the lane popcounts.
///
/// Bit `k` of lane `i` is treated as bit `64 * i + k` of the string.
///
/// Fix: the popcounts used to be multiplied by a vector of all-one bytes,
/// which folds lane i's own popcount into its carry (inclusive scan) and so
/// cancels the lane's own parity. Byte 0 of the multiplier is now zero, so
/// byte j of the product is the XOR of the popcounts of lanes 0..j-1 only.
fn prefix_xor_512_3(a: @Vector(8, u64)) @Vector(8, u64) {
    const b: @Vector(8, u64) = @splat(0xFFFFFFFFFFFFFFFF);
    const c: @Vector(8, u8) = @splat(1);

    // Bytes 1..7 set to 1: each popcount byte is XORed into every higher
    // byte but not into its own position. Bytes 8..15 are not read by the
    // low-quadword multiply.
    const exclusive_weights = @Vector(16, u8){ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
    const lower_popcounts = carryless_multiply_x86(@popCount(a), exclusive_weights);

    // Bit 0 of each byte is the combined parity of all lower lanes.
    const cross_xor: @Vector(8, bool) = (c & lower_popcounts) == c;

    const x = carryless_multiply_avx512(a, b, .low_a_low_b);
    const y = carryless_multiply_avx512(a, b, .high_a_high_b);
    // Low product halves in lane order: the prefix XOR within each lane.
    const within_lane = @shuffle(u64, x, y, [_]i32{ 0, -1, 2, -3, 4, -5, 6, -7 });
    // Flip every bit of a lane when an odd number of set bits precedes it.
    return within_lane ^ @select(u64, cross_xor, b, @as(@Vector(8, u64), @splat(0)));
}
/// Portable inclusive prefix XOR over the 512-bit string held in `bitstring`.
///
/// Bit `k` of lane `i` is treated as bit `64 * i + k` of the string. Each
/// output bit is the XOR of that input bit and every lower input bit.
///
/// Fix: the lane-local shifts cannot carry bits across lanes, so XORing whole
/// shifted lanes afterwards mixed in the lower lanes' *prefixes* rather than
/// their parity (`{2, 0, ...}` gave 0xFF..FE in lane 1 instead of all ones).
/// The cross-lane step now propagates each lane's parity as an all-ones or
/// all-zeros mask.
export fn prefixXOR(bitstring: @Vector(8, u64)) @Vector(8, u64) {
    // Log-step prefix XOR inside every 64-bit lane.
    var x = bitstring;
    x ^= x << @splat(1);
    x ^= x << @splat(2);
    x ^= x << @splat(4);
    x ^= x << @splat(8);
    x ^= x << @splat(16);
    x ^= x << @splat(32);

    // Bit 63 of each lane prefix is that lane's parity; an arithmetic shift
    // broadcasts it to all 64 bits.
    const lane_parity: @Vector(8, u64) = @bitCast(@as(@Vector(8, i64), @bitCast(x)) >> @splat(63));

    // Exclusive scan across lanes: lane i ends up with the combined parity of
    // lanes 0..i-1, i.e. the carry entering lane i.
    var carry = std.simd.shiftElementsRight(lane_parity, 1, 0);
    carry ^= std.simd.shiftElementsRight(carry, 1, 0);
    carry ^= std.simd.shiftElementsRight(carry, 2, 0);
    carry ^= std.simd.shiftElementsRight(carry, 4, 0);

    return x ^ carry;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment