Last active
June 17, 2025 21:15
-
-
Save Validark/5c5d6bde9ac2f969bdff085c77a95e2c to your computer and use it in GitHub Desktop.
prefix_xor u512
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
/// Carry-less multiplies the 64 bits held in `a` by the low 64 bits of `b`
/// using the LLVM `llvm.x86.pclmulqdq` intrinsic, and returns the low 64 bits
/// of the product as eight bytes.
///
/// `a` is widened to 128 bits with an undefined upper half; that half is never
/// read because the immediate `0` selects the low quadword of both operands.
/// Only the first 8 bytes of `b` take part in the multiplication.
fn carryless_multiply_x86(a: @Vector(8, u8), b: @Vector(16, u8)) @Vector(8, u8) {
    const intrinsics = struct {
        extern fn @"llvm.x86.pclmulqdq"(@Vector(2, u64), @Vector(2, u64), i8) @Vector(2, u64);
    };

    // Pad `a` out to a full 128-bit operand; the padding is ignored by the multiply.
    const unused_upper: @Vector(8, u8) = undefined;
    const lhs: @Vector(2, u64) = @bitCast(std.simd.join(a, unused_upper));
    const rhs: @Vector(2, u64) = @bitCast(b);

    const product = intrinsics.@"llvm.x86.pclmulqdq"(lhs, rhs, 0);

    // View the 128-bit product as two 8-byte halves and keep the low one.
    const halves: [2]@Vector(8, u8) = @bitCast(product);
    return halves[0];
}
/// Thin wrapper around the LLVM `llvm.x86.pclmulqdq.512` intrinsic.
///
/// `a` and `b` are passed to the intrinsic unchanged. `imm8` is the
/// compile-time immediate; as the variant names indicate, it chooses which
/// 64-bit half (low or high) of `a` and of `b` is multiplied. The intrinsic's
/// result is returned as-is.
fn carryless_multiply_avx512(a: @Vector(8, u64), b: @Vector(8, u64), comptime imm8: enum(u8) {
    low_a_low_b = 0b0000_0000,
    high_a_low_b = 0b0000_0001,
    low_a_high_b = 0b0001_0000,
    high_a_high_b = 0b0001_0001,
}) @Vector(8, u64) {
    const intrinsics = struct {
        extern fn @"llvm.x86.pclmulqdq.512"(@Vector(8, u64), @Vector(8, u64), i8) @Vector(8, u64);
    };

    // `imm8` is comptime, so the selector stays a compile-time constant.
    const half_selector: i8 = @intFromEnum(imm8);
    return intrinsics.@"llvm.x86.pclmulqdq.512"(a, b, half_selector);
}
/// Inclusive prefix XOR of `a` viewed as one 512-bit string, where element 0
/// holds the least significant 64 bits: output bit n is the XOR of input bits
/// 0..n.
///
/// Each 64-bit element is carry-less multiplied by all ones. The low half of
/// that 128-bit product is the element's own 64-bit prefix XOR. The high half
/// equals the low half XORed with the element's parity broadcast to all bits,
/// so `low ^ high` is all-ones exactly when the element has an odd popcount.
///
/// The previous version returned `low ^ high` directly, i.e. only the
/// per-element parity masks. This version XORs the exclusive running parity
/// of the preceding elements into the low halves.
export fn prefix_xor_512(a: @Vector(8, u64)) @Vector(8, u64) {
    const b: @Vector(8, u64) = @splat(0xFFFFFFFFFFFFFFFF);

    // x holds the products of the even elements, y those of the odd elements.
    const x = carryless_multiply_avx512(a, b, .low_a_low_b);
    const y = carryless_multiply_avx512(a, b, .high_a_high_b);

    // Re-interleave so that index k refers to the product of a[k].
    const low_halves = @shuffle(u64, x, y, [_]i32{ 0, -1, 2, -3, 4, -5, 6, -7 });
    const high_halves = @shuffle(u64, x, y, [_]i32{ 1, -2, 3, -4, 5, -6, 7, -8 });

    // All-ones where a[k] has an odd number of set bits, zero otherwise.
    const odd_parity = low_halves ^ high_halves;

    // Exclusive prefix XOR of the parity masks across elements:
    // carry[k] = odd_parity[0] ^ ... ^ odd_parity[k - 1].
    var carry = std.simd.shiftElementsRight(odd_parity, 1, 0);
    carry ^= std.simd.shiftElementsRight(carry, 1, 0);
    carry ^= std.simd.shiftElementsRight(carry, 2, 0);
    carry ^= std.simd.shiftElementsRight(carry, 4, 0);

    return low_halves ^ carry;
}
/// Inclusive prefix XOR of `a` viewed as one 512-bit string, where element 0
/// holds the least significant 64 bits.
///
/// The per-element 64-bit prefix XOR comes from a carry-less multiply by all
/// ones. The cross-element carry comes from a second carry-less multiply over
/// the eight per-element parity bytes.
///
/// The previous version used that second product directly. Its byte k already
/// includes the parity of element k itself, so every element with an odd
/// popcount came out inverted. The product is now shifted up by one byte so
/// that element k only sees the parity of elements 0..k-1.
export fn prefix_xor_512_2(a: @Vector(8, u64)) @Vector(8, u64) {
    const b: @Vector(8, u64) = @splat(0xFFFFFFFFFFFFFFFF);

    // One byte per element: 1 if the element has an odd popcount, else 0.
    // The upper 56 bytes are padding and never reach the result.
    const parity_bytes = @popCount(a) & @as(@Vector(8, u8), @splat(1));
    const parity_operand: @Vector(8, u64) = @bitCast(std.simd.join(parity_bytes, @as(@Vector(56, u8), undefined)));

    // Byte k of the low product word is 0xFF when elements 0..k together have
    // odd parity (inclusive of element k).
    const inclusive_carry: u64 = carryless_multiply_avx512(parity_operand, b, .low_a_low_b)[0];

    // Move every byte up one position so byte k covers elements 0..k-1 only;
    // byte 0 becomes zero because nothing precedes element 0.
    const carry_bytes: @Vector(8, i8) = @bitCast(inclusive_carry << 8);

    // Sign-extend each 0x00 / 0xFF byte to a full 64-bit mask.
    const cross_xor: @Vector(8, i64) = carry_bytes;

    // x holds the products of the even elements, y those of the odd elements;
    // the shuffle re-interleaves their low halves into element order.
    const x = carryless_multiply_avx512(a, b, .low_a_low_b);
    const y = carryless_multiply_avx512(a, b, .high_a_high_b);
    return @shuffle(u64, x, y, [_]i32{ 0, -1, 2, -3, 4, -5, 6, -7 }) ^ @as(@Vector(8, u64), @bitCast(cross_xor));
}
/// Inclusive prefix XOR of `a` viewed as one 512-bit string, where element 0
/// holds the least significant 64 bits.
///
/// The per-element 64-bit prefix XOR comes from a carry-less multiply by all
/// ones. The cross-element carry is derived from the per-element popcounts
/// with a 128-bit carry-less multiply, then expanded to full masks with
/// `@select`.
///
/// The previous version multiplied the popcount bytes by 0x01 in every byte,
/// which made byte k the XOR of the counts of elements 0..k, including
/// element k itself. Every element with an odd popcount therefore came out
/// inverted. The multiplier now has byte 0 cleared, so byte k accumulates
/// only elements 0..k-1.
fn prefix_xor_512_3(a: @Vector(8, u64)) @Vector(8, u64) {
    const b: @Vector(8, u64) = @splat(0xFFFFFFFFFFFFFFFF);
    const c: @Vector(8, u8) = @splat(1);

    // Multiplying by 0x01 in bytes 1..7 XORs the byte vector with itself
    // shifted up by 1..7 byte positions. Each popcount is at most 64, so it
    // fits in its byte and nothing bleeds across bytes. The upper 8 bytes are
    // not used by the multiply.
    const exclusive_multiplier: @Vector(16, u8) = .{ 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
    const preceding_counts = carryless_multiply_x86(@popCount(a), exclusive_multiplier);

    // The low bit of each byte is the parity of all bits in the preceding elements.
    const cross_xor: @Vector(8, bool) = c == (c & preceding_counts);

    // x holds the products of the even elements, y those of the odd elements;
    // the shuffle re-interleaves their low halves into element order.
    const x = carryless_multiply_avx512(a, b, .low_a_low_b);
    const y = carryless_multiply_avx512(a, b, .high_a_high_b);
    return @shuffle(u64, x, y, [_]i32{ 0, -1, 2, -3, 4, -5, 6, -7 }) ^ @select(u64, cross_xor, b, @as(@Vector(8, u64), @splat(0)));
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Portable inclusive prefix XOR of `bitstring` viewed as one 512-bit string,
/// where element 0 holds the least significant 64 bits: output bit n is the
/// XOR of input bits 0..n.
///
/// The previous version XORed whole per-element prefix words into the
/// following elements. That is only correct when every prefix word is all
/// zeros or all ones; for example an input of `{ 2, 0, ... }` produced
/// `0xFF..FE` in element 1 instead of all ones. What has to cross an element
/// boundary is the element's final prefix bit (bit 63), broadcast to all 64
/// positions.
export fn prefixXOR(bitstring: @Vector(8, u64)) @Vector(8, u64) {
    // Step 1: prefix XOR inside each 64-bit element by log-step doubling.
    var x = bitstring;
    x ^= x << @splat(1);
    x ^= x << @splat(2);
    x ^= x << @splat(4);
    x ^= x << @splat(8);
    x ^= x << @splat(16);
    x ^= x << @splat(32);

    // Step 2: bit 63 of each prefix is the parity of the whole element.
    // An arithmetic right shift broadcasts it to an all-ones / all-zeros mask.
    const signed_prefix: @Vector(8, i64) = @bitCast(x);
    const odd_parity: @Vector(8, u64) = @bitCast(signed_prefix >> @splat(63));

    // Step 3: exclusive prefix XOR of the masks across elements, so that
    // carry[k] = odd_parity[0] ^ ... ^ odd_parity[k - 1].
    var carry = std.simd.shiftElementsRight(odd_parity, 1, 0);
    carry ^= std.simd.shiftElementsRight(carry, 1, 0);
    carry ^= std.simd.shiftElementsRight(carry, 2, 0);
    carry ^= std.simd.shiftElementsRight(carry, 4, 0);

    return x ^ carry;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment