Skip to content

Instantly share code, notes, and snippets.

View xevix's full-sized avatar

Alejandro Wainzinger xevix

  • California
  • 01:57 (UTC -07:00)
View GitHub Profile
@xevix
xevix / list_hive_partitions.sql
Last active August 13, 2025 06:21
List Hive Partitions
WITH hive_cols AS (
PIVOT (
SELECT
file.split('/').list_filter(lambda x: x.contains('=')).list_transform(lambda x: x.split('=')).unnest() AS kv,
file
FROM glob('/path/to/hive/**/*.parquet')
)
ON kv[1]
USING first(kv[2])
)
@xevix
xevix / duckdb_build_errors_parquet_ubuntu_aarch64.log
Created July 12, 2025 00:00
duckdb_build_errors_parquet_ubuntu_aarch64.log
$ GEN=ninja make unittest_release
mkdir -p ./build/release && \
cd build/release && \
cmake -G "Ninja" -DFORCE_COLORED_OUTPUT=1 -DLOCAL_EXTENSION_REPO="" -DOVERRIDE_GIT_DESCRIBE="" -DDUCKDB_EXPLICIT_VERSION="" -DCMAKE_BUILD_TYPE=Release ../.. && \
cmake --build . --config Release
-- Found Python3: /usr/bin/python3 (found version "3.13.3") found components: Interpreter
-- The C compiler identification is GNU 14.2.0
-- The CXX compiler identification is GNU 14.2.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
[~/external]$ git clone git@github.com:duckdb/duckdb.git duckdb_tests
Cloning into 'duckdb_tests'...
remote: Enumerating objects: 707000, done.
remote: Counting objects: 100% (166/166), done.
remote: Compressing objects: 100% (95/95), done.
remote: Total 707000 (delta 112), reused 71 (delta 71), pack-reused 706834 (from 2)
Receiving objects: 100% (707000/707000), 347.96 MiB | 17.11 MiB/s, done.
Resolving deltas: 100% (590703/590703), done.
Updating files: 100% (14122/14122), done.
[~/external]$ cd duckdb_tests
#!/bin/bash
## WARNING: no warranty express or implied. Messing with Steam Deck internals without being familiar with a Linux terminal could lead to a broken system. Proceed with caution.
# Set password for `deck` user in order to have password for sudo commands (remember this password)
passwd
# Disable read-only
sudo steamos-readonly disable
# Set up Pacman
import pandas as pd
import duckdb
import pygrib
import sys
# Testing data from: https://data.ecmwf.int/forecasts/20250211/00z/aifs/0p25/oper/
# ECMWF is the European Centre for Medium-Range Weather Forecasts.
def grib_to_df(grb):
attrs = str(grb).split(":")
-- Tested on 2024 MBP M4 Max 128G RAM 16 cores (12P, 4E), DuckDB 1.1.3, Sequoia 15.1
-- Taxi data set: https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page
-- Inspired by: https://duckdb.org/2024/10/16/driving-csv-performance-benchmarking-duckdb-with-the-nyc-taxi-dataset.html
-- taxi_data_2019: ~84M rows
-- UNPIVOT (naive)
-- ~5500ms (1.1.3)
-- ~4500ms (1.2)
-- ~330ms (2025/02/13 git HEAD) https://github.com/duckdb/duckdb/pull/16221
WITH locations AS (
fname = 'pwned-passwords-2.0.txt'
# Change me
# echo -n password | shasum -a 1 | awk '{print toupper($1)}'
password = '7C4A8D09CA3762AF61E59520943DC26494F8941B'
search = password
with open(fname) as f:
for line in f:
sha = line[:40]
if search == sha:

Keybase proof

I hereby claim:

  • I am xevix on github.
  • I am xevix (https://keybase.io/xevix) on keybase.
  • I have a public key ASDKzeeW4q_b8UXMKqF-posWghvBE63A41Msr3QCiHTrxwo

To claim this, I am signing this object:

@xevix
xevix / foldl1_ish.rs
Last active September 10, 2017 05:50
/// A consuming fold that produces an optional value of the implementor's own
/// item type.
///
/// NOTE(review): the method name suggests a seedless left fold (first element
/// as the initial accumulator, `None` for an empty container). The declaration
/// itself does not enforce that — confirm against the implementations.
trait Folder {
    /// Type passed to the combining closure and returned by the fold.
    type Item;

    /// Consumes `self` and reduces it with `f`.
    ///
    /// `f` receives an owned `Self::Item` and a borrowed `Self::Item`, and
    /// returns an owned `Self::Item`. The overall result is wrapped in
    /// `Option`.
    fn foldl1<F: FnMut(Self::Item, &Self::Item) -> Self::Item>(self, f: F) -> Option<Self::Item>;
}
impl<T: Clone> Folder for Vec<T> {
type Item = T;
fn foldl1<F>(self, mut f: F) -> Option<Self::Item>
@xevix
xevix / loop.rs
Last active September 10, 2017 02:53
pub fn combine_all_option<T>(xs: &Vec<T>) -> Option<T>
where
T: Semigroup + Clone,
{
match xs.first() {
Some(head) => {
// Dear lord this reads horribly
xs[1..].iter().fold(Some((*head).clone()), |acc, x| acc.combine(&Some((*x).clone())))
}
_ => None