Skip to content

Instantly share code, notes, and snippets.

@James-Rocker
Last active June 3, 2025 15:35
Show Gist options
  • Save James-Rocker/fbd63d01fd5b3153ddf0cb34481d9a17 to your computer and use it in GitHub Desktop.
Save James-Rocker/fbd63d01fd5b3153ddf0cb34481d9a17 to your computer and use it in GitHub Desktop.
Polars vs pandas
import pandas as pd
import polars as pl
import time
# Generate a sample CSV file for the test.
# One row count drives all three columns so their lengths cannot drift apart
# when the benchmark size is changed.
N_ROWS = 1_000_000
data = {
    "col1": range(1, N_ROWS + 1),  # ascending 1..N_ROWS
    "col2": range(N_ROWS, 0, -1),  # descending N_ROWS..1
    "col3": ["text"] * N_ROWS,  # constant string column
}
df = pd.DataFrame(data)
# index=False keeps the pandas row index out of the file, so the CSV holds
# only the three data columns.
df.to_csv("sample.csv", index=False)
# Read and manipulate data using pandas.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the system clock and can jump if that
# clock is adjusted mid-run.
start_time = time.perf_counter()
pandas_df = pd.read_csv("sample.csv")
# Element-wise sum of col1 and col2, stored as a new column.
pandas_df['col4'] = pandas_df['col1'] + pandas_df['col2']
pandas_sum = pandas_df['col4'].sum()
# The timed region covers CSV parsing, the column addition and the reduction.
pandas_time = time.perf_counter() - start_time
print(f"Pandas operation took {pandas_time:.4f} seconds")
# Read and manipulate data using Polars.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the system clock and can jump if that
# clock is adjusted mid-run.
start_time = time.perf_counter()
polars_df = pl.read_csv("sample.csv")
# Describe col4 as a column expression and let with_columns evaluate it,
# instead of extracting each column as a Series and adding those by hand.
polars_df = polars_df.with_columns((pl.col('col1') + pl.col('col2')).alias('col4'))
polars_sum = polars_df['col4'].sum()
# The timed region covers CSV parsing, the column addition and the reduction.
polars_time = time.perf_counter() - start_time
print(f"Polars operation took {polars_time:.4f} seconds")
# Verify that the results are the same.
# An explicit raise is used instead of an `assert` statement because asserts
# are removed when Python runs with -O, which would let a mismatch between
# the two libraries go unnoticed. The exception type and message are kept.
if pandas_sum != polars_sum:
    raise AssertionError("The results do not match!")
# Output comparison
print(f"Pandas took {pandas_time:.4f} seconds, Polars took {polars_time:.4f} seconds")
@James-Rocker
Copy link
Author

Pandas operation took 0.1143 seconds
Polars operation took 0.0451 seconds
Pandas took 0.1143 seconds, Polars took 0.0451 seconds

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment