Last active
June 3, 2025 15:35
-
-
Save James-Rocker/fbd63d01fd5b3153ddf0cb34481d9a17 to your computer and use it in GitHub Desktop.
Polars vs pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark: read a 1,000,000-row CSV, add two integer columns, and sum the
# result -- once with pandas and once with Polars -- then compare wall times.
import time

import pandas as pd
import polars as pl

# Generate a sample CSV file for the test.
data = {
    "col1": range(1, 1000001),
    "col2": range(1000000, 0, -1),
    "col3": ["text"] * 1000000,
}
df = pd.DataFrame(data)
df.to_csv("sample.csv", index=False)

# Read and manipulate data using pandas.
# perf_counter() is monotonic and high-resolution; time.time() follows the
# wall clock and can jump if the system clock is adjusted mid-measurement.
start_time = time.perf_counter()
pandas_df = pd.read_csv("sample.csv")
pandas_df["col4"] = pandas_df["col1"] + pandas_df["col2"]
pandas_sum = pandas_df["col4"].sum()
pandas_time = time.perf_counter() - start_time
print(f"Pandas operation took {pandas_time:.4f} seconds")

# Read and manipulate data using Polars.
start_time = time.perf_counter()
polars_df = pl.read_csv("sample.csv")
# Build the new column from expressions rather than by indexing Series off
# the frame, so the addition is evaluated inside with_columns.
polars_df = polars_df.with_columns(
    (pl.col("col1") + pl.col("col2")).alias("col4")
)
polars_sum = polars_df["col4"].sum()
polars_time = time.perf_counter() - start_time
print(f"Polars operation took {polars_time:.4f} seconds")

# Verify that the results are the same. Raised explicitly (instead of using
# an `assert` statement) so the check still runs under `python -O`.
if pandas_sum != polars_sum:
    raise AssertionError("The results do not match!")

# Output comparison.
print(f"Pandas took {pandas_time:.4f} seconds, Polars took {polars_time:.4f} seconds")
Author
James-Rocker
commented
Jul 2, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment