Skip to content

Instantly share code, notes, and snippets.

@psychemedia
Last active August 13, 2024 09:29
Show Gist options
  • Save psychemedia/baca1d22fbb5673aad54a1b91dda07f1 to your computer and use it in GitHub Desktop.
Save psychemedia/baca1d22fbb5673aad54a1b91dda07f1 to your computer and use it in GitHub Desktop.
Example of using duckdb with various magic-duckdb and jupysql in Jupyter notebook (works in Jupyterlite)
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"kernelspec": {
"name": "python",
"display_name": "Python (Pyodide)",
"language": "python"
},
"language_info": {
"codemirror_mode": {
"name": "python",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8"
}
},
"nbformat_minor": 4,
"nbformat": 4,
"cells": [
{
"cell_type": "code",
"source": "import duckdb",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"source": "duckdb.sql(\"select 42\")",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 2,
"output_type": "execute_result",
"data": {
"text/plain": "┌───────┐\n│ 42 │\n│ int32 │\n├───────┤\n│ 42 │\n└───────┘"
},
"metadata": {}
}
],
"execution_count": 2
},
{
"cell_type": "code",
"source": "%pip install --quiet magic-duckdb\n%load_ext magic_duckdb",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"source": "%%dql\nSELECT 42;",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 2,
"output_type": "execute_result",
"data": {
"text/plain": " 42\n0 42",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>42</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>42</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 2
},
{
"cell_type": "code",
"source": "%pip install jupysql\n%env PLOOMBER_STATS_ENABLED=\"false\"\n%env PLOOMBER_VERSION_CHECK_DISABLED=\"false\"\n%load_ext sql\n\n#https://jupysql.ploomber.io/en/latest/integrations/duckdb-native.html",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "env: PLOOMBER_STATS_ENABLED=\"false\"\nenv: PLOOMBER_VERSION_CHECK_DISABLED=\"false\"\n",
"output_type": "stream"
}
],
"execution_count": 1
},
{
"cell_type": "code",
"source": "import pandas as pd\nimport duckdb\n\nconn = duckdb.connect()\ndf = pd.DataFrame({\"x\": range(10)})",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 4
},
{
"cell_type": "code",
"source": "%sql conn",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 5
},
{
"cell_type": "code",
"source": "%%sql\nSELECT *\nFROM df\nWHERE x > 4",
"metadata": {
"trusted": true
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "Running query in 'DuckDBPyConnection'",
"text/html": "<span style=\"None\">Running query in &#x27;DuckDBPyConnection&#x27;</span>"
},
"metadata": {}
},
{
"execution_count": 6,
"output_type": "execute_result",
"data": {
"text/plain": "+---+\n| x |\n+---+\n| 5 |\n| 6 |\n| 7 |\n| 8 |\n| 9 |\n+---+",
"text/html": "<table>\n <thead>\n <tr>\n <th>x</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>5</td>\n </tr>\n <tr>\n <td>6</td>\n </tr>\n <tr>\n <td>7</td>\n </tr>\n <tr>\n <td>8</td>\n </tr>\n <tr>\n <td>9</td>\n </tr>\n </tbody>\n</table>"
},
"metadata": {}
}
],
"execution_count": 6
},
{
"cell_type": "code",
"source": "df.to_csv(\"test.csv\")",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 8
},
{
"cell_type": "code",
"source": "%%sql\nCREATE TABLE readcsvtest AS SELECT * FROM test.csv",
"metadata": {
"trusted": true
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "Running query in 'DuckDBPyConnection'",
"text/html": "<span style=\"None\">Running query in &#x27;DuckDBPyConnection&#x27;</span>"
},
"metadata": {}
},
{
"execution_count": 9,
"output_type": "execute_result",
"data": {
"text/plain": "+-------+\n| Count |\n+-------+\n| 10 |\n+-------+",
"text/html": "<table>\n <thead>\n <tr>\n <th>Count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>10</td>\n </tr>\n </tbody>\n</table>"
},
"metadata": {}
}
],
"execution_count": 9
},
{
"cell_type": "code",
"source": "%%sql\nSELECT *\nFROM readcsvtest\nLIMIT 3",
"metadata": {
"trusted": true
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "Running query in 'DuckDBPyConnection'",
"text/html": "<span style=\"None\">Running query in &#x27;DuckDBPyConnection&#x27;</span>"
},
"metadata": {}
},
{
"execution_count": 10,
"output_type": "execute_result",
"data": {
"text/plain": "+---------+---+\n| column0 | x |\n+---------+---+\n| 0 | 0 |\n| 1 | 1 |\n| 2 | 2 |\n+---------+---+",
"text/html": "<table>\n <thead>\n <tr>\n <th>column0</th>\n <th>x</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <td>2</td>\n <td>2</td>\n </tr>\n </tbody>\n</table>"
},
"metadata": {}
}
],
"execution_count": 10
},
{
"cell_type": "code",
"source": " ",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": null
}
]
}
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"kernelspec": {
"name": "python",
"display_name": "Python (Pyodide)",
"language": "python"
},
"language_info": {
"codemirror_mode": {
"name": "python",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8"
}
},
"nbformat_minor": 4,
"nbformat": 4,
"cells": [
{
"cell_type": "code",
"source": "import duckdb",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"source": "duckdb.sql(\"select 42\")",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 2,
"output_type": "execute_result",
"data": {
"text/plain": "┌───────┐\n│ 42 │\n│ int32 │\n├───────┤\n│ 42 │\n└───────┘"
},
"metadata": {}
}
],
"execution_count": 2
},
{
"cell_type": "code",
"source": "%pip install --quiet magic-duckdb\n%load_ext magic_duckdb",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"source": "%%dql\nSELECT 42;",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 2,
"output_type": "execute_result",
"data": {
"text/plain": " 42\n0 42",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>42</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>42</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 2
},
{
"cell_type": "code",
"source": "%pip install jupysql\n%env PLOOMBER_STATS_ENABLED=\"false\"\n%env PLOOMBER_VERSION_CHECK_DISABLED=\"false\"\n%load_ext sql\n\n#https://jupysql.ploomber.io/en/latest/integrations/duckdb-native.html",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "env: PLOOMBER_STATS_ENABLED=\"false\"\nenv: PLOOMBER_VERSION_CHECK_DISABLED=\"false\"\n",
"output_type": "stream"
}
],
"execution_count": 1
},
{
"cell_type": "code",
"source": "import pandas as pd\nimport duckdb\n\nconn = duckdb.connect()\ndf = pd.DataFrame({\"x\": range(10)})",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 4
},
{
"cell_type": "code",
"source": "%sql conn",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 5
},
{
"cell_type": "code",
"source": "%%sql\nSELECT *\nFROM df\nWHERE x > 4",
"metadata": {
"trusted": true
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "Running query in 'DuckDBPyConnection'",
"text/html": "<span style=\"None\">Running query in &#x27;DuckDBPyConnection&#x27;</span>"
},
"metadata": {}
},
{
"execution_count": 6,
"output_type": "execute_result",
"data": {
"text/plain": "+---+\n| x |\n+---+\n| 5 |\n| 6 |\n| 7 |\n| 8 |\n| 9 |\n+---+",
"text/html": "<table>\n <thead>\n <tr>\n <th>x</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>5</td>\n </tr>\n <tr>\n <td>6</td>\n </tr>\n <tr>\n <td>7</td>\n </tr>\n <tr>\n <td>8</td>\n </tr>\n <tr>\n <td>9</td>\n </tr>\n </tbody>\n</table>"
},
"metadata": {}
}
],
"execution_count": 6
},
{
"cell_type": "code",
"source": "df.to_csv(\"test.csv\")",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 8
},
{
"cell_type": "code",
"source": "%%sql\nCREATE TABLE readcsvtest AS SELECT * FROM test.csv",
"metadata": {
"trusted": true
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "Running query in 'DuckDBPyConnection'",
"text/html": "<span style=\"None\">Running query in &#x27;DuckDBPyConnection&#x27;</span>"
},
"metadata": {}
},
{
"execution_count": 9,
"output_type": "execute_result",
"data": {
"text/plain": "+-------+\n| Count |\n+-------+\n| 10 |\n+-------+",
"text/html": "<table>\n <thead>\n <tr>\n <th>Count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>10</td>\n </tr>\n </tbody>\n</table>"
},
"metadata": {}
}
],
"execution_count": 9
},
{
"cell_type": "code",
"source": "%%sql\nSELECT *\nFROM readcsvtest\nLIMIT 3",
"metadata": {
"trusted": true
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "Running query in 'DuckDBPyConnection'",
"text/html": "<span style=\"None\">Running query in &#x27;DuckDBPyConnection&#x27;</span>"
},
"metadata": {}
},
{
"execution_count": 10,
"output_type": "execute_result",
"data": {
"text/plain": "+---------+---+\n| column0 | x |\n+---------+---+\n| 0 | 0 |\n| 1 | 1 |\n| 2 | 2 |\n+---------+---+",
"text/html": "<table>\n <thead>\n <tr>\n <th>column0</th>\n <th>x</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <td>2</td>\n <td>2</td>\n </tr>\n </tbody>\n</table>"
},
"metadata": {}
}
],
"execution_count": 10
},
{
"cell_type": "code",
"source": " ",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": null
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment