Last active
August 5, 2022 18:14
-
-
Save siakaramalegos/f1872c0b2de2121371161dd237ec7d35 to your computer and use it in GitHub Desktop.
Runs queries on BigQuery and outputs results to local CSV files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# Converts BigQuery SQL to CSV. | |
# | |
# Usage: | |
# | |
# Before starting, install the BigQuery CLI, bq https://cloud.google.com/sdk/docs/install | |
# | |
# 1. Find all your queries and output the list of files into query_list.txt: | |
# `find almanac.httparchive.org/sql/2022/mobile-web/*.sql > query_list.txt` | |
# 2. Run `sql/run_queries.sh <sql-directory>` | |
# | |
# For example: | |
# | |
# sql/run_queries.sh sql/2019 | |
# | |
# This will run all 2019 queries. | |
# You could provide the path to a specific sql file to run only that query. | |
set -o pipefail | |
DIRECTORY=$1 | |
BQ_CMD="bq --format csv --project_id httparchive query --use_legacy_sql=false --max_rows=16000" | |
for sql in $(cat query_list.txt); do | |
metric=$(echo $sql | cut -d"/" -f5 | cut -d"." -f1) | |
echo "Querying $metric" | |
cat $sql | $BQ_CMD | sed '/^$/d' > $metric.csv | |
# Make sure the query succeeded. | |
if [ $? -ne 0 ]; then | |
echo "Error querying $sql" | |
exit 1 | |
fi | |
done | |
echo "Done!" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment