A simple Python script for sequentially fetching URLs, parsing their JSON responses, and writing them to files.
#!/usr/bin/python3

import json
import sys
from pathlib import Path

import requests
class bcolors:
    """
    Color codes for the command line
    """
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
def print_error(output: str):
    print("{}{}{}".format(bcolors.FAIL, output, bcolors.ENDC))


def print_warning(output: str):
    print("{}{}{}".format(bcolors.WARNING, output, bcolors.ENDC))


def print_ok(output: str):
    print("{}{}{}".format(bcolors.OKGREEN, output, bcolors.ENDC))
def get_urls_from_file(filename: str) -> list:
    with open(filename) as f:
        lines = f.readlines()

    # Skip blank lines and commented-out URLs
    urls = [line.strip() for line in lines if line.strip() and not line.startswith('#')]
    return urls
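# A hypothetical example of the input file this function expects
# (one URL per line; lines starting with '#' are skipped):
#
#   https://jsonplaceholder.typicode.com/todos/1
#   # https://jsonplaceholder.typicode.com/todos/2   <- ignored
#   https://jsonplaceholder.typicode.com/todos/3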
def write_to_file(content: str, directory: str, filename: str):
    # Create the directory if it doesn't exist
    Path(directory).mkdir(parents=True, exist_ok=True)
    rel_path = "{}/{}".format(directory, filename)
    with open(rel_path, "w") as text_file:
        text_file.write(content)
def save_response_to_file(url: str, directory: str, filename: str, print_result: bool = False) -> bool:
    r = requests.get(url)
    try:
        if print_result:
            print("[{}] {}".format(r.status_code, url))
        resp = r.json()
        json_str = json.dumps(resp, indent=2, sort_keys=True)
        write_to_file(content=json_str, directory=directory, filename=filename)
        return True
    except ValueError:
        # r.json() raises a subclass of ValueError when the body isn't valid
        # JSON, regardless of the requests version in use
        if print_result:
            print_error('JSON Decode Error: {}'.format(url))
        return False
if __name__ == "__main__":
    # Read the command-line arguments passed to this script
    args = sys.argv
    if len(args) != 3:
        print_error('Usage: {} file_with_urls.txt directory_to_write_responses_to'.format(args[0]))
        sys.exit(1)

    filename = args[1]
    directory = args[2]

    urls = get_urls_from_file(filename)
    saved = 0
    for url in urls:
        # TODO: Parallelize this operation or use asyncio? (see the thread-pool sketch below)
        # Name each output file after the last path segment of its URL
        response_filename = url.rsplit('/', 1)[-1]
        success = save_response_to_file(url=url, directory=directory, filename=response_filename, print_result=True)
        if success:
            saved += 1

    status = 'Saved {} of {} url responses'.format(saved, len(urls))
    if saved == len(urls):
        print_ok(status)
    else:
        print_warning(status)
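Assuming the script is saved as fetch_urls.py (a hypothetical name) and the sample input file above is urls.txt, a typical invocation matching the Usage string looks like:

    python3 fetch_urls.py urls.txt responses

Each successful response is written to the responses directory, named after the last path segment of its URL (e.g. responses/1 for .../todos/1).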
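The TODO in the main loop asks about parallelizing the fetches. Below is a minimal sketch of one way to do that with concurrent.futures.ThreadPoolExecutor, reusing the save_response_to_file function defined above; the function name save_all_parallel and the max_workers=8 cap are my own choices, not part of the original script.

    from concurrent.futures import ThreadPoolExecutor


    def save_all_parallel(urls: list, directory: str) -> int:
        """Fetch every URL concurrently; return the number saved successfully."""
        def task(url: str) -> bool:
            filename = url.rsplit('/', 1)[-1]
            return save_response_to_file(url=url, directory=directory, filename=filename, print_result=True)

        # Threads suit this I/O-bound workload; map() preserves input order
        with ThreadPoolExecutor(max_workers=8) as executor:
            results = list(executor.map(task, urls))
        return sum(results)

Note that, like the sequential loop, a network-level failure in requests.get will still raise, surfacing when the results are iterated.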