Last active
March 11, 2017 19:38
-
-
Save dwcoates/08a20422f77e2e65199016475ad3310d to your computer and use it in GitHub Desktop.
for pops
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import gzip | |
import io | |
def read_json(filename, gz=None): | |
""" | |
Read in comma-newline delimited json files encoded in latin-1. | |
""" | |
_io = gzip.io if gz else io | |
with _io.open("./data_sample.json", encoding="latin1") as f: | |
content = f.read().strip() | |
parse_fails = 0 | |
data = [] | |
for line in content.split("\n"): | |
line = line[:-1] # drop annoying commas after json objects | |
js = None | |
try: | |
js = json.loads(line) | |
except Exception as ex: | |
parse_fails+=1 | |
if parse_fails and not parse_fails % 100: | |
print ("Warning: {0} json object" + | |
" parse failures ({1})").format(parse_fails, | |
ex.message) | |
if js: | |
data.append(js) | |
if parse_fails: | |
print "Warning: {} json object parse failures.".format(parse_fails) | |
else: | |
print "No parse failures while reading '{}'".format(filename) | |
return data |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment