Created
January 29, 2018 09:31
-
-
Save cryptoscopia/bfc424fe73f007d0eb0437a4f50e63c4 to your computer and use it in GitHub Desktop.
Parse CSV exports of Etherscan transactions into one graph-friendly CSV file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
import argparse | |
from collections import OrderedDict | |
import csv | |
from datetime import datetime, timedelta | |
from decimal import Decimal | |
parser = argparse.ArgumentParser() | |
parser.add_argument('incoming', help='CSV export of transactions') | |
parser.add_argument('outgoing', help='CSV export of internal transactions') | |
args = parser.parse_args() | |
# A place to gather the incoming transaction data | |
data_in = OrderedDict() | |
with open(args.incoming) as f: | |
incoming = csv.reader(f) | |
# Skip CSV header row | |
next(incoming) | |
for _,_,timestamp,_,_,_,_,value_in,value_out,_,_,_,_,_,_ in incoming: | |
# Sanity check, just in case | |
assert not float(value_out) | |
# Round the timestamp down to the last 10-minute mark | |
timestamp = datetime.utcfromtimestamp(int(timestamp)) | |
rounded_minute = timestamp.minute // 10 * 10 | |
timestamp = timestamp.replace(minute=rounded_minute, second=0, microsecond=0) | |
# First parsed row, store initial record | |
if not data_in: | |
first_timestamp = timestamp | |
data_in[timestamp] = Decimal(value_in) | |
last_timestamp = timestamp | |
continue | |
# We've already had a transaction for this 10-minute period, add this one to it | |
if timestamp in data_in: | |
data_in[timestamp] += Decimal(value_in) | |
last_timestamp = timestamp | |
continue | |
# Fill in 10-minute periods without transactions with the total from | |
# the previous period (probably unnecessary, but avoids gaps) | |
while last_timestamp + timedelta(minutes=10) < timestamp: | |
data_in[last_timestamp + timedelta(minutes=10)] = data_in[last_timestamp] | |
last_timestamp += timedelta(minutes=10) | |
# And now add the new 10-minute period with the cumulative total | |
data_in[timestamp] = data_in[last_timestamp] + Decimal(value_in) | |
last_timestamp = timestamp | |
# This is where we'll gather the outgoing transaction data | |
data_out = OrderedDict() | |
# Start it off with 0 at the time of the first incoming transaction | |
data_out[first_timestamp] = Decimal('0.0') | |
# The reason I keep adding these silly *_timestamp variables is because I can't | |
# do data_in.keys()[0] or data_in.keys()[-1], and I don't want to keep casting | |
# them into lists. | |
last_out_timestamp = first_timestamp | |
with open(args.outgoing) as f: | |
outgoing = csv.reader(f) | |
# Skip CSV header row | |
next(outgoing) | |
for _,_,timestamp,_,_,_,_,value_in,value_out,_,_,_,_,_ in outgoing: | |
# Sanity check, just in case | |
assert not float(value_in) | |
# Round the timestamp down to the last 10-minute mark | |
timestamp = datetime.utcfromtimestamp(int(timestamp)) | |
rounded_minute = timestamp.minute // 10 * 10 | |
timestamp = timestamp.replace(minute=rounded_minute, second=0, microsecond=0) | |
# We've already had a transaction for this 10-minute period, add this one to it | |
if timestamp in data_out: | |
data_out[timestamp] += Decimal(value_out) | |
last_out_timestamp = timestamp | |
continue | |
# Fill in 10-minute periods without transactions as we did for incoming | |
while last_out_timestamp + timedelta(minutes=10) < timestamp: | |
data_out[last_out_timestamp + timedelta(minutes=10)] = data_out[last_out_timestamp] | |
last_out_timestamp += timedelta(minutes=10) | |
# And now add the new 10-minute period with the cumulative total | |
data_out[timestamp] = data_out[last_out_timestamp] + Decimal(value_in) | |
last_out_timestamp = timestamp | |
# If the outgoing transactions end more than 10 minutes before | |
# the incoming ones, fill in the empty periods at the end | |
while last_out_timestamp < last_timestamp: | |
data_out[last_out_timestamp + timedelta(minutes=10)] = data_out[last_out_timestamp] | |
last_out_timestamp += timedelta(minutes=10) | |
# Output our data in CSV format | |
for k, v in data_in.items(): | |
print('%s,%s,%s' % (k, v, data_out[k])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Created for use with PoWH coin: https://etherscan.io/address/0xa7ca36f7273d4d38fc2aec5a454c497f86728a7a
Download CSV export of transactions as
incoming.csv
Download CSV export of internal transaction as
outgoing.csv
Run
python powh_graph.py incoming.csv outgoing.csv > combined.csv
Open
combined.csv
with your favourite spreadsheet editor (e.g. Google Sheets) and generate graphIt should look something like this: