Last active
July 1, 2024 17:18
-
-
Save mynameisvinn/3945844f0a3df32e0bf13d25de51d328 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
from glob import glob

from tqdm import tqdm
# Build prod_updates from every file matched under ./prod_updates_2/.
# Each line of each file is parsed as one JSON object (one candidate).
# raw data -> gs://etsy-recsys-ml-dev-data-nxsn/user/vtang/updates-ranker-v1/metrics/prod_updates_2
# NOTE(review): the original comment labelled this path "raw treatment", but the
# variable names below treat it as the production side -- confirm which is right.
prod_fp = glob("./prod_updates_2/*")

# key = notification_feed_id, value = notification_type of candidate position 0
prod_updates = {}
for fp in tqdm(prod_fp):
    # Explicit UTF-8 so decoding does not depend on the platform's locale.
    with open(fp, "r", encoding="utf-8") as f:
        # Stream the file line by line instead of materialising it with readlines().
        for line in f:
            if not line.strip():
                # A blank (e.g. trailing) line would make json.loads raise.
                continue
            candidate = json.loads(line)
            # we want the top candidate for each notification_feed_id
            if candidate['position'] == 0:
                # If a feed id has several position-0 records, the last one read wins.
                prod_updates[candidate['notification_feed_id']] = candidate['notification_type']

# A bare `len(...)` expression is only displayed in a notebook/REPL; print it so
# the count is also visible when this runs as a script.
print(len(prod_updates))
# Build updates_ranker_v1 from every file matched under
# ./updates-ranker-v2/updates-ranker-v1/. Each line of each file is parsed as
# one JSON object (one candidate).
# raw data -> gs://etsy-recsys-ml-dev-data-nxsn/user/vtang/updates-ranker-v1/metrics/updates-ranker-v1/
# dataflow logs -> https://console.cloud.google.com/dataflow/jobs/us-central1/2024-07-01_09_45_29-15640809537440376803;step=ParquetDataLoaderAndFilterAttributions;graphView=0?project=etsy-recsys-ml-dev&pageState=(%22dfTime%22:(%22l%22:%22dfJobMaxTime%22))
# NOTE(review): the original comment labelled this path "raw production", but the
# variable names below treat it as the treatment side -- confirm which is right.
# NOTE(review): the local directory has an "updates-ranker-v2" parent while the
# bucket path and dict name say v1 -- confirm the glob points at the intended data.
treatment_fp = glob("./updates-ranker-v2/updates-ranker-v1/*")
print(len(treatment_fp))

# key = notification_feed_id, value = notification_type of candidate position 0
updates_ranker_v1 = {}
for fp in tqdm(treatment_fp):
    # Explicit UTF-8 so decoding does not depend on the platform's locale.
    with open(fp, "r", encoding="utf-8") as f:
        # Stream the file line by line instead of materialising it with readlines().
        for line in f:
            if not line.strip():
                # A blank (e.g. trailing) line would make json.loads raise.
                continue
            candidate = json.loads(line)
            # we want the top candidate for each notification_feed_id
            if candidate['position'] == 0:
                # If a feed id has several position-0 records, the last one read wins.
                updates_ranker_v1[candidate['notification_feed_id']] = candidate['notification_type']

# A bare `len(...)` expression is only displayed in a notebook/REPL; print it so
# the count is also visible when this runs as a script.
print(len(updates_ranker_v1))
# Compare the top (position 0) candidates of the two runs: for every
# notification_feed_id present in both dicts, print the id when the stored
# notification_type values differ. Ids found only in updates_ranker_v1 are skipped.
for feed_id, treatment_type in updates_ranker_v1.items():
    if feed_id in prod_updates and treatment_type != prod_updates[feed_id]:
        print(feed_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment