Created
August 3, 2020 12:18
-
-
Save marirs/9dbd962716337817bba99edb45afb47f to your computer and use it in GitHub Desktop.
Given a list of date objects - remove duplicates from that list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import datetime | |
import time | |
import calendar | |
from typing import List | |
def dedupe_dates(lst: list, precision: str="day", tz_aware: bool=False) -> List:
    """Remove duplicates from a list of date/datetime objects.

    Two entries are duplicates when they produce the same comparison key.
    For each group of duplicates the result keeps the position of the first
    occurrence and the value of the last occurrence.

    :param lst: list of date objects (``datetime.datetime``; plain
        ``datetime.date`` also works with ``precision="day"`` when
        ``tz_aware`` is false)
    :param precision: used when ``tz_aware`` is false: "day" compares
        year/month/day only, "time" compares year/month/day hour:minute:second
        (microseconds are ignored). Wall-clock fields are compared as-is and
        any tzinfo is ignored.
    :param tz_aware: if true, entries are compared as instants in time at
        whole-second resolution: aware datetimes are converted to UTC and
        naive datetimes are taken to already be in UTC. ``precision`` is not
        applied in this mode.
    :return: list of the de-duplicated date objects
    :raises ValueError: if ``tz_aware`` is false and ``precision`` is neither
        "day" nor "time"
    """
    if not tz_aware and precision not in ("day", "time"):
        # Without this check every entry would share one undefined key and
        # all but one element would be silently discarded.
        raise ValueError(
            "precision must be 'day' or 'time', got {!r}".format(precision)
        )

    utc = datetime.timezone.utc
    unique = {}
    for dt in lst:
        if tz_aware:
            if dt.tzinfo is None or dt.utcoffset() is None:
                # Naive value: interpret its wall clock as UTC.
                moment = dt.replace(tzinfo=utc)
            else:
                # Aware value: normalise to UTC so equal instants expressed
                # in different zones end up with the same key.
                moment = dt.astimezone(utc)
            key = moment.replace(microsecond=0)
        elif precision == "day":
            # Plain tuples keep the key independent of the machine's local
            # timezone and DST rules.
            key = (dt.year, dt.month, dt.day)
        else:
            key = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
        # Assigning (rather than setdefault) keeps the last duplicate's value
        # while the dict preserves the first duplicate's position.
        unique[key] = dt
    return list(unique.values())
# Demo: de-duplicate a mixed list of naive and timezone-aware datetimes using
# the default settings, then print how many entries remain followed by each
# surviving entry on its own line.
sample_dates = [
    datetime.datetime(2006, 4, 13, 0, 0),
    datetime.datetime(2006, 4, 13, 0, 0),
    datetime.datetime(2021, 5, 1, 0, 0),
    datetime.datetime(2021, 5, 1, 0, 0),
    datetime.datetime(2020, 4, 23, 0, 0),
    datetime.datetime(1997, 9, 15, 4, 0),
    # UTC-07:00
    datetime.datetime(
        1997, 9, 15, 0, 0,
        tzinfo=datetime.timezone(datetime.timedelta(hours=-7)),
    ),
    datetime.datetime(2020, 4, 23, 0, 0),
    datetime.datetime(2005, 5, 30, 0, 0),
    datetime.datetime(2021, 5, 31, 0, 0),
    # UTC+09:00
    datetime.datetime(
        2020, 6, 1, 1, 5, 9,
        tzinfo=datetime.timezone(datetime.timedelta(hours=9)),
    ),
    datetime.datetime(2020, 6, 1),
    datetime.datetime(2011, 4, 26, 17, 57, 27),
    datetime.datetime(2010, 2, 25, 1, 4, 59),
    datetime.datetime(2021, 2, 24, 23, 59, 59),
    datetime.datetime(2020, 1, 28, 10, 35, 17),
    datetime.datetime(2020, 11, 9, 0, 0),
    # UTC+03:00, listed twice on purpose
    datetime.datetime(
        2019, 10, 7, 17, 50, 9,
        tzinfo=datetime.timezone(datetime.timedelta(hours=3)),
    ),
    datetime.datetime(
        2019, 10, 7, 17, 50, 9,
        tzinfo=datetime.timezone(datetime.timedelta(hours=3)),
    ),
]
res = dedupe_dates(sample_dates)

print(len(res))
for r in res:
    print(r)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment