Created
January 1, 2022 21:28
-
-
Save joelkuiper/b9b1866a0fa2aec8a448464da5622222 to your computer and use it in GitHub Desktop.
Utility for copying files with time stamps to folders by week
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import shutil | |
import glob | |
import re | |
import dateutil.parser as dp | |
import pandas as pd | |
# Folder scanned for input files, and root folder that receives the
# per-week sub-folders.
base_dir = "L_frames"
output_dir = "by_week"

# Matches a date written as four digits, two digits, two digits, separated
# by hyphens (e.g. 2022-01-01).
date_pattern = r"\d{4}-\d{2}-\d{2}"

# Every entry directly inside base_dir; the glob is evaluated once, at
# import time.
files = glob.glob(os.path.join(base_dir, "*"))
def date_from_file(s):
    """Extract the date embedded in a file name or path.

    The first substring of ``s`` that matches the module-level
    ``date_pattern`` is handed to ``dateutil``'s parser.

    Args:
        s: File name or path expected to contain a date matching
            ``date_pattern``.

    Returns:
        The value produced by ``dp.parse`` for the matched substring.

    Raises:
        ValueError: If ``s`` contains no substring matching
            ``date_pattern``, or if ``dp.parse`` rejects the matched text
            (dateutil's parser error is a ``ValueError`` subclass).
    """
    match = re.search(date_pattern, s)
    if match is None:
        # Without this guard the failure surfaces as an AttributeError on
        # None that does not say which file was at fault.
        raise ValueError(f"no date matching {date_pattern!r} found in {s!r}")
    return dp.parse(match.group())
def parse_files(files):
    """Pair every path with the date found in its name.

    Args:
        files: Iterable of file paths; each is passed to ``date_from_file``.

    Returns:
        A list with one ``{'file': path, 'date': parsed_date}`` dict per
        input path, in input order.
    """
    records = []
    for path in files:
        parsed = date_from_file(path)
        records.append({'file': path, 'date': parsed})
    return records
def week_buckets(file_dates):
    """Group file records into weekly buckets.

    Args:
        file_dates: List of ``{'file': ..., 'date': ...}`` records, as
            produced by ``parse_files``.

    Returns:
        A list of ``(timestamp, DataFrame)`` pairs obtained by iterating a
        weekly (``'W'``) resample on the ``date`` column. Weeks between the
        first and last date that contain no records can appear as empty
        frames. An empty input yields an empty list.
    """
    # An empty frame has no 'date' column to resample on, so short-circuit.
    if not len(file_dates):
        return []
    # pandas exposes the class as ``DataFrame``; ``pd.dataframe`` does not
    # exist and raised AttributeError on every call.
    df = pd.DataFrame(file_dates)
    return list(df.resample('W', on='date'))
def copy_files(week_buckets):
    """Copy every bucketed file into a per-week folder under ``output_dir``.

    Args:
        week_buckets: Iterable of ``(ts, df)`` pairs, where ``ts`` is the
            timestamp labelling a bucket and ``df`` is a DataFrame with
            ``file`` and ``date`` columns.

    Each folder is named ``<ts minus 7 days>-<ts>`` with both dates formatted
    as ``%Y%m%d``. Buckets without rows are reported on stdout and skipped;
    no folder is created for them.
    """
    day_fmt = '%Y%m%d'
    for label, frame in week_buckets:
        span_start = label - pd.to_timedelta(7, unit='d')
        bucket_name = f"{span_start.strftime(day_fmt)}-{label.strftime(day_fmt)}"

        if len(frame) == 0:
            print(f"no data for the week of {bucket_name}")
            continue

        dst = os.path.join(output_dir, bucket_name)
        os.makedirs(dst, exist_ok=True)

        for path, stamp in zip(frame["file"], frame["date"]):
            # The progress line shows the path with its extension stripped.
            stem = os.path.splitext(path)[0]
            print(f"Copying {stem} from {stamp.strftime(day_fmt)} to {bucket_name}")
            shutil.copy(os.path.abspath(path), dst)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment