Created
January 11, 2018 07:53
-
-
Save dirkjonker/546fc67eb06e76142482d77569d21ebe to your computer and use it in GitHub Desktop.
deduplicate and organise photos
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import hashlib
import pathlib
import sys

import piexif
# Root of the organised photo library; process() files pictures below it
# in year/month/day sub-directories.
PIC_ROOT = pathlib.Path.home().joinpath('Pictures')

# Planning state: filled in by process(), reported by dry_run().
create_dirs = set()  # target directories that do not exist yet
mv_files = set()     # (source, target) pairs for planned moves
new_files = set()    # targets already claimed by a planned move
del_files = set()    # sources whose content duplicates their target
error_files = set()  # (source, target, reason) for unresolved conflicts
def gethash(p):
    """Return the hexadecimal MD5 digest of the contents of file ``p``.

    The digest is only used to tell whether two files have identical
    contents; it is not a security measure.

    Args:
        p: A ``pathlib.Path`` (or any object whose ``open('rb')`` returns
            a binary file object usable as a context manager).

    Returns:
        The MD5 digest as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    h = hashlib.new('md5')
    # The ``with`` block guarantees the handle is closed, and reading in
    # 1 MiB chunks keeps memory use flat for large pictures.
    with p.open('rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            h.update(chunk)
    return h.hexdigest()
def get_mtime(p):
    """Return the last-modification time of ``p`` as a naive local datetime.

    Args:
        p: A ``pathlib.Path`` pointing at an existing file.

    Returns:
        ``datetime.datetime`` built from the file's ``st_mtime``.
    """
    modified_ts = p.stat().st_mtime
    return datetime.datetime.fromtimestamp(modified_ts)
def get_exiftime(p):
    """Return the capture time recorded in the EXIF data of image ``p``.

    Loads the image metadata with ``piexif.load`` and parses tag 36867
    (0x9003, the EXIF ``DateTimeOriginal`` tag) from its ``'Exif'``
    section.

    Args:
        p: A ``pathlib.Path`` to the image file.

    Returns:
        A naive ``datetime.datetime`` parsed from the tag value.

    Raises:
        KeyError: If the ``'Exif'`` section or the tag is missing.
        ValueError: If the tag value is not ``YYYY:MM:DD HH:MM:SS``.
        Exception: Anything ``piexif.load`` raises for unreadable files.
    """
    exif_data = piexif.load(p.as_posix())
    raw_stamp = exif_data['Exif'][36867]
    stamp_text = raw_stamp.decode()
    return datetime.datetime.strptime(stamp_text, '%Y:%m:%d %H:%M:%S')
def get_file_date(p):
    """Return the best available date for picture ``p``.

    The EXIF capture time from ``get_exiftime`` is preferred; when it
    cannot be obtained for any reason, the file's modification time from
    ``get_mtime`` is used instead.

    Args:
        p: A ``pathlib.Path`` to the image file.

    Returns:
        A ``datetime.datetime`` for the picture.
    """
    try:
        return get_exiftime(p)
    except Exception:
        # Deliberately best-effort: missing or malformed EXIF data is
        # expected. ``Exception`` (not a bare ``except``) is caught so
        # that KeyboardInterrupt and SystemExit still propagate.
        return get_mtime(p)
def process(p):
    """Plan what should happen to picture ``p``; nothing on disk is changed.

    The target is ``PIC_ROOT/<year>/<month>/<day>/<file name>``, with the
    date taken from ``get_file_date``. The outcome is recorded in the
    module-level sets:

    * ``create_dirs`` - the target directory, if it does not exist yet;
    * ``mv_files`` / ``new_files`` - a planned move when the target name
      is free;
    * ``del_files`` - ``p`` duplicates the file that already occupies (or
      is already planned for) the target name;
    * ``error_files`` - same target name but different content.

    Args:
        p: A ``pathlib.Path`` to an existing image file.
    """
    d = get_file_date(p)
    new_dir = PIC_ROOT / d.strftime('%Y/%m/%d')
    if not new_dir.is_dir():
        create_dirs.add(new_dir)
    new_file = new_dir / p.name

    if new_file.is_file():
        if new_file.samefile(p):
            # ``p`` already sits at its target location. Comparing it
            # with itself would wrongly flag the only copy as a duplicate.
            return
        existing = new_file
    elif new_file in new_files:
        # The target is only *planned*, so it is not on disk yet and
        # cannot be hashed. Compare against the source of that planned
        # move instead. Entries are always added to ``mv_files`` and
        # ``new_files`` together, so the lookup finds a match.
        existing = next(src for src, dst in mv_files if dst == new_file)
    else:
        mv_files.add((p, new_file))
        new_files.add(new_file)
        return

    if gethash(existing) == gethash(p):
        del_files.add(p)
    else:
        error_files.add((p, new_file, 'identical name, different hash'))
def dry_run():
    """Print every planned action, then summary counts; change nothing.

    Reads the module-level ``create_dirs``, ``mv_files``, ``del_files``
    and ``error_files`` sets and reports each group in sorted order.
    """
    for directory in sorted(create_dirs):
        print('Would create directory {}'.format(directory))

    for move in sorted(mv_files):
        # Each entry is an (original, new) pair.
        print('Would move {} to {}'.format(*move))

    for duplicate in sorted(del_files):
        print('Would remove duplicate file {}'.format(duplicate))

    for conflict in sorted(error_files):
        source, target, reason = conflict
        print("error: {}: source {} target: {}".format(reason, source, target))

    move_count = len(mv_files)
    delete_count = len(del_files)
    print('Total number of files to move:', move_count)
    print('Total number of files to delete:', delete_count)
def main():
    """Scan the directory named on the command line and print the plan.

    Expects exactly one command-line argument: the directory to search
    recursively for pictures. Every match is passed to ``process`` and
    the resulting plan is printed by ``dry_run``.

    Raises:
        SystemExit: With status 1 when the argument is missing, when
            extra arguments are given, or when the argument is not a
            directory.
    """
    if len(sys.argv) != 2:
        print('Please provide directory to scan for images')
        # sys.exit rather than the bare ``exit`` helper: the latter is
        # injected by the ``site`` module for interactive use and is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)
    search_root = pathlib.Path(sys.argv[1])
    if not search_root.is_dir():
        print('{} is not a directory'.format(search_root))
        sys.exit(1)
    # NOTE(review): the pattern is upper-case only; on case-sensitive
    # file systems files named ``*.jpg`` are presumably skipped - confirm
    # that this is intended.
    for pic in search_root.rglob('*.JPG'):
        process(pic)
    dry_run()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment