Last active
August 31, 2023 08:17
-
-
Save KR1470R/1729c935ea8206c08e1be17abd48737d to your computer and use it in GitHub Desktop.
CV Type file corrector
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################################################################### | |
#################################### Requirements ##################################### | |
# Python version 3.10 or newer (not tested on older versions)
# pip install python-magic | |
#################################### How does it work ##################################### | |
# Traverses through files in specified directory, checking the metadata of each file: | |
# - if file type is not pdf/docx/doc/txt - REMOVES it, | |
# - if the file extension differs from the type in the metadata (pdf/docx/doc/txt) - RENAMES it.
#################################### How to Run ##################################### | |
# 1. save the script to your system. | |
# 2. python cv-files-corrector.py /path/to/folder/with/cv | |
# 3. wait for the "Done!" message.
########################################################################################### | |
import magic | |
import os | |
import sys | |
# Command-line handling: the first positional argument is the directory to scan.
args = sys.argv
if len(args) <= 1:
    # sys.exit() with a string writes it to stderr and exits with status 1.
    sys.exit("Directory with resumes should be specified!")
dir_path = args[1]
# Fail early with a readable message instead of an os.listdir() traceback later.
if not os.path.isdir(dir_path):
    sys.exit(f"{dir_path} is not a directory!")
def should_be_removed(file_mime_type, path, removable_types=None):
    """Delete *path* when its MIME type matches one of the removable types.

    Args:
        file_mime_type: MIME type string of the file, e.g. ``"image/png"``.
        path: Location of the file that is deleted on a match.
        removable_types: Substrings to look for in ``file_mime_type``. When
            omitted, the module-level ``types_to_remove`` is used if it is
            defined; otherwise nothing is treated as removable.

    Returns:
        True if the file was deleted, False otherwise.

    Raises:
        OSError: If ``os.remove`` fails for the matched file.
    """
    if removable_types is None:
        # The function used to read a global named ``types_to_remove``
        # directly and raised NameError when it was not defined. Look it up
        # defensively so an existing global still works and a missing one
        # simply means "remove nothing".
        removable_types = globals().get("types_to_remove", ())

    if any(marker in file_mime_type for marker in removable_types):
        os.remove(path)
        return True
    return False
def get_file_type(path):
    """Map the detected MIME type of *path* to a supported file extension.

    The MIME type is obtained from ``magic`` and matched by substring against
    the supported kinds, in the order listed below.

    Returns:
        ``"pdf"``, ``"doc"``, ``"docx"`` or ``"txt"`` for a supported type.
        For anything else a notice is printed and ``None`` is returned; this
        function itself does not delete the file.
    """
    # Ordered (MIME substring, extension) pairs; the first match wins.
    supported_kinds = (
        ("pdf", "pdf"),
        ("msword", "doc"),
        ("vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
        ("text/plain", "txt"),
    )

    detector = magic.Magic(mime=True)
    file_mime_type = detector.from_file(path)

    for mime_marker, extension in supported_kinds:
        if mime_marker in file_mime_type:
            return extension

    print(f"Removing unsupportable file {path} with type {file_mime_type}")
    return None
# Walk the directory once: delete files that are not supported CV formats and
# rename files whose extension disagrees with their detected type.
# The listing is taken up front, so removals/renames do not affect iteration.
files = os.listdir(dir_path)
for file in files:
    full_path = os.path.join(dir_path, file)

    # os.listdir() also returns sub-directories; os.remove() cannot delete
    # them, so only regular files are processed.
    if not os.path.isfile(full_path):
        continue

    # splitext() keeps dots inside the stem ("john.doe.pdf" -> "john.doe",
    # ".pdf") and reports dotfiles such as ".bashrc" as having no extension.
    filename, dot_extension = os.path.splitext(file)
    if not dot_extension:
        print(f"Removing unsupported file {file}")
        os.remove(full_path)
        continue
    filename_type = dot_extension[1:]

    file_type = get_file_type(full_path)
    if file_type is None:
        # get_file_type() has already printed the removal notice; perform the
        # deletion that the notice and the header comment describe.
        os.remove(full_path)
        continue

    # Compare case-insensitively so "CV.PDF" is not treated as mislabelled.
    if file_type == filename_type.lower():
        continue

    new_file_path = os.path.join(dir_path, f"{filename}.{file_type}")
    # os.rename() may silently replace an existing file; never clobber a CV.
    if os.path.exists(new_file_path):
        print(f"Skipped renaming {full_path}: {new_file_path} already exists")
        continue
    os.rename(full_path, new_file_path)
    print(f"Renamed {full_path} to {new_file_path}")
print("Done!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment