Recover URLs from The Great Suspender
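For context, a tab suspended by The Great Suspender turns into a chrome-extension:// link that carries the original address in its &uri= hash parameter, typically something like this (the extension id and the other hash parameters are placeholders here, not taken from the script):

    chrome-extension://<extension-id>/suspended.html#ttl=Example&pos=0&uri=https://example.com/page

The script below splits each such line on "&uri=" and keeps everything after it, recovering https://example.com/page.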
# script to ask the user for a txt file name, then clean the URLs and save them in new txt files

def main():
    # ask the user for the txt file name
    file_name = input("Enter the txt file name: ")
    parse_file(file_name)


def parse_file(file_name):
    with open(file_name, "r") as file:
        # read the file
        content = file.read()
    # split the content by newline
    content = content.split("\n")
    suspendedURLS = []  # list to store the suspended URLs
    regularURLS = []  # list to store the regular URLs
    for i, url in enumerate(content):
        # check if the line contains the infamous suspension prefix
        if "chrome-extension://" in url:
            # clean the URL in place; enumerate gives the right index even when
            # the same URL appears more than once (content.index() would not)
            content[i] = clean_url(url)
            suspendedURLS.append(content[i])  # add the cleaned URL to the list
        else:
            regularURLS.append(url)
    save_urls("all_urls.txt", content)  # save all the URLs (suspended ones cleaned) in a new txt file
    save_urls("regular_urls.txt", regularURLS)  # save the regular URLs in a new txt file (without the suspended URLs)
    save_urls("cleaned_urls.txt", suspendedURLS)  # save the cleaned (formerly suspended) URLs in a new txt file
    remove_duplicates("cleaned_urls", suspendedURLS)  # remove duplicates from the cleaned URLs and save them


# gets a single "link", cleans it, then returns it
def clean_url(string):
    # the original address follows the "&uri=" parameter; maxsplit=1 keeps it
    # intact even if the address itself happens to contain "&uri="
    return string.split("&uri=", 1)[1]


def save_urls(title, content):
    with open(title, "w") as file:
        # write the URLs to the file, one per line
        for url in content:
            file.write(url + "\n")


# using a set to remove duplicates
def remove_duplicates(title, content):
    # turn the list into a set, then save it under a new name
    content = set(content)
    new_name = title + "_no_duplicates.txt"
    save_urls(new_name, content)  # save the deduplicated URLs
    return content


if __name__ == "__main__":
    main()
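A quick standalone sanity check of the cleaning step, as a minimal sketch (the suspended link is a made-up example in the placeholder format shown above):

    # recover the original address from a single suspended link
    suspended = "chrome-extension://<extension-id>/suspended.html#ttl=Example&pos=0&uri=https://example.com/page"
    print(suspended.split("&uri=", 1)[1])  # prints https://example.com/page

To run the full script, save it under any name (e.g. recover_urls.py), run it with Python 3, and enter the name of the exported txt file at the prompt; it writes all_urls.txt, regular_urls.txt, cleaned_urls.txt, and cleaned_urls_no_duplicates.txt to the working directory.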