Created
May 16, 2024 21:13
-
-
Save MaraScott/0c008760a38c66e4d0c719389c5926ab to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup
import pandas as pd
# Default file names for the bookmark round-trip:
#   SOURCE_HTML    -> exported browser favorites that get parsed
#   EXPORT_CSV     -> flat name/url table written for editing
#   NORMALIZED_CSV -> edited copy of EXPORT_CSV (same row order) read back in
#   UPDATED_HTML   -> favorites file rewritten with the edited names/URLs
SOURCE_HTML = 'favorites_5_16_24.html'
EXPORT_CSV = 'favorites_5_16_24.csv'
NORMALIZED_CSV = 'favorites_5_16_24_normalized.csv'
UPDATED_HTML = 'favorites_5_16_24_updated.html'


def extract_bookmarks(links):
    """Return one ``{'name': ..., 'url': ...}`` dict per anchor, in order.

    Args:
        links: Iterable of anchor objects supporting ``get_text()`` and
            ``['href']`` lookup (e.g. BeautifulSoup ``<a>`` tags).
    """
    return [{'name': link.get_text(), 'url': link['href']} for link in links]


def load_updated_bookmarks(csv_path):
    """Read the edited CSV and return its rows as a list of dicts.

    Every cell is kept as a plain string: with pandas' default NA handling
    an empty name, or a bookmark literally called "NA" or "null", would come
    back as a float NaN and end up written into the HTML as "nan".

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
    """
    frame = pd.read_csv(csv_path, dtype=str, keep_default_na=False)
    return frame.to_dict(orient='records')


def apply_updates(links, updated_bookmarks):
    """Write the edited names and URLs back onto the anchors, in place.

    Anchors and rows are paired by position, not by URL. This keeps
    duplicate URLs distinct and lets a bookmark's name be updated even when
    its URL changed in the same edit (a lookup keyed on the old URL misses
    once ``href`` has been rewritten).

    Args:
        links: Anchors in the same order that was exported to CSV.
        updated_bookmarks: Sequence of dicts with ``'name'`` and ``'url'``
            keys, one per anchor.

    Raises:
        ValueError: If the number of rows differs from the number of
            anchors; pairing by position would then misapply the edits.
    """
    links = list(links)
    if len(links) != len(updated_bookmarks):
        raise ValueError(
            f"expected {len(links)} bookmark rows, got {len(updated_bookmarks)}; "
            "rows must stay in the exported order with none added or removed"
        )

    for link, updated in zip(links, updated_bookmarks):
        if link['href'] != updated['url']:
            link['href'] = updated['url']
        # Assigning .string replaces all of the anchor's children, so only
        # do it when the text really changed.
        if link.get_text() != updated['name']:
            link.string = updated['name']


def main(source_html=SOURCE_HTML, export_csv=EXPORT_CSV,
         normalized_csv=NORMALIZED_CSV, updated_html=UPDATED_HTML):
    """Export bookmarks to CSV, then apply the edited CSV back to the HTML.

    Steps:
      1. Parse ``source_html`` and collect every ``<a>`` that has an href.
      2. Write the name/url pairs to ``export_csv``.
      3. Read ``normalized_csv`` (the edited copy of that export).
      4. Apply the edited names/URLs and write ``updated_html``.

    Raises:
        SystemExit: If ``normalized_csv`` does not exist yet (typically the
            first run, before the export has been edited).
        ValueError: If the edited CSV's row count differs from the number
            of anchors found.
    """
    # Step 1: Parse the favorites file.
    with open(source_html, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file, 'html.parser')

    # href=True skips anchors without an href, which would otherwise raise
    # KeyError on link['href'].
    links = soup.find_all('a', href=True)
    bookmarks = extract_bookmarks(links)

    # Step 2: Save bookmarks to CSV. Explicit columns keep the header row
    # present even when no bookmarks were found.
    pd.DataFrame(bookmarks, columns=['name', 'url']).to_csv(export_csv, index=False)

    # Step 3: Read back the edited CSV (updated manually or programmatically).
    try:
        updated_bookmarks = load_updated_bookmarks(normalized_csv)
    except FileNotFoundError:
        raise SystemExit(
            f"Exported {len(bookmarks)} bookmarks to {export_csv}. "
            f"Save the edited copy as {normalized_csv} and run again."
        ) from None

    # Step 4: Update the HTML and save it to a new file.
    apply_updates(links, updated_bookmarks)
    with open(updated_html, 'w', encoding='utf-8') as file:
        # formatter=None is kept from the original script; per the
        # Beautiful Soup docs it disables entity substitution on output.
        file.write(soup.prettify(formatter=None))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment