Skip to content

Instantly share code, notes, and snippets.

@appel
Last active May 31, 2025 14:18
Show Gist options
  • Save appel/a6accfab384f80cb12c9c20a1075e942 to your computer and use it in GitHub Desktop.
Save appel/a6accfab384f80cb12c9c20a1075e942 to your computer and use it in GitHub Desktop.
Python script which converts Pocket's csv export file to html.
# This script converts Pocket's csv export file to html,
# allowing you to import it to a wider range of apps (like Grimoire and Linkding).
# This is a Deno-to-Python port, produced with Gemini.
#
# Note that I did not write this, all credit goes to the original author:
# https://github.com/enjikaka/pocket-to-bookmark
#
# From that repo: "Mozilla is killing Pocket and you get your data export
# as an CSV file. I'll use Linkding instead and it supports the
# Netscape Bookmark File Format. This Deno script converts your CSV export
# to a Netscape Bookmark File Format-compatible HTML-file, that you can import
# to Linkding or your browser."
#
# Usage:
# $ python3 pocket-to-bookmark.py part_000001.csv pocket-export.html
import sys
import csv
import html # For a more standard HTML escaping, though a custom one is used for fidelity
import time
from pathlib import Path
# --- Helper Functions ---
# Translation table for escape_html_custom: each special character maps to
# its entity. A table lookup replaces every character in a single pass, so an
# "&" produced by one substitution can never be escaped a second time.
_HTML_ESCAPE_TABLE = str.maketrans({
    "&": "&amp;",
    '"': "&quot;",
    "<": "&lt;",
    ">": "&gt;",
})


def escape_html_custom(text: str) -> str:
    """Escape ``&``, ``"``, ``<`` and ``>`` for use in HTML text or attributes.

    This mirrors the escaping done by the original Deno script; unlike
    ``html.escape`` it leaves single quotes untouched.

    Args:
        text: The value to escape. Non-string values are converted with
            ``str()`` first.

    Returns:
        The escaped string.
    """
    return str(text).translate(_HTML_ESCAPE_TABLE)
def generate_bookmark_html(bookmarks: list) -> str:
    """Render bookmarks as a Netscape Bookmark File Format HTML document.

    All bookmarks are placed in a single folder named "Pocket".

    Args:
        bookmarks: Dicts providing the keys ``url``, ``title`` and
            ``timeAdded``. ``timeAdded`` is expected to be a Unix timestamp
            in seconds; it is truncated with ``int()`` for ``ADD_DATE``.

    Returns:
        The document as one newline-joined string (no trailing newline).
    """
    document = [
        "<!DOCTYPE NETSCAPE-Bookmark-file-1>",
        '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">',
        "<TITLE>Pocket Export</TITLE>",
        "<H1>Pocket Export</H1>",
        "<DL><p>",
        " <DT><H3>Pocket</H3>",
        " <DL><p>",
    ]
    for bm in bookmarks:
        # int() truncates toward zero, which matches the Deno script's
        # Math.floor() for the positive timestamps expected here.
        add_date = int(bm['timeAdded'])
        href = escape_html_custom(bm['url'])
        label = escape_html_custom(bm['title'])
        document.append(
            f' <DT><A HREF="{href}" ADD_DATE="{add_date}">{label}</A>'
        )
    # Close the "Pocket" folder list, then the top-level list.
    document.append(" </DL><p>")
    document.append("</DL><p>")
    return "\n".join(document)
# --- Main Script Logic ---
def _parse_time_added(raw: str) -> float:
    """Convert a ``time_added`` cell to whole Unix seconds as a float.

    Mirrors the Deno expression ``parseInt(x) || Date.now() / 1000``: the
    current time is used when the cell is empty, non-numeric, non-finite,
    or evaluates to zero.
    """
    try:
        seconds = int(float(raw))  # truncates e.g. "123.45" to 123
    except (ValueError, OverflowError):
        # ValueError: empty, non-numeric or NaN; OverflowError: infinity.
        return time.time()
    return float(seconds) if seconds else time.time()


def _read_bookmarks(input_path: Path) -> list:
    """Parse a Pocket CSV export into a list of bookmark dicts.

    Each returned dict has the keys ``title``, ``url``, ``timeAdded`` and
    ``status``. A missing title falls back to the URL. If the file contains
    no header row, a warning is printed to stderr and an empty list is
    returned.

    Raises:
        OSError: The file cannot be opened or read.
        UnicodeError: The file is not valid UTF-8.
        csv.Error: The csv module rejects the file's contents.
    """
    bookmarks = []
    # utf-8-sig drops a leading byte-order mark that would otherwise stay
    # attached to the first header name. newline="" leaves line-ending
    # handling to the csv module, so quoted fields may span several lines.
    with input_path.open(encoding="utf-8-sig", newline="") as handle:
        # Ignore blank / whitespace-only rows anywhere in the file.
        rows = (
            row for row in csv.reader(handle)
            if any(cell.strip() for cell in row)
        )
        headers = [name.strip() for name in next(rows, [])]
        if not headers:
            print(
                "Warning: CSV file is empty or contains only whitespace.",
                file=sys.stderr,
            )
            return bookmarks
        for fields in rows:
            # Columns missing from a short row simply have no entry and
            # default to "" through .get() below.
            entry = {key: cell.strip() for key, cell in zip(headers, fields)}
            url = entry.get('url', '')
            bookmarks.append({
                'title': entry.get('title', '') or url,
                'url': url,
                'timeAdded': _parse_time_added(entry.get('time_added', '')),
                'status': entry.get('status', ''),
            })
    return bookmarks


def main():
    """Convert a Pocket CSV export to a Netscape bookmark HTML file.

    Reads the input CSV path from ``sys.argv[1]`` and writes the HTML to
    ``sys.argv[2]``. Only rows whose status is "unread" are exported,
    sorted by ascending ``timeAdded``. Exits with status 1 on a usage error
    or when the input cannot be read/parsed or the output cannot be written.
    """
    if len(sys.argv) != 3:
        print(
            "Usage: python pocket_to_bookmarks.py input.csv output.html",
            file=sys.stderr,
        )
        sys.exit(1)

    input_path_str, output_path_str = sys.argv[1], sys.argv[2]
    input_path = Path(input_path_str)
    output_path = Path(output_path_str)

    if not input_path.is_file():
        print(f"Error: Input file not found at {input_path}", file=sys.stderr)
        sys.exit(1)

    try:
        all_bookmarks = _read_bookmarks(input_path)
    except FileNotFoundError:
        # The file can still vanish between the is_file() check and the read.
        print(f"Error: Input file not found at {input_path_str}", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeError, csv.Error) as e:
        print(f"An error occurred during CSV processing: {e}", file=sys.stderr)
        sys.exit(1)

    # Keep only unread items, oldest first (same as the Deno script).
    filtered_bookmarks = [b for b in all_bookmarks if b['status'] == "unread"]
    filtered_bookmarks.sort(key=lambda b: b['timeAdded'])

    html_output = generate_bookmark_html(filtered_bookmarks)
    try:
        output_path.write_text(html_output, encoding='utf-8')
    except (OSError, UnicodeError) as e:
        print(f"Error writing HTML to {output_path_str}: {e}", file=sys.stderr)
        sys.exit(1)

    # Swedish message inherited from the Deno script ("Exported N unread
    # bookmarks"). Report the number actually written rather than the number
    # of rows parsed, so the count matches the output file.
    print(f"✅ Exporterade {len(filtered_bookmarks)} unread-bokmärken → {output_path_str}")
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment