appel · May 31, 2025 14:18
diff --git a/pocket-to-bookmark.py b/pocket-to-bookmark.py
 # This script converts Pocket's CSV export file to HTML,
 # allowing you to import it into a wider range of apps (like Grimoire and Linkding).
 # This is a Deno-to-Python port, produced with Gemini.
 # 
 # Note that I did not write this, all credit goes to the original author:
 # https://github.com/enjikaka/pocket-to-bookmark
 #
 # From that repo: "Mozilla is killing Pocket and you get your data export
 # as an CSV file. I'll use Linkding instead and it supports the
 # Netscape Bookmark File Format. This Deno script converts your CSV export
 # to a Netscape Bookmark File Format-compatible HTML-file, that you can import
 # to Linkding or your browser."
 # 
 # Usage:
 # $python3 pocket-to-bookmark.py part_000001.csv pocket-export.html

 import sys
 import csv
 import html # For a more standard HTML escaping, though a custom one is used for fidelity
 import time
 from pathlib import Path

 # --- Helper Functions ---

 def escape_html_custom(text: str) -> str:
     """
     Escape the HTML-significant characters the Deno script escapes.

     The argument is coerced with ``str()`` first. ``&``, ``"``, ``<`` and ``>``
     are replaced by their named entities; every other character, including
     the single quote, is passed through unchanged.
     """
     # One pass over the input, so the "&" that the entities themselves
     # introduce is never escaped a second time.
     entity_table = str.maketrans({
         "&": "&amp;",
         '"': "&quot;",
         "<": "&lt;",
         ">": "&gt;",
     })
     return str(text).translate(entity_table)

 def generate_bookmark_html(bookmarks: list) -> str:
     """
     Render bookmarks as a Netscape Bookmark File Format document.

     Each item in ``bookmarks`` is indexed by ``'timeAdded'``, ``'url'`` and
     ``'title'``. All entries are placed inside a single "Pocket" folder and the
     document lines are joined with ``\\n`` (no trailing newline).
     """

     def render_entry(item) -> str:
         # Convert the timestamp first, then escape the URL and the title.
         stamp = int(item['timeAdded'])  # whole seconds for ADD_DATE
         href = escape_html_custom(item['url'])
         label = escape_html_custom(item['title'])
         return f'    <DT><A HREF="{href}" ADD_DATE="{stamp}">{label}</A>'

     preamble = [
         "<!DOCTYPE NETSCAPE-Bookmark-file-1>",
         '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">',
         "<TITLE>Pocket Export</TITLE>",
         "<H1>Pocket Export</H1>",
         "<DL><p>",
         "  <DT><H3>Pocket</H3>",
         "  <DL><p>",
     ]
     closing = [
         "  </DL><p>",
         "</DL><p>",
     ]
     entries = [render_entry(item) for item in bookmarks]
     return "\n".join(preamble + entries + closing)

 # --- Main Script Logic ---

 def _parse_time_added(raw: str) -> float:
     """
     Convert a ``time_added`` CSV field to a Unix timestamp in seconds.

     Follows the Deno expression ``parseInt(x) || Date.now() / 1000``: the value
     is truncated to a whole number, and anything empty, non-numeric,
     non-finite or equal to zero falls back to the current time.
     """
     try:
         seconds = int(float(raw))
     except (ValueError, OverflowError):
         # ValueError: "", "abc", "nan". OverflowError: "inf" or e.g. "1e999".
         return time.time()
     return float(seconds) if seconds else time.time()


 def _read_bookmarks(input_path: Path) -> list:
     """
     Parse the CSV export at ``input_path`` into a list of bookmark dicts.

     The first non-blank row supplies the column names. Every later non-blank
     row becomes a dict with the keys ``'title'``, ``'url'``, ``'timeAdded'``
     and ``'status'``; missing columns default to ``""`` and an empty title
     falls back to the URL. Prints a warning to stderr and returns ``[]`` when
     the file has no non-blank rows.

     Raises:
         OSError: the file cannot be opened or read.
         UnicodeDecodeError: the file is not valid UTF-8.
         csv.Error: the csv module rejects the input.
     """
     # "utf-8-sig" drops a leading byte-order mark so it cannot attach itself
     # to the first header name. newline="" hands line-ending handling to the
     # csv module, which keeps quoted fields containing line breaks in one row.
     with input_path.open(encoding='utf-8-sig', newline='') as handle:
         rows = [
             row for row in csv.reader(handle)
             # Drop blank and whitespace-only lines.
             if len(row) > 1 or (row and row[0].strip())
         ]

     if not rows:
         print("Warning: CSV file is empty or contains only whitespace.", file=sys.stderr)
         return []

     headers = [name.strip() for name in rows[0]]

     bookmarks = []
     for fields in rows[1:]:
         # Short rows are padded with "" (Deno: fields[i] || "").
         entry = {
             key: (fields[i].strip() if i < len(fields) else "")
             for i, key in enumerate(headers)
         }
         url = entry.get('url', '')
         bookmarks.append({
             'title': entry.get('title', '') or url,
             'url': url,
             'timeAdded': _parse_time_added(entry.get('time_added', '')),
             'status': entry.get('status', ''),
         })
     return bookmarks


 def main():
     """
     Convert a Pocket CSV export into a Netscape bookmark HTML file.

     Reads the input and output paths from ``sys.argv``, parses the CSV, keeps
     the rows whose status is ``"unread"``, sorts them by time added (oldest
     first) and writes the generated HTML as UTF-8. Exits with status 1 on a
     usage error, a missing input file, or any read/parse/write failure.
     """
     if len(sys.argv) != 3:
         print(
             "Usage: python3 pocket-to-bookmark.py input.csv output.html",
             file=sys.stderr,
         )
         sys.exit(1)

     input_path_str, output_path_str = sys.argv[1:3]
     input_path = Path(input_path_str)
     output_path = Path(output_path_str)

     if not input_path.is_file():
         print(f"Error: Input file not found at {input_path}", file=sys.stderr)
         sys.exit(1)

     try:
         all_bookmarks = _read_bookmarks(input_path)
     except FileNotFoundError:
         # The file can still disappear between the is_file() check and the open.
         print(f"Error: Input file not found at {input_path_str}", file=sys.stderr)
         sys.exit(1)
     except Exception as e:
         # Top-level boundary: report any read/parse failure and exit cleanly.
         print(f"An error occurred during CSV processing: {e}", file=sys.stderr)
         sys.exit(1)

     # Deno: allBookmarks.filter(b => b.status === "unread")
     #                   .sort((a, b) => a.timeAdded - b.timeAdded)
     filtered_bookmarks = sorted(
         (b for b in all_bookmarks if b['status'] == "unread"),
         key=lambda b: b['timeAdded'],
     )

     html_output = generate_bookmark_html(filtered_bookmarks)

     try:
         output_path.write_text(html_output, encoding='utf-8')
     except Exception as e:
         print(f"Error writing HTML to {output_path_str}: {e}", file=sys.stderr)
         sys.exit(1)

     # NOTE: kept for fidelity with the Deno script, whose message reports the
     # number of *all parsed* rows even though it is labelled "unread".
     print(f"✅ Exporterade {len(all_bookmarks)} unread-bokmärken → {output_path_str}")

 # Run the conversion only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == "__main__":
    main()
	# This script converts Pocket's CSV export file to HTML,
	# allowing you to import it into a wider range of apps (like Grimoire and Linkding).
	# This is a Deno-to-Python port, produced with Gemini.
	#
	# Note that I did not write this, all credit goes to the original author:
	# https://github.com/enjikaka/pocket-to-bookmark
	#
	# From that repo: "Mozilla is killing Pocket and you get your data export
	# as an CSV file. I'll use Linkding instead and it supports the
	# Netscape Bookmark File Format. This Deno script converts your CSV export
	# to a Netscape Bookmark File Format-compatible HTML-file, that you can import
	# to Linkding or your browser."
	#
	# Usage:
	# $python3 pocket-to-bookmark.py part_000001.csv pocket-export.html

	import sys
	import csv
	import html # For a more standard HTML escaping, though a custom one is used for fidelity
	import time
	from pathlib import Path

	# --- Helper Functions ---

	def escape_html_custom(text: str) -> str:
	    """
	    Custom HTML escaping function to match the Deno script's behavior.

	    The argument is coerced with ``str()`` and the characters ``&``, ``"``,
	    ``<`` and ``>`` are replaced by their named entities.
	    """
	    return (
	        str(text)
	        # "&" must be handled first so the entities added below are not
	        # escaped a second time.
	        .replace("&", "&amp;")
	        .replace('"', "&quot;")
	        .replace("<", "&lt;")
	        .replace(">", "&gt;")
	    )

	def generate_bookmark_html(bookmarks: list) -> str:
	    """
	    Generate the Netscape bookmark HTML structure.

	    Each item in ``bookmarks`` is indexed by ``'timeAdded'``, ``'url'`` and
	    ``'title'``. All entries are placed inside a single "Pocket" folder and
	    the document lines are joined with ``\\n`` (no trailing newline).
	    """
	    lines = [
	        "<!DOCTYPE NETSCAPE-Bookmark-file-1>",
	        '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">',
	        "<TITLE>Pocket Export</TITLE>",
	        "<H1>Pocket Export</H1>",
	        "<DL><p>",
	        "  <DT><H3>Pocket</H3>",
	        "  <DL><p>",
	    ]

	    for bm in bookmarks:
	        # timeAdded is a Unix timestamp in seconds; int() truncates it to
	        # whole seconds for the ADD_DATE attribute.
	        add_date = int(bm['timeAdded'])
	        escaped_url = escape_html_custom(bm['url'])
	        escaped_title = escape_html_custom(bm['title'])
	        lines.append(
	            f'    <DT><A HREF="{escaped_url}" ADD_DATE="{add_date}">{escaped_title}</A>'
	        )

	    lines.append("  </DL><p>")
	    lines.append("</DL><p>")
	    return "\n".join(lines)

	# --- Main Script Logic ---

	def _parse_time_added(raw: str) -> float:
	    """
	    Convert a ``time_added`` CSV field to a Unix timestamp in seconds.

	    Follows the Deno expression ``parseInt(x) || Date.now() / 1000``: the
	    value is truncated to a whole number, and anything empty, non-numeric,
	    non-finite or equal to zero falls back to the current time.
	    """
	    try:
	        seconds = int(float(raw))
	    except (ValueError, OverflowError):
	        # ValueError: "", "abc", "nan". OverflowError: "inf" or e.g. "1e999".
	        return time.time()
	    return float(seconds) if seconds else time.time()


	def _read_bookmarks(input_path: Path) -> list:
	    """
	    Parse the CSV export at ``input_path`` into a list of bookmark dicts.

	    The first non-blank row supplies the column names. Every later non-blank
	    row becomes a dict with the keys ``'title'``, ``'url'``, ``'timeAdded'``
	    and ``'status'``; missing columns default to ``""`` and an empty title
	    falls back to the URL. Prints a warning to stderr and returns ``[]``
	    when the file has no non-blank rows.

	    Raises:
	        OSError: the file cannot be opened or read.
	        UnicodeDecodeError: the file is not valid UTF-8.
	        csv.Error: the csv module rejects the input.
	    """
	    # "utf-8-sig" drops a leading byte-order mark so it cannot attach itself
	    # to the first header name. newline="" hands line-ending handling to the
	    # csv module, which keeps quoted fields containing line breaks in one row.
	    with input_path.open(encoding='utf-8-sig', newline='') as handle:
	        rows = [
	            row for row in csv.reader(handle)
	            # Drop blank and whitespace-only lines.
	            if len(row) > 1 or (row and row[0].strip())
	        ]

	    if not rows:
	        print("Warning: CSV file is empty or contains only whitespace.", file=sys.stderr)
	        return []

	    headers = [name.strip() for name in rows[0]]

	    bookmarks = []
	    for fields in rows[1:]:
	        # Short rows are padded with "" (Deno: fields[i] || "").
	        entry = {
	            key: (fields[i].strip() if i < len(fields) else "")
	            for i, key in enumerate(headers)
	        }
	        url = entry.get('url', '')
	        bookmarks.append({
	            'title': entry.get('title', '') or url,
	            'url': url,
	            'timeAdded': _parse_time_added(entry.get('time_added', '')),
	            'status': entry.get('status', ''),
	        })
	    return bookmarks


	def main():
	    """
	    Convert a Pocket CSV export into a Netscape bookmark HTML file.

	    Reads the input and output paths from ``sys.argv``, parses the CSV, keeps
	    the rows whose status is ``"unread"``, sorts them by time added (oldest
	    first) and writes the generated HTML as UTF-8. Exits with status 1 on a
	    usage error, a missing input file, or any read/parse/write failure.
	    """
	    if len(sys.argv) != 3:
	        print(
	            "Usage: python3 pocket-to-bookmark.py input.csv output.html",
	            file=sys.stderr,
	        )
	        sys.exit(1)

	    input_path_str, output_path_str = sys.argv[1:3]
	    input_path = Path(input_path_str)
	    output_path = Path(output_path_str)

	    if not input_path.is_file():
	        print(f"Error: Input file not found at {input_path}", file=sys.stderr)
	        sys.exit(1)

	    try:
	        all_bookmarks = _read_bookmarks(input_path)
	    except FileNotFoundError:
	        # The file can still disappear between the is_file() check and the open.
	        print(f"Error: Input file not found at {input_path_str}", file=sys.stderr)
	        sys.exit(1)
	    except Exception as e:
	        # Top-level boundary: report any read/parse failure and exit cleanly.
	        print(f"An error occurred during CSV processing: {e}", file=sys.stderr)
	        sys.exit(1)

	    # Deno: allBookmarks.filter(b => b.status === "unread")
	    #                   .sort((a, b) => a.timeAdded - b.timeAdded)
	    filtered_bookmarks = sorted(
	        (b for b in all_bookmarks if b['status'] == "unread"),
	        key=lambda b: b['timeAdded'],
	    )

	    html_output = generate_bookmark_html(filtered_bookmarks)

	    try:
	        output_path.write_text(html_output, encoding='utf-8')
	    except Exception as e:
	        print(f"Error writing HTML to {output_path_str}: {e}", file=sys.stderr)
	        sys.exit(1)

	    # NOTE: kept for fidelity with the Deno script, whose message reports the
	    # number of *all parsed* rows even though it is labelled "unread".
	    print(f"✅ Exporterade {len(all_bookmarks)} unread-bokmärken → {output_path_str}")

	# Run the conversion only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()