-
-
Save tinyapps/df2b6757a142ff93caf9c63d0ef38b11 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3 | |
""" | |
Forked and modified from pukkandan/ytdlp_nest_comments.py: | |
https://gist.github.com/pukkandan/ee737fec64822f2552caf3ca4cbf5db7 | |
which included this license and copyright information: | |
"SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT | |
Copyright © 2021 [email protected]" | |
Convert YouTube comments from an info.json file (acquired via | |
yt-dlp --write-comments) to HTML. | |
""" | |
import os.path | |
import json | |
import argparse | |
import logging | |
from datetime import datetime | |
import html | |
# Route progress and error messages through the root logger, printing
# "LEVEL: message" for everything at INFO severity and above.
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
def get_fields(dct):
    """Yield ``(name, value)`` pairs for one comment dict.

    Each extractor registered in ``FIELDS`` is called as ``fn(dct, name)``;
    pairs whose extracted value is ``None`` are skipped.
    """
    for field, extract in FIELDS.items():
        value = extract(dct, field)
        if value is None:
            continue
        yield field, value
def filter_func(comments):
    """Return a list with one reduced dict per comment in *comments*.

    Every comment is passed through ``get_fields`` and the resulting
    ``(name, value)`` pairs are collected into a new dict.
    """
    reduced = []
    for comment in comments:
        reduced.append(dict(get_fields(comment)))
    return reduced
def _format_timestamp(dct, name):
    """Return the Unix timestamp stored under *name* as a ``YYYY/MM/DD`` UTC date.

    Returns ``None`` when the key is absent or its value is ``None``.  A
    timestamp of ``0`` is formatted like any other value instead of leaking
    through as the integer ``0``.
    """
    # Function-scope import so the file-level imports need no change.
    from datetime import timezone

    ts = dct.get(name)
    if ts is None:
        return None
    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp().
    return datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y/%m/%d')


# Maps each output field name to an extractor called as ``fn(comment, name)``.
# An extractor returning ``None`` means "no value for this field".
FIELDS = {
    'text': dict.get,
    'author': dict.get,
    'timestamp': _format_timestamp,
    # Add more fields here
    # ``or []`` also covers an explicit null; an empty reply list becomes None.
    'replies': lambda dct, name: filter_func(dct.get(name) or []) or None,
}
# Command-line interface: both options are mandatory and take a file path.
parser = argparse.ArgumentParser()
file_option = dict(metavar='FILE', required=True)
parser.add_argument(
    '--input-file', '-i',
    dest='inputfile',
    help='File to read video metadata from (info.json)',
    **file_option)
parser.add_argument(
    '--output-file', '-o',
    dest='outputfile',
    help='File to write comments to (html)',
    **file_option)
args = parser.parse_args()

# Only HTML output is implemented, so reject any other output extension.
_, suffix = os.path.splitext(args.outputfile)
ext = suffix[1:]  # drop the leading dot
if ext != 'html':
    raise SystemExit(f'ERROR: Only html format is supported, not {ext}')
# Load the metadata JSON; log a short message for the two expected
# failure modes, then let the original exception propagate.
logging.info('Reading file')
try:
    with open(args.inputfile, encoding='utf-8') as info_file:
        info_dict = json.loads(info_file.read())
except json.JSONDecodeError:
    logging.error(f'Error decoding JSON from file {args.inputfile}')
    raise
except FileNotFoundError:
    logging.error(f'File {args.inputfile} not found')
    raise
# Build the reply tree from the flat comment list in the info.json.
raw_comments = info_dict.get('comments')
if raw_comments is None:
    # Without this check a metadata file saved without comments would fail
    # with a bare KeyError traceback.
    raise SystemExit(
        f'ERROR: {args.inputfile} contains no comments; '
        'was it created with yt-dlp --write-comments?')

# Index comments by id, oldest first (a missing timestamp sorts as 0).
comment_data = {c['id']: c for c in sorted(
    raw_comments, key=lambda c: c.get('timestamp') or 0)}
# Count unique ids so the progress total matches what is iterated below.
count = len(comment_data)
nested_comments = []
for i, c in enumerate(comment_data.values(), 1):
    logging.info(f'Processing comment {i}/{count}')
    parent_id = c.get('parent', 'root')
    if parent_id == 'root':
        parent = nested_comments
    elif parent_id in comment_data:
        parent = comment_data[parent_id].setdefault('replies', [])
    else:
        # The parent is not in the file: keep the reply visible at the
        # top level rather than aborting the whole conversion.
        logging.warning(
            f'Parent {parent_id} of comment {c["id"]} not found; '
            'placing it at the top level')
        parent = nested_comments
    parent.append(c)
# Reduce every comment (and, recursively, its replies) to the FIELDS entries.
nested_comments = filter_func(nested_comments)
logging.info('Converting to html')
# Stylesheet embedded in the top-level document, including a dark-mode variant.
_COMMENT_STYLE = '''
<style>
.comment-box {
    border: 1px solid #ccc;
    padding: 10px;
}
.comments ul {
    list-style-type: none;
    padding-left: 20px;
}
.comment-text {
    white-space: pre-wrap; /* Preserve whitespace and line breaks */
}
@media (prefers-color-scheme: dark) {
    body {
        background-color: #121212;
        color: #e0e0e0;
    }
    .comment-box {
        border-color: #444;
    }
}
</style>
'''


def wrap_html(data, top_level=True):
    """Render a list of comment dicts as a nested HTML ``<ul>``.

    Args:
        data: Iterable of comment dicts.  The keys read are ``author``
            (defaults to "Anonymous"), ``text`` (defaults to empty),
            ``timestamp`` (already formatted as a string; omitted when
            empty) and ``replies`` (a list rendered recursively).
        top_level: When true, the list is wrapped in
            ``<div class="comments">`` and prefixed with a charset
            ``<meta>`` tag and the embedded stylesheet.  Recursive calls
            pass ``False`` to get the bare list.

    Returns:
        The HTML markup as a string.  All comment-supplied text is
        HTML-escaped.
    """
    parts = ['<ul>']
    for comment in data:
        author = html.escape(comment.get("author", "Anonymous"))
        # A missing "text" key renders as an empty body instead of raising
        # KeyError; newlines become <br>.
        text = html.escape(comment.get("text", "")).replace('\n', '<br>')
        timestamp = html.escape(comment.get("timestamp", ""))
        parts.append('<li><div class="comment-box">')
        # The text <div> is a sibling of the author <p>: a <div> nested
        # inside <p> is invalid HTML.
        parts.append(f'<p><strong>{author}:</strong></p>')
        parts.append(f'<div class="comment-text">{text}</div>')
        if timestamp:
            parts.append(f'<p><small>{timestamp}</small></p>')
        replies = comment.get('replies')
        if replies:
            parts.append(wrap_html(replies, top_level=False))
        parts.append('</div></li>')
    parts.append('</ul>')
    html_content = ''.join(parts)
    if not top_level:
        return html_content
    meta = '<meta charset="UTF-8">'
    return f'{meta}{_COMMENT_STYLE}<div class="comments">{html_content}</div>'
# Render the nested comments and save the markup to the requested path.
# A write failure is logged and then re-raised.
out = wrap_html(nested_comments)
logging.info('Writing file')
try:
    with open(args.outputfile, mode='w', encoding='utf-8') as html_file:
        html_file.write(out)
    logging.info('Done')
except OSError as err:  # IOError is an alias of OSError in Python 3
    logging.error(f'Error writing to file {args.outputfile}: {err}')
    raise
Thanks for testing it @m3jorri, as well as for your kind feedback in @pukkandan's original gist.
Thanks for your efforts and quick responses! If I may request a feature, would it be possible to add a dark mode toggle switch to the html output file?
My pleasure, @m3jorri. While it's not a toggle switch, adding these few lines just above the closing `</style>`
tag will display the HTML in a dark-mode-friendly format if the user's environment is set to dark mode:
@media (prefers-color-scheme: dark) {
body {
background-color: #121212;
color: #e0e0e0;
}
.comment-box {
border-color: #444;
}
}
I've updated the gist as well. Thanks again for testing and offering feedback.
That works, thanks
Hello, @tinyapps
Thank you for your work on this script!
(and to pukkandan too!)
If you have the time and energy, could you please implement the following:
- Would it be possible to integrate the comment's up/down votes too?
- To write comments in descending order based on upvotes (highest first)?
I wish I could help, but I have no idea how.
Thanks again:
Hetsz.
To write comments in descending order based on upvotes (highest first)?
This is not necessary because it can be decided when the comments are downloaded with yt-dlp by specifying "youtube:comment_sort=top"
The up/down votes would be cool af to have.
Also, I don't know how hard this is, but if I were able to click a parent comment and only then see its replies, this would be the perfect script in my opinion. That way, replies would be hidden by default.
I wish i had the skills to make it happen.
Great script!
Seems to work perfectly!