Created
January 27, 2020 17:32
-
-
Save FloPinguin/b2b2a2efa160f7c3f7748e9cdd12efff to your computer and use it in GitHub Desktop.
For every .info.json file (generated by youtube-dl) found in a directory tree, download the comments of the corresponding YouTube video and save them as a .comments file in the same directory. Requires youtube-comment-scraper-cli (https://github.com/philbot9/youtube-comment-scraper-cli) to be installed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import json
import os
import shutil
import subprocess
import sys
# Root of the directory tree that is scanned for youtube-dl ".info.json" files.
rootdir = 'U:/Hoard'
def log(msg):
    """Write *msg* plus a newline to standard output as UTF-8 bytes.

    The bytes go straight to ``sys.stdout.buffer`` so the text encoding of
    the console is bypassed and any path can be printed.

    Args:
        msg: The text to print (without trailing newline).
    """
    # "backslashreplace" keeps logging from raising on characters that UTF-8
    # cannot encode, such as lone surrogates in odd file names.
    data = (msg + "\n").encode('utf8', errors="backslashreplace")
    stream = sys.stdout.buffer
    stream.write(data)
    # Flush so the line is emitted before any child process started afterwards
    # writes to the same output.
    stream.flush()
# Locate the scraper executable once instead of going through a shell.
# shutil.which honours PATHEXT on Windows, so wrapper scripts (for example
# ".cmd" shims) are found too; fall back to the bare name if nothing is found.
_SCRAPER = shutil.which("youtube-comment-scraper") or "youtube-comment-scraper"
_INFO_SUFFIX = ".info.json"
# Videos must be older than this many days before their comments are fetched.
_MIN_AGE_DAYS = 3

# Walk the tree below ``rootdir``; for each "<name>.info.json" whose
# "<name>.comments" file is missing or empty, download the comments of the
# video described by the metadata. Every decision is reported through log().
for subdir, _dirs, files in os.walk(rootdir):
    for file in files:
        # Match the suffix only: a substring test would also pick up names
        # such as "x.info.json.bak".
        if not file.endswith(_INFO_SUFFIX):
            continue

        comment_path = os.path.join(
            subdir, file[:-len(_INFO_SUFFIX)] + ".comments")

        # A non-empty comments file means this video was already handled.
        if os.path.isfile(comment_path) and os.path.getsize(comment_path) > 0:
            log(comment_path + " already exists")
            continue

        # Read the metadata; the context manager closes the file handle, and
        # one unreadable file must not abort the whole scan.
        try:
            with open(os.path.join(subdir, file), "r", encoding="utf-8") as info_file:
                info = json.load(info_file)
        except (OSError, ValueError) as err:
            log(comment_path + " metadata could not be read: " + str(err))
            continue

        if not isinstance(info, dict) or info.get('extractor') != "youtube":
            log(comment_path + " extractor is not youtube")
            continue

        try:
            video_id = str(info['id'])
            uploaded = datetime.datetime.strptime(info['upload_date'], '%Y%m%d')
        except (KeyError, TypeError, ValueError):
            log(comment_path + " id or upload date is missing or invalid")
            continue

        if (datetime.datetime.today() - uploaded).days <= _MIN_AGE_DAYS:
            log(comment_path + " video is too new")
            continue

        log(comment_path + " downloading...")
        # Pass an argument list (no shell): the path contains the video title,
        # which must never be interpreted by a shell. "--" keeps an id that
        # starts with "-" from being parsed as an option.
        try:
            result = subprocess.run(
                [_SCRAPER, "--outputFile", comment_path, "--", video_id])
        except OSError as err:
            log(comment_path + " scraper could not be started: " + str(err))
            continue
        if result.returncode != 0:
            log(comment_path + " scraper exited with code " + str(result.returncode))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment