YouTube channels to RSS
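Before the first run, the script below expects two JSON files next to it: chans.json, a JSON array of YouTube usernames whose uploads should be mirrored, and downloads.json, the script's record of already-downloaded videos. A minimal way to seed both files (the usernames here are placeholders, not part of the original gist):

# Seed files for the downloader below; the usernames are only examples.
import json

with open("chans.json", "w") as f:
    json.dump(["someyoutubeuser", "anotheryoutubeuser"], f)

with open("downloads.json", "w") as f:
    json.dump({}, f)  # nothing downloaded yet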
#!/usr/bin/python2.7
from __future__ import print_function

import json
import urllib
import pafy
import pytz
import os
import hashlib
from datetime import datetime
from feedgen.feed import FeedGenerator

# Base URL under which the downloaded videos and the RSS file are served.
ROOT = "https://82.237.11.61/yt/"
RSS_FILENAME = "rss.xml"
MAX_ITEMS = 20
QUIET = False

# chans.json holds the list of YouTube usernames to follow;
# downloads.json tracks already-downloaded videos, keyed by md5 of the video id.
chans = json.load(open("chans.json"))
downloaded = json.load(open("downloads.json"))

# Collect the watch-page URL and timestamps of every upload listed in each
# channel's gdata uploads feed.
urls = []
for chan in chans:
    feed = json.load(urllib.urlopen("https://gdata.youtube.com/feeds/base/users/" + chan + "/uploads?alt=json"))
    for vid in feed["feed"]["entry"]:
        for link in vid["link"]:
            if link["rel"] == "alternate" and link["type"] == "text/html":
                urls.append({'url': link["href"], 'published': vid["published"]["$t"],
                             'updated': vid["updated"]["$t"], 'id': vid["id"]["$t"]})

# Sort newest first ('%s' is the Unix timestamp, a GNU strftime extension)
# and keep only the MAX_ITEMS most recent videos across all channels.
urls.sort(key=lambda x: int(datetime.strptime(x["published"], '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%s')), reverse=True)
while len(urls) > MAX_ITEMS:
    urls.pop()

# Download every video that is not already present on disk.
for url in urls:
    videoid = hashlib.md5(url["id"]).hexdigest()
    filename = videoid + ".mp4"
    if videoid not in downloaded or not os.path.isfile(filename):
        video = pafy.new(url["url"])
        best = video.getbest(preftype="mp4")
        downloaded[videoid] = {'filename': filename, 'title': video.title,
                               'description': video.description, 'thumb': video.thumb,
                               'published': url["published"], 'updated': url["updated"],
                               'id': videoid, 'link': url["url"], 'author': video.author,
                               'category': video.category, 'duration': video.duration}
        print("Downloading " + video.title)
        best.download(quiet=QUIET, filepath=filename)
        print()

json.dump(downloaded, open("downloads.json", mode="w"))

# Drop the oldest entries (and their video files) so only MAX_ITEMS remain.
downloaded_sorted = sorted(downloaded.iteritems(),
                           key=lambda (k, v): int(datetime.strptime(v["published"], '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%s')),
                           reverse=True)
while len(downloaded_sorted) > MAX_ITEMS:
    to_del = downloaded_sorted.pop()
    os.remove(downloaded[to_del[0]]["filename"])
    del downloaded[to_del[0]]
json.dump(downloaded, open("downloads.json", mode="w"))

# Build the podcast feed from whatever is currently downloaded.
fg = FeedGenerator()
fg.title('Youtube Subscriptions')
fg.id(ROOT)
fg.language('en')
fg.updated(datetime.now(tz=pytz.utc))
fg.link({'href': ROOT + RSS_FILENAME, 'rel': 'self'})
fg.load_extension('podcast')
fg.description("Youtube subscriptions")

for download in downloaded:
    fe = fg.add_entry()
    fe.id(downloaded[download]["link"])
    fe.title(downloaded[download]["title"])
    fe.description(downloaded[download]["description"])
    fe.published(downloaded[download]["published"])
    fe.updated(downloaded[download]["updated"])
    fe.link({'href': downloaded[download]["link"], 'rel': 'alternate', 'type': 'text/html'})
    fe.author({'name': downloaded[download]["author"]})
    fe.category({'term': downloaded[download]["category"]})
    fe.enclosure(ROOT + downloaded[download]["filename"], type="video/mp4",
                 length=str(os.path.getsize(downloaded[download]["filename"])))
    fe.podcast.itunes_image(downloaded[download]["thumb"])
    fe.podcast.itunes_duration(downloaded[download]["duration"])

fg.rss_file(RSS_FILENAME)
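To check the result, the generated rss.xml can be parsed with the feedparser package (an assumption on my part, it is not used by the gist itself); each entry should expose its MP4 enclosure under ROOT:

# Sanity-check the generated feed; assumes feedparser is installed.
import feedparser

parsed = feedparser.parse("rss.xml")
print(parsed.feed.title)
for entry in parsed.entries:
    # Every entry carries one enclosure pointing at the locally hosted MP4.
    print(entry.title + " -> " + entry.enclosures[0]["href"])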