Created
February 16, 2018 11:10
-
-
Save dvrensk/a60e2c91cc863c7577071c758f4ba620 to your computer and use it in GitHub Desktop.
Script to download all screencasts from ElixirSips using the RSS feed. Should work for other podcast feeds too.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env ruby
require "nokogiri"
require "date"
require "fileutils"
require "pry"
# Downloads the media for every <item> of a podcast RSS feed, read from
# "feed.xml" in the current directory, into one sub-directory per episode.
#
# Each instance wraps a single feed item node. The node only needs to respond
# to `at(name)`, returning objects that respond to `text` (or to `[]` for the
# enclosure's attributes).
class GetAll
  # "user:password" credentials passed to `curl -u`; read from the environment
  # once, when the class is loaded.
  UPASS = ENV["UPASS"]

  attr_reader :node

  # Entry point: checks that credentials are configured, then walks the feed.
  def self.run
    check_upass
    fetch_all
  end

  # Aborts early when no credentials are configured.
  #
  # @raise [RuntimeError] if UPASS is unset or empty
  def self.check_upass
    return unless UPASS.to_s.empty?

    # NOTE(review): the published copy of this message had its example address
    # mangled by e-mail obfuscation; the address below is a placeholder.
    raise "Define UPASS=user@example.com:p4ssw0rd in the environment"
  end

  # Parses "feed.xml" and fetches every item found under /rss/channel.
  def self.fetch_all
    # Block form so the file handle is closed as soon as parsing is done.
    xml = File.open("feed.xml") { |io| Nokogiri.XML(io) }
    xml.xpath("/rss/channel/item").each { |item| new(item).fetch }
  end

  # @param node [#at] one <item> element of the feed
  def initialize(node)
    @node = node
  end

  # Creates the episode directory, stores the description as "episode.html"
  # inside it, and then tries to download the episode's media file.
  def fetch
    FileUtils.mkdir_p(dir)
    Dir.chdir(dir) do
      File.write("episode.html", description)
      if enclosure
        fetch_per_enclosure
      else
        puts "No enclosure for #{dir}, looking at text content…"
        fetch_from_links
      end
    end
  end

  # Downloads the enclosure URL with curl into the current directory, unless a
  # file with the same base name and the advertised byte length already exists.
  def fetch_per_enclosure
    url = enclosure["url"]
    size = enclosure["length"].to_i
    name = File.basename(url)
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    return if File.exist?(name) && File.size(name) == size

    puts "Fetching #{name} to #{dir}:"
    return if system("curl", "-u", UPASS, "-kOL", url)

    # system returns false on a non-zero exit and nil if curl could not be run.
    warn "*** curl failed for #{url}"
  end

  # Fallback for items without an enclosure: looks for exactly one list link
  # in the description whose text ends in ".mp4" and reports the download URL
  # derived from it. The download itself is disabled (see below).
  def fetch_from_links
    html = Nokogiri.HTML(description)
    first, *more = html.xpath("//ul/li/a").select { |link| link.text[/\.mp4$/] }
    if first.nil?
      puts "Nothing in content either; skipping"
      return
    end
    if more.any?
      puts "*** Found #{1 + more.size} mp4s; don't know what to do!"
      return
    end

    name = first.text
    # to_s guards against a link without an href attribute.
    file_id = first["href"].to_s[/file_id=(\d+)/, 1]
    url = "https://elixirsips.dpdcart.com/subscriber/download?file_id=#{file_id}"
    puts "Fetching #{name} from #{url} to #{dir}:"
    puts ">>> No, not actually since it never seems to work"
    # Download intentionally disabled; the skip-if-present check that used to
    # precede it was already switched off and has been dropped with it.
    # system "curl", "-u", UPASS, "-kLo", name, url
  end

  # @return [DateTime] the item's publication date, parsed from <pubDate>
  def date
    DateTime.parse(node.at("pubDate").text)
  end

  # @return [String] the lower-cased title with every run of characters other
  #   than a-z, 0-9, "-" and "." collapsed into a single "_"
  def clean_name
    node.at("title").text.downcase.gsub(/[^-a-z0-9.]+/, "_")
  end

  # @return [String] episode directory name, "<YYYY-MM-DD>-<clean_name>"
  def dir
    [date.strftime("%Y-%m-%d"), clean_name].join("-")
  end

  # @return [String] text content of the item's <description>
  def description
    node.at("description").text
  end

  # @return [Object, nil] the item's <enclosure> element, or nil if absent
  def enclosure
    node.at("enclosure")
  end
end
# Run the downloader only when this file is executed directly, not when it is
# loaded from another script.
GetAll.run if $PROGRAM_NAME == __FILE__
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment