Created
January 31, 2018 13:08
-
-
Save Yureien/2341179395e06f6c8b27e19ae79fa56d to your computer and use it in GitHub Desktop.
Scraper for https://anime-frost.com. Usage instructions are in the file's USAGE string.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Help text printed when the script is run with the wrong number of
# command-line arguments (see the argv check below). This string is
# user-facing runtime output — keep its wording/formatting as-is.
USAGE = """
Scraper for anime-frost.com.
Author: Soham Sen (FadedCoder) - http://sohamsen.me
Usage:
python animefrost_scraper.py <link to anime>
It will output a txt file with the Google Drive URLs.
Example:
python animefrost_scraper.py "https://anime-frost.com/anime/sword-art-online/"
Outputs -
A file called "sword-art-online.txt" which is like this -
-----------------------------------------------------------------------------------
#1 - The World of Swords. Link: https://anime-frost.com/anime/sword-art-online/0/1
https://drive.google.com/file/d/0BwBKVy9cKcUcbW5xT0xQUHBQR2c/preview
#2 - Beater. Link: https://anime-frost.com/anime/sword-art-online/0/2
https://drive.google.com/file/d/0BwBKVy9cKcUcUE92UURCVWs0dkE/preview
...
-----------------------------------------------------------------------------------
"""
import sys | |
import requests | |
import re | |
from bs4 import BeautifulSoup | |
# --- CLI entry: scrape the episode list of one anime, resolve each episode's
# --- Google Drive URL, and dump everything to "<anime-name>.txt".
if len(sys.argv) != 2:
    print(USAGE)
    sys.exit(0)  # sys.exit, not the interactive-only builtin exit()

# Raw string + escaped dots: "\S" in a plain literal is an invalid escape
# sequence (deprecated, a SyntaxError in future CPython), and an unescaped
# "." would match any character in the hostname.
name_match = re.findall(r"\S+anime-frost\.com/anime/(\S+)/", sys.argv[1])
if not name_match:
    # Malformed URL: show usage instead of crashing with IndexError.
    print(USAGE)
    sys.exit(1)
anime_name = name_match[0]

# Episode index page: each "episode-row" element carries number, title, link.
soup = BeautifulSoup(requests.get(sys.argv[1]).text, 'html.parser')
ep_list = []
get_ep_num = re.compile(r"(\d+)")  # hoisted: reused for every row
for row in soup.find_all(attrs="episode-row"):
    num = int(get_ep_num.findall(row.find(attrs={'class': "episode-number"}).text)[0])
    title = row.find(attrs={'class': "episode-title"}).text
    link = row.find("a").get("href")
    ep_list.append({"ep_num": num, "ep_title": title, "ep_link": link})

get_vid = re.compile(r"/player\?url=(\S+)&\S+")
base_url = "https://anime-frost.com"
get_gdrive_url = base_url + "/getplayercontents.php?id="
for ep in ep_list:
    # Each episode page embeds the player in an <iframe>; its src carries
    # the video id in the "url" query parameter.
    soup = BeautifulSoup(requests.get(ep["ep_link"]).text, 'html.parser')
    player_src = soup.find("iframe").get("src")
    video_id = get_vid.findall(player_src)[0]
    # getplayercontents.php redirects to Google Drive; the response's final
    # .url (after redirects) is the preview link we want.
    gdrive_video_url = requests.get(get_gdrive_url + video_id, headers={
        "referer": base_url + player_src}).url
    ep["gdrive_video_url"] = gdrive_video_url
    print("Got episode #{0} - {1}".format(ep['ep_num'], ep['ep_title']))

with open(anime_name + ".txt", "w") as f:
    for ep in ep_list:
        f.write("#{0} - {1}. Link: {2}\n".format(ep['ep_num'], ep['ep_title'], ep['ep_link']))
        f.write(ep['gdrive_video_url'] + "\n\n")
        # no per-line flush needed: the with-block closes (and flushes) the file
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment