Created
January 31, 2018 13:08
-
-
Save Yureien/2341179395e06f6c8b27e19ae79fa56d to your computer and use it in GitHub Desktop.
Scraper for https://anime-frost.com. Usage instructions are in the file's USAGE string.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Help text printed when the script is run with the wrong number of
# command-line arguments (see the argv check below). This string is
# user-facing runtime output — keep its wording/formatting as-is.
USAGE = """
Scraper for anime-frost.com.
Author: Soham Sen (FadedCoder) - http://sohamsen.me
Usage:
python animefrost_scraper.py <link to anime>
It will output a txt file with the Google Drive URLs.
Example:
python animefrost_scraper.py "https://anime-frost.com/anime/sword-art-online/"
Outputs -
A file called "sword-art-online.txt" which is like this -
-----------------------------------------------------------------------------------
#1 - The World of Swords. Link: https://anime-frost.com/anime/sword-art-online/0/1
https://drive.google.com/file/d/0BwBKVy9cKcUcbW5xT0xQUHBQR2c/preview
#2 - Beater. Link: https://anime-frost.com/anime/sword-art-online/0/2
https://drive.google.com/file/d/0BwBKVy9cKcUcUE92UURCVWs0dkE/preview
...
-----------------------------------------------------------------------------------
"""
import sys | |
import requests | |
import re | |
from bs4 import BeautifulSoup | |
# --- CLI entry: scrape the episode list of one anime, resolve each episode's
# --- Google Drive URL, and dump everything to "<anime-name>.txt".
if len(sys.argv) != 2:
    print(USAGE)
    sys.exit(0)  # sys.exit, not the interactive-only builtin exit()

# Raw string + escaped dots: "\S" in a plain literal is an invalid escape
# sequence (deprecated, a SyntaxError in future CPython), and an unescaped
# "." would match any character in the hostname.
name_match = re.findall(r"\S+anime-frost\.com/anime/(\S+)/", sys.argv[1])
if not name_match:
    # Malformed URL: show usage instead of crashing with IndexError.
    print(USAGE)
    sys.exit(1)
anime_name = name_match[0]

# Episode index page: each "episode-row" element carries number, title, link.
soup = BeautifulSoup(requests.get(sys.argv[1]).text, 'html.parser')
ep_list = []
get_ep_num = re.compile(r"(\d+)")  # hoisted: reused for every row
for row in soup.find_all(attrs="episode-row"):
    num = int(get_ep_num.findall(row.find(attrs={'class': "episode-number"}).text)[0])
    title = row.find(attrs={'class': "episode-title"}).text
    link = row.find("a").get("href")
    ep_list.append({"ep_num": num, "ep_title": title, "ep_link": link})

get_vid = re.compile(r"/player\?url=(\S+)&\S+")
base_url = "https://anime-frost.com"
get_gdrive_url = base_url + "/getplayercontents.php?id="
for ep in ep_list:
    # Each episode page embeds the player in an <iframe>; its src carries
    # the video id in the "url" query parameter.
    soup = BeautifulSoup(requests.get(ep["ep_link"]).text, 'html.parser')
    player_src = soup.find("iframe").get("src")
    video_id = get_vid.findall(player_src)[0]
    # getplayercontents.php redirects to Google Drive; the response's final
    # .url (after redirects) is the preview link we want.
    gdrive_video_url = requests.get(get_gdrive_url + video_id, headers={
        "referer": base_url + player_src}).url
    ep["gdrive_video_url"] = gdrive_video_url
    print("Got episode #{0} - {1}".format(ep['ep_num'], ep['ep_title']))

with open(anime_name + ".txt", "w") as f:
    for ep in ep_list:
        f.write("#{0} - {1}. Link: {2}\n".format(ep['ep_num'], ep['ep_title'], ep['ep_link']))
        f.write(ep['gdrive_video_url'] + "\n\n")
        # no per-line flush needed: the with-block closes (and flushes) the file
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment