-
Star
(148)
You must be signed in to star a gist -
Fork
(37)
You must be signed in to fork a gist
-
-
Save alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd to your computer and use it in GitHub Desktop.
import requests
import base64
from tqdm import tqdm

# Signed, expiring URL of the master.json manifest to download.
master_json_url = 'https://178skyfiregce-a.akamaihd.net/exp=1474107106~acl=%2F142089577%2F%2A~hmac=0d9becc441fc5385462d53bf59cf019c0184690862f49b414e9a2f1c5bafbe0d/142089577/video/426274424,426274425,426274423,426274422/master.json?base64_init=1'
# The last 26 characters are '/master.json?base64_init=1'; searching for
# the final '/' before them keeps everything up to and including '.../video/'.
base_url = master_json_url[:master_json_url.rfind('/', 0, -26) + 1]

resp = requests.get(master_json_url)
content = resp.json()

# Select the rendition with the greatest height (first one wins on ties).
heights = [(i, d['height']) for (i, d) in enumerate(content['video'])]
idx, _ = max(heights, key=lambda pair: pair[1])
video = content['video'][idx]
video_base_url = base_url + video['base_url']
print('base url:', video_base_url)

filename = 'video_%d.mp4' % video['id']
print('saving to %s' % filename)

# The context manager flushes and closes the output file even when a
# request raises part-way through the download.
with open(filename, 'wb') as video_file:
    # The manifest carries the init segment inline, base64-encoded.
    init_segment = base64.b64decode(video['init_segment'])
    video_file.write(init_segment)

    for segment in tqdm(video['segments']):
        segment_url = video_base_url + segment['url']
        resp = requests.get(segment_url, stream=True)
        if resp.status_code != 200:
            # Stop at the first failed segment; what was written so far
            # stays on disk.
            print('not 200!')
            print(resp)
            print(segment_url)
            break
        for chunk in resp:
            video_file.write(chunk)
If it gives problems try to remove the moviepy import.
I think I'm not actually using it anywhere
moviepy.editor suppression and simplified importation
Before v2.0, it was advised to import from moviepy.editor whenever you needed to do some sort of manual operations, such as previewing or hand editing, because the editor package handled a lot of magic and initialization, making your life easier, at the cost of initializing some complex modules like pygame.
With version 2.0, the moviepy.editor namespace simply no longer exists. You simply import everything from moviepy like this:
from moviepy import *  # Simple and nice: __all__ is set in moviepy, so only useful things will be loaded
from moviepy import VideoFileClip # You can also import only the things you really need
https://zulko.github.io/moviepy/getting_started/updating_to_v2.html
Here is a new version, which supports ffmpeg, youtube-dl, yt-dlp, moviepy 2.0, videos without audio, automatic dependency installation and multithreading (thanks to @Javi3rV). I have not tested all conditional flows, but it should work. If you find a bug or anything else, I can fix it later.
import importlib.metadata
import subprocess
import sys
# Third-party distributions the script needs at runtime.
required = {'requests', 'tqdm', 'moviepy'}
# Names of every distribution visible to this interpreter.
installed = set(
    dist.metadata['Name'] for dist in importlib.metadata.distributions()
)
missing = required.difference(installed)
if len(missing) > 0:
    # Upgrade pip first, then install whatever is absent, using the same
    # interpreter that is running this script.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--upgrade', 'pip'])
    subprocess.check_call([sys.executable, '-m', 'pip', 'install'] + list(missing))
import os
import base64
import requests
from shutil import which
from tqdm import tqdm
from random import choice
from string import ascii_lowercase
from concurrent.futures import ThreadPoolExecutor
# Probe PATH once for each optional external tool.
has_ffmpeg = which('ffmpeg') is not None
has_youtube_dl = which('youtube-dl') is not None
has_yt_dlp = which('yt-dlp') is not None
# Set to True only when the pre-2.0 `moviepy.editor` namespace was imported.
moviepy_deprecated = False

if not has_ffmpeg:
    # moviepy is only imported when ffmpeg is not on PATH.
    try:
        from moviepy.editor import *  # before 2.0, deprecated
    except ImportError:
        from moviepy import *  # after 2.0
    else:
        moviepy_deprecated = True
url = input('enter [master|playlist].json url: ')
name = input('enter output name: ')

if 'master.json' in url:
    # master.json links are not downloaded by this script itself: the URL
    # is rewritten to its master.mpd form and handed to youtube-dl / yt-dlp.
    #
    # partition() drops the query string when there is one and leaves the
    # URL intact when there is none. Slicing with find('?') == -1 would
    # silently cut off the last character and break the replace() below.
    url = url.partition('?')[0] + '?query_string_ranges=1'
    url = url.replace('master.json', 'master.mpd')
    print(url)
    # Exit with the downloader's own status so failures are visible to
    # whatever launched this script.
    if has_youtube_dl:
        sys.exit(subprocess.run(['youtube-dl', url, '-o', name]).returncode)
    if has_yt_dlp:
        sys.exit(subprocess.run(['yt-dlp', url, '-o', name]).returncode)
    print('you should have youtube-dl or yt-dlp in your PATH to download master.json like links')
    sys.exit(1)
def download_segment(segment_url, segment_path):
    """Download a single media segment into its own file.

    Args:
        segment_url: Full URL of the segment to fetch.
        segment_path: Path of the file the segment body is written to.

    Returns:
        None. When the server answers with anything other than 200, a
        message and the URL are printed and no file is created.
    """
    # The context manager releases the connection on every path, including
    # the early return below where the streamed body is never read.
    with requests.get(segment_url, stream=True) as resp:
        if resp.status_code != 200:
            print('not 200!')
            print(segment_url)
            return
        with open(segment_path, 'wb') as segment_file:
            # Iterating the response object directly yields 128-byte
            # chunks; request larger ones to cut per-chunk overhead.
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                segment_file.write(chunk)
def download(what, to, base):
    """Download every segment of one stream and join them into one file.

    Args:
        what: Stream entry from the manifest; its 'mime_type',
            'init_segment' (base64) and 'segments' (each with a 'url')
            keys are read.
        to: Path of the output file to create.
        base: URL prefix prepended to every segment's 'url'.

    Raises:
        FileNotFoundError: If one or more segments were not downloaded.
            The temporary segment files are removed and `to` is not
            created in that case.
    """
    print('saving', what['mime_type'], 'to', to)

    init_segment = base64.b64decode(what['init_segment'])

    # suffix for support multiple downloads in same folder
    segment_suffix = ''.join(choice(ascii_lowercase) for _ in range(20)) + '_'

    segment_urls = [base + segment['url'] for segment in what['segments']]
    segment_paths = [f"segment_{i}_" + segment_suffix + ".tmp" for i in range(len(segment_urls))]

    with ThreadPoolExecutor(max_workers=15) as executor:
        # list() drains the lazy map so tqdm advances as segments finish.
        list(tqdm(executor.map(download_segment, segment_urls, segment_paths), total=len(segment_urls)))

    # A segment whose download failed leaves no file behind. Detect that
    # before touching the output, instead of crashing half-way through the
    # merge with a truncated file and stray temporary files.
    missing = [path for path in segment_paths if not os.path.exists(path)]
    if missing:
        for path in segment_paths:
            if os.path.exists(path):
                os.remove(path)
        raise FileNotFoundError(
            f'{len(missing)} of {len(segment_paths)} segments failed to download; {to} was not written'
        )

    with open(to, 'wb') as file:
        file.write(init_segment)
        # Append the segments in manifest order, deleting each temporary
        # file once it has been copied and closed.
        for segment_path in segment_paths:
            with open(segment_path, 'rb') as segment_file:
                file.write(segment_file.read())
            os.remove(segment_path)

    print('done')
name += '.mp4'
# NOTE(review): the -26 offset skips the last 26 characters of the URL before
# looking for a '/'; presumably sized for a '/master.json?base64_init=1'
# tail - confirm it still fits the playlist.json URLs entered here.
base_url = url[:url.rfind('/', 0, -26) + 1]

response = requests.get(url)
if response.status_code >= 400:
    print('error: cant get url content, test your link in browser, code=', response.status_code, '\ncontent:\n', response.content)
    sys.exit(1)

content = response.json()

# Pick the video rendition with the greatest height.
vid_heights = [(i, d['height']) for (i, d) in enumerate(content['video'])]
vid_idx, _ = max(vid_heights, key=lambda _h: _h[1])

# .get() treats a manifest with no 'audio' key the same as one with an
# empty audio list, instead of raising KeyError.
audio_present = bool(content.get('audio'))

# Pick the audio rendition with the highest bitrate, when there is one.
audio_quality = None
audio_idx = None
if audio_present:
    audio_quality = [(i, d['bitrate']) for (i, d) in enumerate(content['audio'])]
    audio_idx, _ = max(audio_quality, key=lambda _h: _h[1])

base_url = base_url + content['base_url']

# prefix for support multiple downloads in same folder
files_prefix = ''.join(choice(ascii_lowercase) for i in range(20)) + '_'

video_tmp_file = files_prefix + 'video.mp4'
video = content['video'][vid_idx]
download(video, video_tmp_file, base_url + video['base_url'])

audio_tmp_file = None
if audio_present:
    audio_tmp_file = files_prefix + 'audio.mp4'
    audio = content['audio'][audio_idx]
    download(audio, audio_tmp_file, base_url + audio['base_url'])

if not audio_present:
    # Nothing to merge: the downloaded video is the final result.
    os.rename(video_tmp_file, name)
    sys.exit(0)

if has_ffmpeg:
    # Copy both streams into one container without re-encoding.
    merge = subprocess.run(['ffmpeg', '-i', video_tmp_file, '-i', audio_tmp_file, '-c:v', 'copy', '-c:a', 'copy', name])
    if merge.returncode != 0:
        # Keep the downloaded parts so a failed merge does not throw the
        # whole download away.
        print('error: ffmpeg failed with code', merge.returncode, '- keeping', video_tmp_file, 'and', audio_tmp_file)
        sys.exit(merge.returncode)
    os.remove(video_tmp_file)
    os.remove(audio_tmp_file)
    sys.exit(0)

# Fallback when ffmpeg is not on PATH: merge through moviepy.
video_clip = VideoFileClip(video_tmp_file)
audio_clip = AudioFileClip(audio_tmp_file)

final_clip = None
if moviepy_deprecated:
    final_clip = video_clip.set_audio(audio_clip)  # moviepy < 2.0 API
else:
    final_clip = video_clip.with_audio(audio_clip)  # moviepy >= 2.0 API

final_clip.write_videofile(name)

# Close the clips before deleting the files they read from.
video_clip.close()
audio_clip.close()

os.remove(video_tmp_file)
os.remove(audio_tmp_file)
@kbabanov This worked like a charm. Thank you so much
Here is a new version, which supports ffmpeg, youtube-dl, yt-dlp, moviepy 2.0, videos without audio, automatic dependency installation and multithreading (thanks to @Javi3rV). I have not tested all conditional flows, but it should work. If you find a bug or anything else, I can fix it later.
I just updated the container image at https://github.com/davidecavestro/vimeo-dl
Is there any video URL that does not expire, so that I can add an automated test to the build?
EDIT: I could also use it to trigger deps installation at build time
EDIT2: I leveraged automatic deps installation passing a fake url
I didn't try this, but maybe downloading the json file and using it to test?
We know the json url changes but I'm not sure about the json contents
Edit: as an idea, in my personal script I added the possibility to add more than 1 url in a list of dataclasses (url, outputName). Then it just iterates the list and it downloads them one by one.
I didn't share it because it was just a personal preference, but it can easily be done in @kbabanov's script as well. I also thought about using multithreading to download all of them like there is no tomorrow, but Vimeo would take a look at the network traffic and would suspect something lol.
@Javi3rV :
I also thought about using multithreading and download all of them like there is no tomorrow but vimeo would take a look at net traffic and would suspect something
If so, you can tweak the number of workers in the line
with ThreadPoolExecutor(max_workers=15) as executor:
and, if you want, set it to 1 to disable multithreading entirely.
I also thought about the ability to download multiple URLs, and maybe I will come up with a solution a bit later.
I find it useful to have a way to avoid asking for user input, so that the whole thing can be easily scripted.
It's often just a matter of supporting env vars such as
url = os.getenv("SRC_URL") or input('enter [master|playlist].json url: ')
name = os.getenv("OUT_FILE") or input('enter output name: ')
max_workers = min(int(os.getenv("MAX_WORKERS", 5)), 15)
or/and if you prefer the launch args could be parsed.
Anyway IMHO multithreading is a different matter: as too many simultaneous requests from the same IP are a PITA, I consider a simple loop safer.
I'll check for using json contents for tests.
I installed all the packages (requests, tqdm, moviepy) and saved the code of the script to vimeo_script.py. When I try to run it with
python vimeo_script.py
, I get an error (the error output was not captured here). I uninstalled moviepy and installed it again, but nothing changed. I also tested it in a virtual environment, with no change. Can you help me with that? Thanks in advance!