Created
May 31, 2018 03:23
-
-
Save jarhill0/6d575329b67e8e8a6c32bdb573b89915 to your computer and use it in GitHub Desktop.
Use regex to parse HTML and make a Bandcamp embed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Use regex to parse HTML and make a Bandcamp embed.""" | |
import re | |
import requests | |
# Markup skeleton for the embed. ``str.format`` substitutes three named
# fields: {track_number} in the player URL, and {track_url} / {track_name}
# in the link nested inside the <iframe> element.
TEMPLATE = """
<iframe style="border: 0; width: 276px; height: 390px;"
src="https://bandcamp.com/EmbeddedPlayer/track={track_number}/size=large/bgcol=ffffff/linkcol=17a2b8/tracklist=false/"
seamless>
<a href="{track_url}">{track_name}</a>
</iframe>
"""
def make_embed(url):
    """Build the Bandcamp embed HTML for the page at *url*.

    Downloads the page, extracts the track name, the numeric track id and
    the ``og:url`` value from its markup, and substitutes them into
    ``TEMPLATE``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The filled-in embed markup, or ``None`` when the request fails or
        any of the three values cannot be found in the page.
    """
    try:
        # Without a timeout ``requests`` can wait indefinitely on a server
        # that never answers. A timeout is raised as a RequestException
        # subclass, so it is handled by the same branch as other failures.
        response = requests.get(url, timeout=10)
    except requests.exceptions.RequestException:
        return None

    page_content = response.text
    track_name = get_track_name(page_content)
    track_number = get_track_number(page_content)
    track_url = get_track_url(page_content)

    # All three pieces are required; pages that lack any of them produce
    # no embed at all.
    if not (track_name and track_number and track_url):
        return None

    return TEMPLATE.format(
        track_name=track_name,
        track_number=track_number,
        track_url=track_url,
    )
def get_track_name(html):
    """Return the track title from the page's ``<meta name="title">`` tag.

    Args:
        html: Page source to search.

    Returns:
        The ``content`` attribute value exactly as it appears in the markup
        (no entity decoding is performed), or ``None`` when no matching tag
        is found.
    """
    # ``[^"]+`` stops at the attribute's closing quote. A greedy ``.+`` would
    # run on to the last ``">`` on the same line, so single-line/minified
    # markup would yield the title plus whatever tags followed it.
    result = re.search(r'<meta\s+name="title"\s*content="([^"]+)">', html)
    if result:
        return result.group(1)
    return None
def get_track_number(html):
    """Find the id in the first ``/track=<digits>/`` fragment of *html*.

    Returns the digits as a string, or ``None`` if no such fragment exists.
    """
    found = re.search(r'/track=(\d+)/', html)
    return found.group(1) if found else None
def get_track_url(html):
    """Return the URL from the page's ``<meta property="og:url">`` tag.

    Args:
        html: Page source to search.

    Returns:
        The ``content`` attribute value (a run of characters containing no
        whitespace and no double quote), or ``None`` when no matching tag
        is found.
    """
    # Excluding ``"`` from the capture pins the match to this attribute's
    # closing quote. A greedy ``\S+`` would overshoot to a later ``">`` when
    # the following markup contains no whitespace.
    result = re.search(r'<meta\s+property="og:url"\s*content="([^"\s]+)">', html)
    if result:
        return result.group(1)
    return None
if __name__ == '__main__':
    # Manual smoke run: print the result of make_embed for each sample URL,
    # in order. The trailing notes record what the author expected to see.
    sample_urls = (
        'https://boypablo.bandcamp.com/track/losing-you',  # valid track link returns embed
        '',  # invalid link returns None
        'https://bandcamp.com',  # non-track link returns None
        'https://boypablo.bandcamp.com',  # non-track link returns None
        'https://boypablo.bandcamp.com/album/roy-pablo',  # non-track link returns None
    )
    for sample_url in sample_urls:
        print(make_embed(sample_url))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment