Created
July 26, 2018 10:19
-
-
Save aljiwala/5af709f48619ed5893db0840ce6a0cab to your computer and use it in GitHub Desktop.
Parse URL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from re import match | |
from urllib.parse import urlparse | |
from urllib.parse import ParseResult | |
def get_parsed_url(url):
    """Normalize *url* into an absolute URL whose host starts with ``www.``.

    A missing scheme defaults to ``http``; an explicit ``http``/``https``
    scheme is kept as given. Params, query and fragment are passed through
    unchanged.

    Args:
        url: A URL or bare host string, e.g. ``'flipkart.com'``,
            ``'example.com:8080/path'`` or ``'https://example.com/a?b=1'``.

    Returns:
        The normalized URL as a string, e.g. ``'http://www.flipkart.com'``.
    """
    parsed = urlparse(url, 'http')
    if not parsed.netloc and parsed.scheme not in ('http', 'https'):
        # A scheme was detected but there is no "//" authority: urlparse
        # read the host of a bare "host:port/..." string as the scheme.
        # Re-parse with an explicit "//" so host and port land in netloc.
        parsed = urlparse('//' + url, 'http')

    if parsed.netloc:
        netloc, path = parsed.netloc, parsed.path
    else:
        # Bare "example.com[/path]": urlparse puts everything in ``path``,
        # so treat that text as the network location.
        netloc, path = parsed.path, ''

    if not netloc.startswith('www.'):
        netloc = 'www.' + netloc

    # Keep the parsed scheme instead of forcing "http", so https survives.
    return ParseResult(parsed.scheme, netloc, path, *parsed[3:]).geturl()
'''
In [2]: url = 'flipkart.com'
   ...: get_parsed_url(url)
   ...:
Out[2]: 'http://www.flipkart.com'
'''
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment