Created
February 2, 2012 02:59
-
-
Save just-digital/1721132 to your computer and use it in GitHub Desktop.
Simple Python script that generates a list of candidate domain names from a keyword phrase/string. The script also checks each candidate with a DNS lookup to see whether it already resolves to an address.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import sys | |
# Space-separated words that are dropped when building the "no stop words"
# name variants (not definitive).
stopwords = " a to the ltd is for of on will with pty"
# Characters that remove_chars() keeps; anything else except a space is
# stripped. The alphabet previously ended in "xyx", so "z" was removed from
# every generated name.
legalchars = "abcdefghijklmnopqrstuvwxyz1234567890"
# Transliteration table used by replace_chars(): each entry is
# (glyphs, plain_replacement). Upper-case glyphs are listed as well as
# lower-case ones.
# The "a" group used to list "å" twice and had no lower-case "ã", so "ã" was
# never transliterated; the duplicate entry is now "ã".
replace = (
    (("Ã","Å","Ä","À","Á","Â","ã","å","ä","à","á","â"),"a"),
    (("Ç","Č","ç","č"),"c"),
    (("É","È","Ê","Ë","Ĕ","è","ê","ë","ĕ","é"),"e"),
    (("Ğ","Ģ","ģ","ğ"),"g"),
    (("Ï","Î","Í","Ì","ï","î","í","ì"),"i"),
    (("Ñ","ñ"),"n"),
    (("Ö","Ô","Ō","Ò","Ó","Ø","ö","ô","ō","ò","ó","ø"),"o"),
    (("Ŝ","Ş","ŝ","ş"),"s"),
    (("Ü","Ū","Û","Ù","Ú","ü","ū","û","ù","ú"),"u"),
    (("Ÿ","ÿ"),"y"),
)
# Suffixes appended to every generated name variant: one ".<label>.nz" entry
# per second-level label, in the order listed here.
domains = tuple(
    ".%s.nz" % label
    for label in (
        "co",
        "net",
        "org",
        "ac",
        "geek",
        "gen",
        "iwi",
        "maori",
        "school",
    )
)
def remove_blanks(lst):
    """ Return a new list with the falsy entries (e.g. "") of lst dropped, order kept """
    return [item for item in lst if item]
def replace_chars(subject):
    """ Swap each glyph listed in the replace table for its plain replacement """
    for glyphs, plain in replace:
        for glyph in glyphs:
            subject = subject.replace(glyph, plain)
    return subject
def remove_chars(subject):
    """ Strip every character that is neither a space nor listed in legalchars """
    kept = [ch for ch in subject if ch == " " or ch in legalchars]
    return "".join(kept)
def remove_stopwords(subjects):
    """ Return the words of subjects that are not in the stopwords string, order kept """
    # stopwords is space separated and starts with a space, so drop the empty pieces.
    stop_list = [token for token in stopwords.split(" ") if token]
    return [word for word in subjects if word not in stop_list]
def wordvariants(name):
    """ Return a list of name variants for a phrase (doesn't append domain name).

    The phrase is lower-cased, passed through replace_chars() and
    remove_chars(), then split on spaces. Candidates are produced in this
    order:

      1. all words joined with nothing between them
      2. all words joined by dashes
      3. words with stop words removed, joined with nothing between them
      4. words with stop words removed, joined by dashes
      5. each non-stop word on its own

    Empty candidates are skipped and each distinct variant is returned only
    once, at the position of its first occurrence. An empty or all-stop-word
    phrase therefore no longer yields "" entries, and a one-word phrase no
    longer yields the same variant several times.
    """
    name = remove_chars(replace_chars(name.lower()))
    words = remove_blanks(name.split(" "))
    core_words = remove_blanks(remove_stopwords(words))

    candidates = [
        "".join(words),       # all spaces removed
        "-".join(words),      # words joined by dashes
        "".join(core_words),  # stop words removed, spaces removed
        "-".join(core_words), # stop words removed, joined by dashes
    ]
    candidates.extend(core_words)  # try just single words

    # No point in processing any dups or empty names.
    variants = []
    for candidate in candidates:
        if candidate and candidate not in variants:
            variants.append(candidate)
    return variants
def manifest_domains(subject):
    """ Return every word variant of subject combined with every suffix in domains.

    Results are grouped by variant: all suffixes for the first variant, then
    all suffixes for the second, and so on.
    """
    return [
        variant + suffix
        for variant in wordvariants(subject)
        for suffix in domains
    ]
def test_domains(domains):
    """ Look up each name in domains and sort them into resolved / unresolved.

    Returns a (passed, failed) tuple:
      passed -- list of single-entry dicts {domain: ip} for names that
                resolved; each hit is also printed as "domain ip"
      failed -- list of names whose lookup raised an error or returned an
                empty address
    """
    import socket

    passed = []
    failed = []
    for d in domains:
        try:
            ip = socket.gethostbyname(d)
        except (socket.gaierror, UnicodeError):
            # gaierror: e.g. [Errno -5] No address associated with hostname.
            # UnicodeError: on Python 3 the hostname is IDNA-encoded first and
            # an empty or over-long label fails there; treat it like any
            # other name that does not resolve instead of aborting the run.
            failed.append(d)
            continue
        if ip:
            passed.append({ d:ip })
            # Single pre-formatted argument so the output is the same
            # "domain ip" line on both Python 2 and Python 3.
            print("%s %s" % (d, ip))
        else:
            failed.append(d)
    return passed, failed
def manifest_and_test(subject):
    """ Build the candidate domains for subject and return test_domains()'s result for them """
    candidates = manifest_domains(subject)
    outcome = test_domains(candidates)
    return outcome
# Script entry point: takes the phrase to check as the first command-line
# argument. Every print below passes a single argument in parentheses so the
# output is identical on Python 2 and Python 3.
if __name__ == "__main__":
    if len(sys.argv) >= 2:
        subject = sys.argv[1]
        passed, failed = manifest_and_test(subject)
        # NOTE(review): "failed" only means the DNS lookup did not return an
        # address; that is not proof the name is unregistered.
        print("Available:")
        for d in failed:
            print(d)
        print("Domains in use:")
        for d in passed:
            # Each entry is a single-item dict {domain: ip}.
            print(d)
    else:
        print("Usage: python manifest_url.py \"A name to check\"")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment