Created
December 3, 2024 22:07
-
-
Save zvodd/cf497f902b2392cd04478c99bc533cc3 to your computer and use it in GitHub Desktop.
Check a list of domains from a text file to see whether they resolve.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import OrderedDict | |
import socket | |
import sys | |
import re | |
# Compiled once at import time so that repeated calls reuse the same pattern.
#
# Structure of the pattern:
#   * one or more dot-terminated labels, each 1-63 characters long, made of
#     letters/digits with hyphens allowed only in the interior;
#   * a final label (the TLD) that is either a punycode label ("xn--...") or
#     2-63 letters.  The punycode alternative is listed first: otherwise the
#     alphabetic alternative would match just "xn" and stop at the hyphen.
_HOSTNAME_RE = re.compile(
    r'\b((?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+'
    r'(?:[xX][nN]--[a-zA-Z0-9-]{0,58}[a-zA-Z0-9]|[a-zA-Z]{2,63}))\b'
)


def extract_first_hostname(text):
    """
    Extract the first hostname from a given string.

    Matches typical hostname formats such as ``example.com``,
    ``www.example.com``, ``subdomain.example.co.uk``, long TLDs such as
    ``example.photography`` and punycode TLDs such as ``example.xn--p1ai``.
    A hostname needs at least one dot; bare labels are not matched.

    Args:
        text (str): The input string to search for a hostname.

    Returns:
        str or None: The first hostname found, or None if no hostname is detected.
    """
    match = _HOSTNAME_RE.search(text)
    return match.group(1) if match else None
def dns_lookup(url):
    """
    Resolve a hostname to an IP address string.

    Args:
        url (str): The hostname to resolve. Despite the parameter name this
            is passed straight to ``socket.gethostbyname``, so it should be
            a bare hostname rather than a full URL.

    Returns:
        str or None: The address returned by ``socket.gethostbyname``, or
        None if the name cannot be resolved or is not a valid hostname.
    """
    try:
        return socket.gethostbyname(url)
    except (socket.gaierror, UnicodeError):
        # gaierror: the resolver could not find an address for the name.
        # UnicodeError: the name failed IDNA encoding (an empty label or a
        # label longer than 63 characters), which happens before any lookup.
        # Either way the name does not resolve, so report it as such.
        return None
def process_urls(input_file):
    """
    Resolve the first hostname found on each line of a text file.

    Each resolved hostname is printed to stdout as it is found and each
    unresolved hostname is printed to stderr. A hostname that appears more
    than once in the file (compared case-insensitively) is looked up and
    reported only the first time, so the two returned collections never
    count the same name twice.

    Args:
        input_file (str): Path of the text file to read, one entry per line.
            Lines without a recognisable hostname are skipped.

    Returns:
        tuple: ``(domain_ip_map, dead_domains)`` where ``domain_ip_map`` is
        an OrderedDict mapping each resolved hostname to its IP address in
        file order, and ``dead_domains`` is a list of the hostnames that did
        not resolve.

    Raises:
        OSError: If the file cannot be opened.
    """
    domain_ip_map = OrderedDict()
    dead_domains = []
    seen = set()
    # Hostnames are plain ASCII, so undecodable bytes elsewhere on a line are
    # replaced instead of aborting the whole run with a UnicodeDecodeError.
    with open(input_file, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            domain = extract_first_hostname(line.strip())
            if domain is None:
                continue
            # DNS names are case-insensitive; skip repeats so they are not
            # resolved again or counted twice among the dead domains.
            key = domain.lower()
            if key in seen:
                continue
            seen.add(key)
            ip_address = dns_lookup(domain)
            if ip_address:
                domain_ip_map[domain] = ip_address
                print(f" {domain}: {ip_address}")
            else:
                dead_domains.append(domain)
                print(f" - {domain}", file=sys.stderr)
    return domain_ip_map, dead_domains
def main(argv=None):
    """
    Command-line entry point: check every domain listed in a text file.

    Args:
        argv (list or None): Command-line arguments without the program
            name. When None, ``sys.argv[1:]`` is used.

    Returns:
        int: 0 on success, 1 if the input file cannot be opened, 2 if no
        input file was given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Without this check the script would try to open "" and fail with
        # an unhelpful traceback.
        print(f"usage: {sys.argv[0]} <domains-file>", file=sys.stderr)
        return 2

    input_file = args[0]
    try:
        url_ip_map, dead_domains = process_urls(input_file)
    except (FileNotFoundError, PermissionError, IsADirectoryError) as exc:
        print(f"Cannot open {input_file}: {exc}", file=sys.stderr)
        return 1

    print(f"Number of valid domains: {len(url_ip_map)}")
    print(f"Number of dead domains: {len(dead_domains)}", file=sys.stderr)
    return 0


# Run only when executed as a script, so importing this module does not
# start DNS lookups.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment