@petersandor
Created April 18, 2025 20:56
import argparse
import csv
import getpass
import sys

import requests
from bs4 import BeautifulSoup

LOGIN_URL = "https://news.ycombinator.com/login"
UPVOTED_URL = "https://news.ycombinator.com/upvoted"
FAVORITES_URL = "https://news.ycombinator.com/favorites"

session = requests.Session()
session.headers.update({"User-Agent": "Mozilla/5.0"})
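
# Note: the single Session above keeps the HN login cookie across all later
# requests; the browser-like User-Agent is a light hedge in case the default
# python-requests UA is filtered or rate-limited (an assumption, not verified).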

def login(username, password):
    # Log in with username/password; on success HN sets the 'user' cookie
    # on the shared session.
    payload = {
        "acct": username,
        "pw": password,
    }
    res = session.post(LOGIN_URL, data=payload)
    res.raise_for_status()
    if "Bad login" in res.text:
        raise Exception("❌ Login failed. Check your username/password.")
    print("✅ Login successful!")

def use_cookie(cookie_value):
    # Reuse an existing HN session by setting the 'user' cookie directly,
    # skipping the login step entirely.
    session.cookies.set("user", cookie_value)
    print("🔐 Using provided session cookie.")

def scrape_upvoted(username):
    return scrape_story_list(f"{UPVOTED_URL}?id={username}", "Upvoted")


def scrape_favorites(username):
    return scrape_story_list(f"{FAVORITES_URL}?id={username}", "Favorites", filter_comments=True)

def scrape_story_list(base_url, label, filter_comments=False):
    results = []
    page = 1
    while True:
        url = f"{base_url}&p={page}"
        print(f"🔍 Fetching {label} page {page}...")
        res = session.get(url)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        items = soup.select("tr.athing")
        print(f" - Found {len(items)} stories on page {page}")
        if not items:
            break
        for item in items:
            # Optional filtering to skip comments (currently unused: the
            # default favorites/upvoted pages list stories, with comments
            # on a separate tab):
            # subtext = item.find_next_sibling("tr")
            # if filter_comments and subtext and "comment" in subtext.text.lower():
            #     continue
            title_elem = item.select_one(".titleline > a")
            if not title_elem:
                print(" -- Couldn't find link")
                continue
            results.append({
                "title": title_elem.text.strip(),
                "url": title_elem["href"],
            })
        # HN lists 30 stories per page; stopping early here would save one
        # final request to an empty page:
        # if len(items) < 30:
        #     break
        page += 1
    print(f"🎉 Found {len(results)} {label.lower()} stories.")
    return results
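
# For reference, each story row in HN's markup looks roughly like this
# (simplified; it is what the 'tr.athing' / '.titleline > a' selectors match):
#   <tr class="athing" id="12345">
#     <td class="title"><span class="titleline">
#       <a href="https://example.com/">Story title</a>
#     </span></td>
#   </tr>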

def save_to_csv(data, filename="hn_stories.csv"):
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["title", "url"])
        writer.writeheader()
        writer.writerows(data)
    print(f"💾 Saved to {filename}")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="📰 Hacker News Scraper (Upvotes / Favorites)")
    parser.add_argument("-u", "--username", help="HN username (required)")
    parser.add_argument("-p", "--password", help="HN password (optional, will be prompted)")
    parser.add_argument("-c", "--cookie", help="HN session cookie (alternative to login)")
    parser.add_argument("-f", "--favorites", action="store_true", help="Scrape favorites instead of upvotes")
    parser.add_argument("-o", "--output", default="hn_stories.csv", help="Output CSV file (default: hn_stories.csv)")
    args = parser.parse_args()

    if not args.username:
        print("❌ --username is required.")
        sys.exit(1)

    print("🔍 Hacker News Scraper\n")

    try:
        if args.cookie:
            use_cookie(args.cookie)
        else:
            password = args.password or getpass.getpass("HN Password: ")
            login(args.username, password)

        if args.favorites:
            stories = scrape_favorites(args.username)
        else:
            stories = scrape_upvoted(args.username)

        save_to_csv(stories, args.output)
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)
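
# Example invocations (assuming the script is saved as hn_scraper.py; the
# filename is illustrative, not part of the gist):
#   python hn_scraper.py -u alice                  # prompts for password, scrapes upvotes
#   python hn_scraper.py -u alice -f -o favs.csv   # scrapes favorites to favs.csv
#   python hn_scraper.py -u alice -c "alice&..."   # reuse an existing 'user' cookie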