mys721tx · June 28, 2025 03:27
diff --git a/summarizer.py b/summarizer.py
 import re
 import openai
 import os

 # Fallback used only when the OPENAI_API_KEY environment variable is unset,
 # so a real key never has to be written into this file.
 API_KEY_PLACEHOLDER = "YOUR_OPENAI_API_KEY"

 # Text to analyse (replace with your actual file path)
 SOURCE_PATH = "pg14833.txt"

 # Output directory for chapter summaries
 OUTPUT_DIR = "varney_summaries"

 # Chapter heading: "CHAPTER"/"Chapter", a Roman or Arabic numeral and an
 # optional period (adjust the regex if needed).  The \b keeps ordinary words
 # that merely start with a numeral letter ("Chapter Contents") from being read
 # as headings.  The outer capturing group makes split() keep the headings.
 CHAPTER_HEADING = re.compile(r"((?:CHAPTER|Chapter)\s+[IVXLCDM0-9]+\b\.?)")

 # Keywords to search for in chapters.  Matching is a plain substring test on
 # the lower-cased chapter, so "sun" also hits "sunrise" and "light" "twilight".
 keywords = [
     "daylight", "day", "light", "moonlight", "moon", "sunlight", "sun"
 ]

 # Set a safe chunk size for GPT-4.1 (in characters, e.g., ~8000 chars ~ 2000 tokens)
 CHUNK_SIZE = 8000


 def split_chapters(text):
     """Return ``(title, body)`` pairs for every chapter heading in *text*.

     Anything before the first heading (preface, licence text) is dropped.
     Returns an empty list when *text* contains no heading.
     """
     parts = CHAPTER_HEADING.split(text)
     # parts == [preface, title1, body1, title2, body2, ...]
     return list(zip(parts[1::2], parts[2::2]))


 def mentions_keyword(chapter):
     """Return True if *chapter* contains any entry of ``keywords`` (case-insensitive)."""
     lowered = chapter.lower()
     return any(kw in lowered for kw in keywords)


 def chunk_text(text, size=CHUNK_SIZE):
     """Split *text* into consecutive pieces of at most *size* characters.

     A piece ends just after the last whitespace character inside its window
     whenever the window would otherwise end in the middle of a word, so a
     keyword such as "moonlight" is not cut in half between two requests.
     A run longer than *size* without whitespace is cut at exactly *size*.
     Joining the returned pieces reproduces *text*.

     Raises:
         ValueError: if *size* is smaller than 1.
     """
     if size < 1:
         raise ValueError("size must be at least 1")
     pieces = []
     start = 0
     total = len(text)
     while start < total:
         end = min(start + size, total)
         if end < total and not text[end].isspace():
             # The window ends mid-word: back up to the nearest whitespace.
             # Stopping above `start` guarantees every piece makes progress.
             for cut in range(end - 1, start, -1):
                 if text[cut].isspace():
                     end = cut + 1
                     break
         pieces.append(text[start:end])
         start = end
     return pieces


 def build_prompt(title, chunk, page_num, page_count):
     """Build the analyst prompt for part *page_num* of *page_count* of a chapter."""
     return (
         "You are a literary analyst. For the following passage from a chapter of Varney the Vampire, return your answer in this exact format:"
         "\nSUMMARY: <3-5 sentence summary>"
         "\nSUNLIGHT: <'Yes' or 'No'>"
         "\nDAYLIGHT: <'Yes' or 'No'>"
         "\nMOONLIGHT: <'Yes' or 'No'>"
         "\nCONTEXT: <If 'Yes' to any, describe the context of Varney in sunlight, daylight, or moonlight. If 'No', say 'No mention of Varney in sunlight, daylight, or moonlight.'>"
         "\n---\n"
         f"{title} (Part {page_num} of {page_count})\n{chunk.strip()}"
     )


 def summarize_chunk(prompt, page_num):
     """Send *prompt* to the chat completions endpoint and return the reply text.

     Returns a bracketed placeholder naming *page_num* when the response
     carries no message content.
     """
     response = openai.chat.completions.create(
         model="gpt-4.1",
         messages=[{"role": "user", "content": prompt}],
         max_tokens=400,
         temperature=0.2,
     )
     if response.choices and response.choices[0].message and response.choices[0].message.content:
         return response.choices[0].message.content.strip()
     return f"[No summary returned by OpenAI for part {page_num}]"


 def summary_filename(idx, title):
     """Return the output file name for chapter number *idx* with heading *title*.

     Every character of the stripped title other than ASCII letters, digits,
     "_" and "-" is replaced by "_"; *idx* is zero-padded to three digits.
     """
     safe_title = re.sub(r"[^a-zA-Z0-9_\-]", "_", title.strip())
     return f"{idx:03d}_{safe_title}.txt"


 def main():
     """Summarise every keyword-bearing chapter of SOURCE_PATH into OUTPUT_DIR."""
     openai.api_key = os.environ.get("OPENAI_API_KEY", API_KEY_PLACEHOLDER)

     with open(SOURCE_PATH, "r", encoding="utf-8") as f:
         text = f.read()

     os.makedirs(OUTPUT_DIR, exist_ok=True)

     # idx counts every chapter, including skipped ones, so the numeric prefix
     # of each output file reflects the chapter's position in the book.
     for idx, (title, chapter) in enumerate(split_chapters(text), 1):
         if not mentions_keyword(chapter):
             continue  # Skip chapters without any of the keywords
         # Paginate the chapter if too long
         chapter_chunks = chunk_text(chapter)
         all_summaries = []
         for page_num, chunk in enumerate(chapter_chunks, 1):
             prompt = build_prompt(title, chunk, page_num, len(chapter_chunks))
             summary = summarize_chunk(prompt, page_num)
             all_summaries.append(f"PART {page_num} OF {len(chapter_chunks)}\n{summary}")
         # Write each summary and the chapter text to a file
         filename = os.path.join(OUTPUT_DIR, summary_filename(idx, title))
         with open(filename, "w", encoding="utf-8") as out:
             out.write(f"{title}\n\n" + "\n\n".join(all_summaries) + f"\n\nCHAPTER TEXT:\n{chapter.strip()}\n")
         print(f"Wrote summary for {title} to {filename}")


 if __name__ == "__main__":
     main()
	import re
	import openai
	import os

	openai.api_key = "YOUR_OPENAI_API_KEY"

	# Load the text (replace with your actual file path)
	with open("pg14833.txt", "r", encoding="utf-8") as f:
	text = f.read()

	# Split by chapters (adjust the regex if needed)
	chapters = re.split(r"(?:CHAPTER\|Chapter)\s+[IVXLCDM0-9]+\.?", text)
	chapter_titles = re.findall(r"(?:CHAPTER\|Chapter)\s+[IVXLCDM0-9]+\.?", text)

	# Remove any preface or content before the first chapter
	if len(chapters) > len(chapter_titles):
	chapters = chapters[1:]

	# Create output directory for chapter summaries
	os.makedirs("varney_summaries", exist_ok=True)

	# Define keywords to search for in chapters
	keywords = [
	"daylight", "day", "light", "moonlight", "moon", "sunlight", "sun"
	]

	# Set a safe chunk size for GPT-4.1 (in characters, e.g., ~8000 chars ~ 2000 tokens)
	CHUNK_SIZE = 8000

	for idx, (title, chapter) in enumerate(zip(chapter_titles, chapters), 1):
	chapter_lower = chapter.lower()
	if not any(kw in chapter_lower for kw in keywords):
	continue # Skip chapters without any of the keywords
	# Paginate the chapter if too long
	chapter_chunks = [chapter[i:i+CHUNK_SIZE] for i in range(0, len(chapter), CHUNK_SIZE)]
	all_summaries = []
	for page_num, chunk in enumerate(chapter_chunks, 1):
	prompt = (
	"You are a literary analyst. For the following passage from a chapter of Varney the Vampire, return your answer in this exact format:"
	"\nSUMMARY: <3-5 sentence summary>"
	"\nSUNLIGHT: <'Yes' or 'No'>"
	"\nDAYLIGHT: <'Yes' or 'No'>"
	"\nMOONLIGHT: <'Yes' or 'No'>"
	"\nCONTEXT: <If 'Yes' to any, describe the context of Varney in sunlight, daylight, or moonlight. If 'No', say 'No mention of Varney in sunlight, daylight, or moonlight.'>"
	"\n---\n"
	f"{title} (Part {page_num} of {len(chapter_chunks)})\n{chunk.strip()}"
	)
	response = openai.chat.completions.create(
	model="gpt-4.1",
	messages=[{"role": "user", "content": prompt}],
	max_tokens=400,
	temperature=0.2,
	)
	summary = ""
	if response.choices and response.choices[0].message and response.choices[0].message.content:
	summary = response.choices[0].message.content.strip()
	else:
	summary = f"[No summary returned by OpenAI for part {page_num}]"
	all_summaries.append(f"PART {page_num} OF {len(chapter_chunks)}\n{summary}")
	# Write each summary and the chapter text to a file
	safe_title = re.sub(r"[^a-zA-Z0-9_\-]", "_", title.strip())
	filename = f"varney_summaries/{idx:03d}_{safe_title}.txt"
	with open(filename, "w", encoding="utf-8") as out:
	out.write(f"{title}\n\n" + "\n\n".join(all_summaries) + f"\n\nCHAPTER TEXT:\n{chapter.strip()}\n")
	print(f"Wrote summary for {title} to {filename}")