Skip to content

Instantly share code, notes, and snippets.

@mys721tx
Created June 28, 2025 03:27
Show Gist options
  • Save mys721tx/68870e9190b03893db793eeb520ba600 to your computer and use it in GitHub Desktop.
Save mys721tx/68870e9190b03893db793eeb520ba600 to your computer and use it in GitHub Desktop.
Call GPT to summarize a book
import re
import openai
import os
# Prefer the OPENAI_API_KEY environment variable so a real secret never has to
# be pasted into this file; the placeholder remains the fallback when the
# variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
# Load the text (replace with your actual file path)
# The whole book is read into memory as one string; the context manager closes
# the file as soon as the read finishes.
with open("pg14833.txt", "r", encoding="utf-8") as f:
    text = f.read()
# Split by chapters (adjust the regex if needed)
# A heading is "CHAPTER" or "Chapter", whitespace, a run of Roman-numeral
# letters and/or digits, and an optional trailing period. One compiled pattern
# drives both calls so the headings found and the split points cannot drift
# apart when the regex is adjusted.
chapter_heading = re.compile(r"(?:CHAPTER|Chapter)\s+[IVXLCDM0-9]+\.?")
chapters = chapter_heading.split(text)
chapter_titles = chapter_heading.findall(text)
# Remove any preface or content before the first chapter
# (split() returns the text before the first heading as its first element, so
# dropping it lines chapters[i] up with chapter_titles[i]).
if len(chapters) > len(chapter_titles):
    chapters = chapters[1:]
# Maximum number of characters of chapter text sent per request
# (about 8000 characters, i.e. roughly 2000 tokens, for GPT-4.1).
CHUNK_SIZE = 8000

# Terms looked for in each lower-cased chapter; a chapter containing none of
# them is not summarised.
keywords = [
    "daylight",
    "day",
    "light",
    "moonlight",
    "moon",
    "sunlight",
    "sun",
]

# Ensure the folder that receives the per-chapter summary files exists;
# an already existing folder is not an error.
os.makedirs("varney_summaries", exist_ok=True)
def _split_at_whitespace(passage, limit):
    """Split *passage* into consecutive pieces of at most *limit* characters.

    A piece ends at the last space or newline inside its window when there is
    one, so words are not cut in half between two requests; a window without
    any whitespace is cut at exactly *limit* characters. Concatenating the
    returned pieces reproduces *passage*. An empty *passage* yields ``[]``.

    Raises:
        ValueError: if *limit* is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be at least 1")
    pieces = []
    start = 0
    while start < len(passage):
        end = min(start + limit, len(passage))
        if end < len(passage):
            # Search from start + 1 so the cut always lies past `start` (every
            # piece is non-empty) and up to index `end` inclusive so a window
            # that already stops right before whitespace is kept whole.
            cut = max(
                passage.rfind(" ", start + 1, end + 1),
                passage.rfind("\n", start + 1, end + 1),
            )
            if cut != -1:
                end = cut
        pieces.append(passage[start:end])
        start = end
    return pieces


# Fixed instructions placed in front of every passage; built once instead of
# on every request.
_PROMPT_INSTRUCTIONS = (
    "You are a literary analyst. For the following passage from a chapter of Varney the Vampire, return your answer in this exact format:"
    "\nSUMMARY: <3-5 sentence summary>"
    "\nSUNLIGHT: <'Yes' or 'No'>"
    "\nDAYLIGHT: <'Yes' or 'No'>"
    "\nMOONLIGHT: <'Yes' or 'No'>"
    "\nCONTEXT: <If 'Yes' to any, describe the context of Varney in sunlight, daylight, or moonlight. If 'No', say 'No mention of Varney in sunlight, daylight, or moonlight.'>"
    "\n---\n"
)

# For every chapter that mentions one of the keywords: summarise it part by
# part and write the summaries plus the chapter text to its own file.
# `idx` counts all chapters, including skipped ones, so file numbers follow
# the chapter's position in the book.
for idx, (title, chapter) in enumerate(zip(chapter_titles, chapters), 1):
    chapter_lower = chapter.lower()
    # Plain substring test: "day" also matches inside words such as "today".
    if not any(kw in chapter_lower for kw in keywords):
        continue  # Skip chapters without any of the keywords

    # Paginate the chapter if too long
    chapter_chunks = _split_at_whitespace(chapter, CHUNK_SIZE)
    total_parts = len(chapter_chunks)
    all_summaries = []
    for page_num, chunk in enumerate(chapter_chunks, 1):
        prompt = (
            _PROMPT_INSTRUCTIONS
            + f"{title} (Part {page_num} of {total_parts})\n{chunk.strip()}"
        )
        response = openai.chat.completions.create(
            model="gpt-4.1",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=400,
            temperature=0.2,
        )
        # Fall back to a visible placeholder when the reply carries no text.
        if response.choices and response.choices[0].message and response.choices[0].message.content:
            summary = response.choices[0].message.content.strip()
        else:
            summary = f"[No summary returned by OpenAI for part {page_num}]"
        all_summaries.append(f"PART {page_num} OF {total_parts}\n{summary}")

    # Write each summary and the chapter text to a file
    # Anything outside letters, digits, "_" and "-" in the heading becomes "_"
    # before the heading is used as part of the file name.
    safe_title = re.sub(r"[^a-zA-Z0-9_\-]", "_", title.strip())
    filename = f"varney_summaries/{idx:03d}_{safe_title}.txt"
    with open(filename, "w", encoding="utf-8") as out:
        out.write(f"{title}\n\n" + "\n\n".join(all_summaries) + f"\n\nCHAPTER TEXT:\n{chapter.strip()}\n")
    print(f"Wrote summary for {title} to {filename}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment