Skip to content

Instantly share code, notes, and snippets.

@rafapolo
Last active March 31, 2025 21:37
Show Gist options
  • Save rafapolo/59594d51b72c9beb7df8bf763284f9a8 to your computer and use it in GitHub Desktop.
Save rafapolo/59594d51b72c9beb7df8bf763284f9a8 to your computer and use it in GitHub Desktop.
Summarize YouTube transcripts using a local Gemma 3 model served by Ollama
import sys
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
import time
import requests
class Summarizer:
    """Summarize a long transcript with a locally served Ollama model.

    The transcript is cut into fixed-size character chunks, every chunk is
    summarized on its own, and the partial summaries are then merged into a
    single abstract by one final prompt. Intermediate and final results are
    also written to text files in the current working directory.
    """

    MODEL_URL = "http://localhost:11434/api/generate"
    MODEL_NAME = "gemma3"

    def prompt(self, text):
        """Send *text* to the local model and return its reply.

        Args:
            text: The complete prompt to submit.

        Returns:
            The value of the ``"response"`` field of the JSON reply, or an
            empty string when the request fails, the reply is not valid
            JSON, or the field is absent.
        """
        payload = {
            "stream": False,
            "model": self.MODEL_NAME,
            "prompt": text,
        }
        try:
            response = requests.post(self.MODEL_URL, json=payload)
            response.raise_for_status()
            return response.json().get("response", "")
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            print(f"Error communicating with the model: {e}")
            # Return a defined, falsy value so callers can test for failure.
            return ""

    def summarize(self, transcript, video_id=None, chunk_size=2000):
        """Summarize *transcript* chunk by chunk, then merge the results.

        Args:
            transcript: Full transcript text.
            video_id: Prefix for the output file names. When omitted, a
                module-level ``video_id`` is used if one exists (this is how
                the command-line entry point supplies it); otherwise the
                prefix ``"video"`` is used.
            chunk_size: Maximum number of characters per chunk sent to the
                model. Must be a positive integer.

        Returns:
            The merged summary, or an empty string when the transcript is
            empty or the model produced no usable output.

        Raises:
            ValueError: If *chunk_size* is not positive.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if video_id is None:
            video_id = globals().get("video_id", "video")
        if not transcript:
            return ""

        chunks = [
            transcript[start:start + chunk_size]
            for start in range(0, len(transcript), chunk_size)
        ]
        print("chunks:", len(chunks))

        # Summarize each chunk using the local model.
        summaries = []
        for index, chunk in enumerate(chunks):
            chunk_summary = self.prompt(
                f"Summarize the following long-form interview transcript into a clear, one-page executive summary into the same original language. Focus on key themes, insights and relevant statements. Structure the output with a title, a brief introductory paragraph, and bullet points highlighting the most important ideas. Avoid quoting directly and instead paraphrase in concise academic language that captures the essence of his vision and arguments. Transcript:, {chunk}"
            )
            if not chunk_summary:
                # A failed chunk should not abort the whole run.
                print(f"Skipping chunk {index}: the model returned no summary.")
                continue
            summaries.append(chunk_summary)
            # enumerate() gives the true position; list.index() would return
            # the first match and make identical chunks overwrite each other.
            self._write_text(
                f"{video_id}_summary_chunk_{index}.txt", chunk_summary
            )

        if not summaries:
            return ""

        # Separate the partial summaries so the model can tell them apart.
        joined = "\n\n".join(summaries)
        summary = self.prompt(
            f"Synthesize in max 3 pages these section summaries into a cohesive abstract. Organize them by themes, eliminate repetition, and preserve a clear academic tone throughout. Summaries: {joined}"
        )
        if summary:
            self._write_text(f"{video_id}_summary.txt", summary)
        return summary

    @staticmethod
    def _write_text(path, text):
        """Write *text* to *path* as UTF-8, closing the file afterwards."""
        with open(path, "w", encoding="utf-8") as handle:
            handle.write(text)
class TranscriptFetcher:
    """Download the transcript of a YouTube video and save it as text."""

    def fetch_transcript(self, video_id):
        """Fetch the transcript for *video_id* and return it as one string.

        The language codes of all transcripts listed for the video are
        collected and passed to ``get_transcript`` as the language
        preference. The text of every returned entry is joined with single
        spaces, printed, and written to ``<video_id>_transcript.txt``.

        Args:
            video_id: The YouTube video identifier.

        Returns:
            The transcript text, or ``None`` when it could not be fetched
            (an explanatory message is printed in that case).
        """
        # Bound before the try so the NoTranscriptFound message below can
        # always be formatted, even if the failure happens early.
        languages = []
        try:
            # Fetch available languages for the video.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            languages = [item.language_code for item in transcript_list]
            print("transcript languages:", languages)

            # Attempt to fetch the transcript.
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
            transcript_text = " ".join(entry["text"] for entry in transcript)
            print("Transcript:", transcript_text)

            # Explicit UTF-8 keeps non-ASCII transcripts writable regardless
            # of the platform default; `with` closes the handle.
            with open(f"{video_id}_transcript.txt", "w", encoding="utf-8") as handle:
                handle.write(transcript_text)
            return transcript_text
        except TranscriptsDisabled:
            print("Transcripts are disabled for this video.")
        except NoTranscriptFound:
            print(f"No transcript found for the specified language(s): {languages}")
        except Exception as e:
            # Best-effort boundary: report anything else and signal failure.
            print(f"An error occurred: {e}")
        return None
class App:
    """Command-line workflow: fetch a video's transcript, then summarize it."""

    def run(self, video_id):
        """Fetch and summarize the transcript of *video_id*, printing progress.

        Prints the summary on success, or a failure message when either the
        transcript or the summary comes back empty. Returns nothing.
        """
        print("Fetching transcript for video", video_id)
        text = TranscriptFetcher().fetch_transcript(video_id)
        if not text:
            print("Failed to fetch the transcript.")
            return

        print("Generating summary...")
        result = Summarizer().summarize(text)
        if not result:
            print("Failed to generate summary.")
            return

        print("Summary:\n", result)
if __name__ == "__main__":
    # Script entry point: expects the YouTube video id as the first argument
    # and reports the wall-clock time the whole run took.
    if len(sys.argv) >= 2:
        print("Lets' Sumarize!")
        # Kept as a module-level name on purpose: Summarizer.summarize looks
        # up a module-level `video_id` when building its output file names.
        video_id = sys.argv[1]
        started_at = time.time()
        App().run(video_id)
        print(f"in {time.time() - started_at:.2f} seconds")
    else:
        print("Usage: python main.py <video_id>")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment