Last active
March 31, 2025 21:37
-
-
Save rafapolo/59594d51b72c9beb7df8bf763284f9a8 to your computer and use it in GitHub Desktop.
Summarize youtube transcripts using a local gemma3 on ollama
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound | |
import time | |
import requests | |
class Summarizer:
    """Summarize long transcripts with a local ``gemma3`` model served by Ollama.

    The transcript is cut into fixed-size character chunks, every chunk is
    summarized on its own, and the partial summaries are then merged by one
    final prompt. Each partial summary and the final summary are also written
    to text files in the current working directory.
    """

    def prompt(self, text):
        """Send *text* to the local model and return its reply.

        Returns the ``"response"`` field of the JSON reply. Returns an empty
        string when the request fails, the reply is not valid JSON, or the
        field is missing, so callers can test the result for truthiness.
        """
        model_url = "http://localhost:11434/api/generate"
        payload = {
            "stream": False,
            "model": "gemma3",
            "prompt": text,
        }
        try:
            response = requests.post(model_url, json=payload)
            response.raise_for_status()
            return response.json().get("response", "")
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body on requests versions
            # whose JSON error is not a RequestException subclass.
            print(f"Error communicating with the model: {e}")
            # The original returned an unbound local here, which raised
            # UnboundLocalError instead of reporting the failure.
            return ""

    @staticmethod
    def _write_text(path, text):
        """Write *text* to *path* as UTF-8, closing the file deterministically."""
        with open(path, "w", encoding="utf-8") as handle:
            handle.write(text)

    def summarize(self, transcript, video_id=None, chunk_size=2000):
        """Summarize *transcript* chunk by chunk and return the merged summary.

        Args:
            transcript: Full transcript text.
            video_id: Prefix for the output file names. When omitted, the
                module-level ``video_id`` (set by the script entry point) is
                used for backward compatibility, falling back to ``"video"``.
            chunk_size: Maximum number of characters per chunk sent to the
                model (adjustable for model limits).

        Returns:
            The final summary text, or an empty string when the transcript is
            empty or the model could not be reached.

        Raises:
            ValueError: If *chunk_size* is not a positive integer.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if not transcript:
            return ""
        if video_id is None:
            # Older callers relied on a module-level global named video_id.
            video_id = globals().get("video_id", "video")

        chunks = [
            transcript[start:start + chunk_size]
            for start in range(0, len(transcript), chunk_size)
        ]
        print("chunks:", len(chunks))

        # Summarize each chunk using the local model.
        summaries = []
        # enumerate gives every chunk its own index; chunks.index(chunk) would
        # return the first match and make identical chunks share one file.
        for index, chunk in enumerate(chunks):
            chunk_summary = self.prompt(
                f"Summarize the following long-form interview transcript into a clear, one-page executive summary into the same original language. Focus on key themes, insights and relevant statements. Structure the output with a title, a brief introductory paragraph, and bullet points highlighting the most important ideas. Avoid quoting directly and instead paraphrase in concise academic language that captures the essence of his vision and arguments. Transcript:, {chunk}"
            )
            summaries.append(chunk_summary)
            self._write_text(f"{video_id}_summary_chunk_{index}.txt", chunk_summary)

        # Separate the partial summaries so they do not run into each other.
        joined_summaries = "\n\n".join(summaries)
        summary = self.prompt(
            f"Synthesize in max 3 pages these section summaries into a cohesive abstract. Organize them by themes, eliminate repetition, and preserve a clear academic tone throughout. Summaries: {joined_summaries}"
        )
        self._write_text(f"{video_id}_summary.txt", summary)
        return summary
class TranscriptFetcher:
    """Download a YouTube video's transcript and store it as plain text."""

    def fetch_transcript(self, video_id):
        """Fetch the transcript of *video_id* and return it as one string.

        The language codes of all listed transcripts are collected and passed
        to ``get_transcript``. The text of every returned entry is joined
        with single spaces, printed, and written to
        ``<video_id>_transcript.txt`` in the current working directory.

        Returns:
            The transcript text, or ``None`` when transcripts are disabled,
            none is found, or any other error occurs (the error is printed).
        """
        # Bound before the try block so the NoTranscriptFound handler can
        # always report it, even if listing the transcripts itself fails.
        languages = []
        try:
            # Fetch available languages for the video.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            languages = [transcript.language_code for transcript in transcript_list]
            print("transcript languages:", languages)

            # Attempt to fetch the transcript.
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
            transcript_text = " ".join(entry["text"] for entry in transcript)
            print("Transcript:", transcript_text)

            # Explicit UTF-8: with the locale default, non-ASCII text could
            # raise UnicodeEncodeError, which the broad handler below would
            # report as a failed fetch.
            with open(f"{video_id}_transcript.txt", "w", encoding="utf-8") as handle:
                handle.write(transcript_text)
            return transcript_text
        except TranscriptsDisabled:
            print("Transcripts are disabled for this video.")
        except NoTranscriptFound:
            print(f"No transcript found for the specified language(s): {languages}")
        except Exception as e:
            # Top-level boundary: report the problem and signal failure with None.
            print(f"An error occurred: {e}")
        return None
class App:
    """Command-line workflow: fetch a video's transcript, then summarize it."""

    def run(self, video_id):
        """Run the fetch-and-summarize pipeline for *video_id*.

        Progress and the outcome are printed; nothing is returned.
        """
        print("Fetching transcript for video", video_id)
        fetched_text = TranscriptFetcher().fetch_transcript(video_id)
        if not fetched_text:
            print("Failed to fetch the transcript.")
            return

        print("Generating summary...")
        result = Summarizer().summarize(fetched_text)
        if not result:
            print("Failed to generate summary.")
            return

        print("Summary:\n", result)
if __name__ == "__main__":
    # Script entry point: expects the YouTube video id as the first argument.
    if len(sys.argv) < 2:
        # sys.exit with a message prints it to stderr and exits with status 1,
        # so shells and calling scripts can detect the misuse.
        sys.exit("Usage: python main.py <video_id>")

    print("Lets' Sumarize!")
    # NOTE: video_id is deliberately bound at module level; Summarizer.summarize
    # looks this name up when building its output file names.
    video_id = sys.argv[1]
    # perf_counter is monotonic, so the measurement is unaffected by system
    # clock adjustments during a long run.
    start_time = time.perf_counter()
    App().run(video_id)
    elapsed_time = time.perf_counter() - start_time
    print(f"in {elapsed_time:.2f} seconds")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment