|
//usr/bin/env jbang |
|
//JAVA 21 |
|
//DEPS com.openai:openai-java:1.6.0 |
|
|
|
import com.openai.client.OpenAIClient; |
|
import com.openai.client.okhttp.OpenAIOkHttpClient; |
|
import com.openai.models.responses.ResponseCreateParams; |
|
import com.openai.models.responses.ResponseOutputText; |
|
|
|
import java.io.IOException; |
|
import java.nio.file.Files; |
|
import java.nio.file.Path; |
|
import java.nio.file.Paths; |
|
import java.util.ArrayList; |
|
import java.util.List; |
|
import java.util.stream.Stream; |
|
|
|
/** |
|
* JBang script for hierarchical summarization of Markdown files in a directory. |
|
* Generates leaf summaries, a global summary, and extracts personality traits as JSON. |
|
* Requires OPENAI_API_KEY environment variable. |
|
*/ |
|
public class EgoEchoAgentSummarizer { |
|
|
|
private static final String INPUT_DIR = "export"; |
|
private static final int CHUNK_SIZE = 20_000; |
|
private static final String MODEL_SUMMARY = "gpt-4.1-mini"; |
|
private static final String MODEL_EXTRACTION = "gpt-4.1"; |
|
|
|
public static void main(String[] args) throws IOException { |
|
// 0. Check API key |
|
String apiKey = System.getenv("OPENAI_API_KEY"); |
|
if (apiKey == null || apiKey.isBlank()) { |
|
System.err.println("ERROR: OPENAI_API_KEY is not set."); |
|
System.exit(1); |
|
} |
|
System.out.println("[DEBUG] Initializing OpenAI client"); |
|
OpenAIClient client = OpenAIOkHttpClient.builder().apiKey(apiKey).build(); |
|
|
|
// 1. Discover markdown files |
|
List<Path> mdFiles; |
|
try (Stream<Path> stream = Files.walk(Paths.get(INPUT_DIR))) { |
|
mdFiles = stream.filter(p -> p.toString().endsWith(".md")).toList(); |
|
} |
|
System.out.printf("[DEBUG] Found %d markdown files in '%s'%n", mdFiles.size(), INPUT_DIR); |
|
|
|
// 2. Read and chunk files with debug logging |
|
List<String> leafChunks = new ArrayList<>(); |
|
for (int i = 0, n = mdFiles.size(); i < n; i++) { |
|
Path file = mdFiles.get(i); |
|
System.out.printf("[DEBUG] Reading file %d/%d: %s%n", i + 1, n, file); |
|
String text = Files.readString(file); |
|
List<String> chunks = chunkText(text); |
|
System.out.printf("[DEBUG] Split '%s' into %d chunk(s)%n", file.getFileName(), chunks.size()); |
|
for (int j = 0, m = chunks.size(); j < m; j++) { |
|
System.out.printf("[DEBUG] Adding chunk %d/%d from %s%n", j + 1, m, file.getFileName()); |
|
leafChunks.add(chunks.get(j)); |
|
} |
|
} |
|
|
|
// 3. Leaf-level summaries |
|
System.out.printf("[DEBUG] Summarizing %d leaf chunk(s)%n", leafChunks.size()); |
|
List<String> leafSummaries = new ArrayList<>(); |
|
for (int i = 0, n = leafChunks.size(); i < n; i++) { |
|
System.out.printf("[DEBUG] Summarizing chunk %d/%d%n", i + 1, n); |
|
String summary = callOpenAI(client, |
|
"Summarize this text focusing on the author's personality and character traits. State the source (self, other) if possible.\n" + leafChunks.get(i), |
|
MODEL_SUMMARY |
|
); |
|
leafSummaries.add(summary); |
|
} |
|
|
|
// 4. Global summary |
|
System.out.printf("[DEBUG] Generating global summary from %d leaf summaries%n", leafSummaries.size()); |
|
String aggregated = String.join("\n", leafSummaries); |
|
String globalSummary = callOpenAI(client, |
|
"Aggregate these character and personality summaries into a global summary of the author's traits, keep additional information like source." + aggregated, |
|
MODEL_SUMMARY |
|
); |
|
|
|
// 5. Extract personality traits as JSON |
|
System.out.println("[DEBUG] Extracting personality traits as JSON"); |
|
String extractionPrompt = "Extract a JSON array of personality traits with category, facet, degree (high, medium, low), confidence score (0-1 how confident the model is about the statement, 0 not at all, 1 very confident due to high evidence), source (self or other) and evidence. Evidence must be at least one, but can be multiple. Try to find all characteristics with a high confidence. Also very unlikely traits can be listed with low degrees.\n\n" + globalSummary; |
|
String traitsJson = callOpenAI(client, extractionPrompt, MODEL_EXTRACTION); |
|
|
|
// 6. Write outputs |
|
System.out.println("[DEBUG] Writing outputs to files"); |
|
writeOutput("leaf_summaries.txt", leafSummaries); |
|
writeOutput("global_summary.txt", List.of(globalSummary)); |
|
writeOutput("personality_traits.json", List.of(traitsJson)); |
|
|
|
System.out.println("[DEBUG] Finished. Outputs: leaf_summaries.txt, global_summary.txt, personality_traits.json"); |
|
} |
|
|
|
private static List<String> chunkText(String text) { |
|
List<String> chunks = new ArrayList<>(); |
|
int length = text.length(); |
|
for (int start = 0; start < length; start += CHUNK_SIZE) { |
|
int end = Math.min(length, start + CHUNK_SIZE); |
|
chunks.add(text.substring(start, end)); |
|
} |
|
return chunks; |
|
} |
|
|
|
private static String callOpenAI(OpenAIClient client, String content, String model) { |
|
ResponseCreateParams params = ResponseCreateParams.builder() |
|
.model(model) |
|
.input(content) |
|
.build(); |
|
|
|
return client.responses() |
|
.create(params) |
|
.output().stream() |
|
.flatMap(r -> r.message().stream()) |
|
.flatMap(m -> m.content().stream()) |
|
.flatMap(c -> c.outputText().stream()) |
|
.findFirst() |
|
.map(ResponseOutputText::text) |
|
.orElseThrow(() -> new RuntimeException("No response from OpenAI")); |
|
} |
|
|
|
private static void writeOutput(String filename, List<String> lines) throws IOException { |
|
System.out.printf("[DEBUG] Writing %d section(s) to %s%n", lines.size(), filename); |
|
Files.writeString(Path.of(filename), String.join("\n---\n", lines)); |
|
} |
|
} |