Skip to content

Instantly share code, notes, and snippets.

@ice09
Last active May 7, 2025 21:03
Show Gist options
  • Save ice09/eef654aaf6d2f4a08e8e1c263262c897 to your computer and use it in GitHub Desktop.
Save ice09/eef654aaf6d2f4a08e8e1c263262c897 to your computer and use it in GitHub Desktop.
JBang script for extracting personality traits and character from different Markdown sources and data model population with LLMs.

Setup

  • Install JBang
  • Export files in Markdown format to folder export
  • What kind of files?
    • Files created by you (articles, blog posts, theses, CVs, forum discussions, emails, ...)
    • Files about you (references, qualifications, endorsements, ...)

Run

  • Set environment variable OPENAI_API_KEY
  • jbang EgoEchoAgentSummarizer.java

Result

  • leaf_summaries.txt: All summaries for the Markdown files in folder export
  • global_summary.txt: Summary of all leaf summaries.
  • personality_traits.json: Traits summarized in JSON format.

Substrate JSON Format

  {
    "trait": "Effective Communicator",
    "category": "Communication",
    "facet": "Clarity, Educational Skill",
    "confidence_score": 0.96,
    "source": "other",
    "evidence": [
      "Employer references describe clear, patient, and structured communication in teaching and mentoring roles.",
      "Training proposals and educational materials show ability to make complex topics accessible."
    ]
  }
//usr/bin/env jbang
//JAVA 21
//DEPS com.openai:openai-java:1.6.0
import com.openai.client.OpenAIClient;
import com.openai.client.okhttp.OpenAIOkHttpClient;
import com.openai.models.responses.ResponseCreateParams;
import com.openai.models.responses.ResponseOutputText;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
/**
 * JBang script for hierarchical summarization of Markdown files in a directory.
 *
 * <p>Pipeline:
 * <ol>
 *   <li>Recursively collect all regular {@code *.md} files below {@link #INPUT_DIR}.</li>
 *   <li>Split each file into chunks of at most {@link #CHUNK_SIZE} UTF-16 code units.</li>
 *   <li>Ask {@link #MODEL_SUMMARY} for a personality-focused summary of every chunk
 *       ("leaf summaries").</li>
 *   <li>Ask {@link #MODEL_SUMMARY} to aggregate the leaf summaries into one global summary.</li>
 *   <li>Ask {@link #MODEL_EXTRACTION} to extract personality traits as JSON from the
 *       global summary.</li>
 * </ol>
 *
 * <p>Each result is written to the current working directory as soon as it is available
 * ({@code leaf_summaries.txt}, {@code global_summary.txt}, {@code personality_traits.json}),
 * so a failure in a later step does not discard earlier results.
 *
 * <p>Requires the OPENAI_API_KEY environment variable.
 */
public class EgoEchoAgentSummarizer {

    /** Directory (relative to the working directory) that is scanned for Markdown files. */
    private static final String INPUT_DIR = "export";

    /** Maximum chunk length in UTF-16 code units ({@code char}s), not in model tokens. */
    private static final int CHUNK_SIZE = 20_000;

    /** Model used for the leaf summaries and the global summary. */
    private static final String MODEL_SUMMARY = "gpt-4.1-mini";

    /** Model used for the final JSON trait extraction. */
    private static final String MODEL_EXTRACTION = "gpt-4.1";

    /**
     * Runs the complete summarization pipeline.
     *
     * <p>Terminates the JVM with exit status 1 when the API key is missing, when
     * {@link #INPUT_DIR} is not a directory, or when no non-blank Markdown content is found.
     *
     * @param args ignored
     * @throws IOException if reading an input file or writing an output file fails
     */
    public static void main(String[] args) throws IOException {
        // 0. Validate the environment before doing any work.
        String apiKey = System.getenv("OPENAI_API_KEY");
        if (apiKey == null || apiKey.isBlank()) {
            System.err.println("ERROR: OPENAI_API_KEY is not set.");
            System.exit(1);
        }
        Path inputDir = Paths.get(INPUT_DIR);
        if (!Files.isDirectory(inputDir)) {
            // Files.walk would otherwise fail with a bare NoSuchFileException stack trace.
            System.err.printf("ERROR: Input directory '%s' does not exist or is not a directory.%n", INPUT_DIR);
            System.exit(1);
        }

        System.out.println("[DEBUG] Initializing OpenAI client");
        OpenAIClient client = OpenAIOkHttpClient.builder().apiKey(apiKey).build();

        // 1. Discover markdown files
        List<Path> mdFiles = findMarkdownFiles(inputDir);
        System.out.printf("[DEBUG] Found %d markdown files in '%s'%n", mdFiles.size(), INPUT_DIR);

        // 2. Read and chunk files with debug logging
        List<String> leafChunks = readChunks(mdFiles);
        if (leafChunks.isEmpty()) {
            // Without input the model calls below would only produce invented content.
            System.err.printf("ERROR: No non-empty markdown content found in '%s'; nothing to summarize.%n", INPUT_DIR);
            System.exit(1);
        }

        // 3. Leaf-level summaries
        List<String> leafSummaries = summarizeChunks(client, leafChunks);
        writeOutput("leaf_summaries.txt", leafSummaries);

        // 4. Global summary
        System.out.printf("[DEBUG] Generating global summary from %d leaf summaries%n", leafSummaries.size());
        String aggregated = String.join("\n", leafSummaries);
        // The blank line keeps the instruction from running straight into the first summary.
        String globalSummary = callOpenAI(client,
                "Aggregate these character and personality summaries into a global summary of the author's traits, keep additional information like source.\n\n" + aggregated,
                MODEL_SUMMARY
        );
        writeOutput("global_summary.txt", List.of(globalSummary));

        // 5. Extract personality traits as JSON
        System.out.println("[DEBUG] Extracting personality traits as JSON");
        String extractionPrompt = "Extract a JSON array of personality traits with category, facet, degree (high, medium, low), confidence score (0-1 how confident the model is about the statement, 0 not at all, 1 very confident due to high evidence), source (self or other) and evidence. Evidence must be at least one, but can be multiple. Try to find all characteristics with a high confidence. Also very unlikely traits can be listed with low degrees.\n\n" + globalSummary;
        String traitsJson = callOpenAI(client, extractionPrompt, MODEL_EXTRACTION);
        writeOutput("personality_traits.json", List.of(traitsJson));

        System.out.println("[DEBUG] Finished. Outputs: leaf_summaries.txt, global_summary.txt, personality_traits.json");
    }

    /**
     * Recursively collects all regular files below {@code root} whose path ends with ".md".
     *
     * @param root directory to walk
     * @return the matching paths in sorted order, so repeated runs process files identically
     * @throws IOException if the directory tree cannot be traversed
     */
    private static List<Path> findMarkdownFiles(Path root) throws IOException {
        try (Stream<Path> stream = Files.walk(root)) {
            return stream
                    // Skip directories that merely happen to be named "*.md".
                    .filter(Files::isRegularFile)
                    .filter(p -> p.toString().endsWith(".md"))
                    // Files.walk gives no ordering guarantee; sort for reproducible output.
                    .sorted()
                    .toList();
        }
    }

    /**
     * Reads every file and splits its content into chunks via {@link #chunkText(String)}.
     * Blank files are skipped because they carry nothing to summarize.
     *
     * @param mdFiles files to read
     * @return all chunks of all files, in file order
     * @throws IOException if a file cannot be read
     */
    private static List<String> readChunks(List<Path> mdFiles) throws IOException {
        List<String> leafChunks = new ArrayList<>();
        for (int i = 0, n = mdFiles.size(); i < n; i++) {
            Path file = mdFiles.get(i);
            System.out.printf("[DEBUG] Reading file %d/%d: %s%n", i + 1, n, file);
            String text = Files.readString(file);
            if (text.isBlank()) {
                System.out.printf("[DEBUG] Skipping blank file %s%n", file.getFileName());
                continue;
            }
            List<String> chunks = chunkText(text);
            System.out.printf("[DEBUG] Split '%s' into %d chunk(s)%n", file.getFileName(), chunks.size());
            for (int j = 0, m = chunks.size(); j < m; j++) {
                System.out.printf("[DEBUG] Adding chunk %d/%d from %s%n", j + 1, m, file.getFileName());
                leafChunks.add(chunks.get(j));
            }
        }
        return leafChunks;
    }

    /**
     * Requests one personality-focused summary per chunk from {@link #MODEL_SUMMARY}.
     *
     * @param client     OpenAI client used for the requests
     * @param leafChunks text chunks to summarize
     * @return one summary per chunk, in the same order
     */
    private static List<String> summarizeChunks(OpenAIClient client, List<String> leafChunks) {
        System.out.printf("[DEBUG] Summarizing %d leaf chunk(s)%n", leafChunks.size());
        List<String> leafSummaries = new ArrayList<>(leafChunks.size());
        for (int i = 0, n = leafChunks.size(); i < n; i++) {
            System.out.printf("[DEBUG] Summarizing chunk %d/%d%n", i + 1, n);
            String summary = callOpenAI(client,
                    "Summarize this text focusing on the author's personality and character traits. State the source (self, other) if possible.\n" + leafChunks.get(i),
                    MODEL_SUMMARY
            );
            leafSummaries.add(summary);
        }
        return leafSummaries;
    }

    /**
     * Splits {@code text} into consecutive pieces of at most {@link #CHUNK_SIZE} chars.
     *
     * <p>A chunk boundary is moved one char back when it would otherwise fall between the two
     * halves of a surrogate pair, so no chunk ends or starts with half a character.
     *
     * @param text text to split
     * @return the chunks in order; empty when {@code text} is empty
     */
    private static List<String> chunkText(String text) {
        List<String> chunks = new ArrayList<>();
        int length = text.length();
        int start = 0;
        while (start < length) {
            int end = Math.min(length, start + CHUNK_SIZE);
            if (end < length
                    && Character.isHighSurrogate(text.charAt(end - 1))
                    && Character.isLowSurrogate(text.charAt(end))) {
                // CHUNK_SIZE > 1, so end stays greater than start and the loop still advances.
                end--;
            }
            chunks.add(text.substring(start, end));
            start = end;
        }
        return chunks;
    }

    /**
     * Sends {@code content} as input to the given model via the Responses API and returns
     * the text of the first output-text item found in the response.
     *
     * @param client  OpenAI client used for the request
     * @param content complete prompt text
     * @param model   model name
     * @return the text of the first output-text content item
     * @throws IllegalStateException if the response contains no output text
     */
    private static String callOpenAI(OpenAIClient client, String content, String model) {
        ResponseCreateParams params = ResponseCreateParams.builder()
                .model(model)
                .input(content)
                .build();
        return client.responses()
                .create(params)
                .output().stream()
                // Output items that are not messages, and content that is not output text,
                // yield empty streams here and are skipped.
                .flatMap(r -> r.message().stream())
                .flatMap(m -> m.content().stream())
                .flatMap(c -> c.outputText().stream())
                .findFirst()
                .map(ResponseOutputText::text)
                .orElseThrow(() -> new IllegalStateException("No response from OpenAI (model " + model + ")"));
    }

    /**
     * Writes the given sections to {@code filename}, separated by a line containing "---".
     * The path is resolved against the current working directory.
     *
     * @param filename target file name
     * @param lines    sections to write
     * @throws IOException if the file cannot be written
     */
    private static void writeOutput(String filename, List<String> lines) throws IOException {
        System.out.printf("[DEBUG] Writing %d section(s) to %s%n", lines.size(), filename);
        Files.writeString(Path.of(filename), String.join("\n---\n", lines));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment