❯ bun run src/index.ts --agent aider --model gpt-4.1-mini --exercise 100 --verbose
==================================================
π Benchmark Results
==================================================
🎯 Success Rate: 76.0% (76/100)
⏱️ Average Duration: 169.3s
✅ Overall Success: 76
🤖 Agent Success: 99
🧪 Test Success: 76
❌ Failed: 24
π Detailed Results:
β
accumulate 41.8s (π€π§ͺ)
β acronym 51.6s (π€β)
β
all-your-base 96.3s (π€π§ͺ)
β
allergies 52.2s (π€π§ͺ)
β
alphametics 83.3s (π€π§ͺ)
β anagram 42.3s (π€β)
β
armstrong-numbers 74.2s (π€π§ͺ)
β
atbash-cipher 52.4s (π€π§ͺ)
β bank-account 83.8s (π€β)
β
beer-song 45.2s (π€π§ͺ)
β binary-search 39.9s (π€β)
β binary-search-tree 44.6s (π€β)
β
bob 48.7s (π€π§ͺ)
β bowling 1030.1s (π€β)
β
circular-buffer 54.2s (π€π§ͺ)
β
clock 2892.3s (π€π§ͺ)
β
collatz-conjecture 960.5s (π€π§ͺ)
β complex-numbers 55.9s (π€β)
β connect 2113.2s (π€β)
β crypto-square 42.9s (π€β)
β
custom-set 1012.6s (π€π§ͺ)
β
darts 167.3s (π€π§ͺ)
β diamond 104.3s (π€β)
β
difference-of-squares 948.9s (π€π§ͺ)
β
diffie-hellman 56.2s (π€π§ͺ)
β dnd-character 50.2s (π€β)
β
eliuds-eggs 41.8s (π€π§ͺ)
β
etl 42.9s (π€π§ͺ)
β flatten-array 37.7s (π€β)
β food-chain 83.5s (π€β)
β
game-of-life 43.2s (π€π§ͺ)
β
gigasecond 44.3s (π€π§ͺ)
β
grade-school 50.4s (π€π§ͺ)
β
grains 44.3s (π€π§ͺ)
β
hamming 42.0s (π€π§ͺ)
β
hello-world 68.1s (π€π§ͺ)
β house 62.8s (π€β)
β
isbn-verifier 93.1s (π€π§ͺ)
β
isogram 81.2s (π€π§ͺ)
β
kindergarten-garden 47.6s (π€π§ͺ)
β
knapsack 48.1s (π€π§ͺ)
β
largest-series-product 48.2s (π€π§ͺ)
β
leap 40.3s (π€π§ͺ)
β
linked-list 65.1s (π€π§ͺ)
β
list-ops 64.2s (π€π§ͺ)
β
luhn 59.9s (π€π§ͺ)
β
matching-brackets 45.3s (π€π§ͺ)
β
matrix 45.8s (π€π§ͺ)
β
minesweeper 49.1s (π€π§ͺ)
β
nth-prime 42.6s (π€π§ͺ)
β
nucleotide-count 42.0s (π€π§ͺ)
β
ocr-numbers 52.8s (π€π§ͺ)
β
palindrome-products 81.1s (π€π§ͺ)
β
pangram 44.3s (π€π§ͺ)
β pascals-triangle 141.9s (ββ)
β
perfect-numbers 82.5s (π€π§ͺ)
β
phone-number 69.4s (π€π§ͺ)
β
pig-latin 53.8s (π€π§ͺ)
β
prime-factors 44.2s (π€π§ͺ)
β
protein-translation 92.3s (π€π§ͺ)
β
proverb 82.4s (π€π§ͺ)
β
pythagorean-triplet 54.3s (π€π§ͺ)
β
queen-attack 72.4s (π€π§ͺ)
β
raindrops 78.5s (π€π§ͺ)
β rational-numbers 58.6s (π€β)
β react 122.0s (π€β)
β rectangles 53.9s (π€β)
β relative-distance 74.6s (π€β)
β
resistor-color 76.6s (π€π§ͺ)
β
resistor-color-duo 42.9s (π€π§ͺ)
β
resistor-color-trio 75.6s (π€π§ͺ)
β
reverse-string 40.9s (π€π§ͺ)
β
rna-transcription 42.4s (π€π§ͺ)
β robot-name 47.5s (π€β)
β
robot-simulator 53.7s (π€π§ͺ)
β
roman-numerals 52.5s (π€π§ͺ)
β
rotational-cipher 78.9s (π€π§ͺ)
β
run-length-encoding 75.3s (π€π§ͺ)
β
saddle-points 56.9s (π€π§ͺ)
β
say 53.2s (π€π§ͺ)
β
scrabble-score 54.8s (π€π§ͺ)
β
secret-handshake 345.6s (π€π§ͺ)
β
series 974.5s (π€π§ͺ)
β
sieve 949.0s (π€π§ͺ)
β
simple-cipher 90.7s (π€π§ͺ)
β
space-age 114.8s (π€π§ͺ)
β spiral-matrix 60.0s (π€β)
β
square-root 76.8s (π€π§ͺ)
β
strain 80.3s (π€π§ͺ)
β
sublist 50.1s (π€π§ͺ)
β
sum-of-multiples 42.5s (π€π§ͺ)
β
tournament 58.5s (π€π§ͺ)
β transpose 80.7s (π€β)
β
triangle 78.9s (π€π§ͺ)
β
twelve-days 49.6s (π€π§ͺ)
β two-bucket 79.2s (π€β)
β
two-fer 42.9s (π€π§ͺ)
β variable-length-quantity 61.9s (π€β)
β
word-count 47.0s (π€π§ͺ)
β
word-search 56.7s (π€π§ͺ)
Last active
June 22, 2025 02:15
-
-
Save laiso/9869cd9d48595c13a1a3ff83419dfb56 to your computer and use it in GitHub Desktop.
Benchmarks CLI agents (Claude Code, Codex CLI, Goose CLI and Aider) on Exercism TypeScript programming exercises https://github.com/exercism/typescript/tree/main/exercises/practice
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bun | |
import { spawn } from "bun"; | |
import { join } from "path"; | |
import { readdir } from "fs/promises"; | |
/**
 * Docker image used for every `docker run` in this script. Despite the name,
 * the same image hosts all agents (claude, goose, aider, codex) and the tests.
 */
const CLAUDE_CODE_CONTAINER = "cli-agents-benchmark";

/** Directory holding the practice exercises, relative to the current working directory. */
const EXERCISM_PRACTICE_PATH = "exercism/typescript/exercises/practice";

/**
 * Instruction text passed to the agents. The surrounding single quotes are part
 * of the value: callers embed it inside a double-quoted shell argument, so the
 * quotes reach the agent verbatim.
 */
const SYSTEM_PROMPT = "'Solve this TypeScript exercise. Read the test file to understand requirements and implement the solution.'";
/** Outcome of one containerised phase (the agent run or the test run) for one exercise. */
interface AgentResult {
  /** Exercise directory name. */
  exercise: string;
  /** True when the spawned process exited with code 0. */
  success: boolean;
  /** Captured output or exception message; present only on failure. */
  error?: string;
  /** Wall-clock time of the phase in milliseconds. */
  duration: number;
  /** Captured stdout; absent when the phase threw before the process finished. */
  output?: string;
}
/** Combined agent + test outcome for one exercise, as reported in the final summary. */
interface TestResult {
  /** Exercise directory name. */
  exercise: string;
  /** Whether the agent process exited successfully. */
  agentSuccess: boolean;
  /** Whether the test command exited successfully. */
  testSuccess: boolean;
  /** True only when both the agent and the tests succeeded. */
  overallSuccess: boolean;
  /** Failure detail from the agent phase, if any. */
  agentError?: string;
  /** Failure detail from the test phase, if any. */
  testError?: string;
  /** Agent phase duration in milliseconds. */
  agentDuration: number;
  /** Test phase duration in milliseconds. */
  testDuration: number;
  /** End-to-end duration in milliseconds, including the reset step. */
  totalDuration: number;
}
/** Settings shared by every exercise in a benchmark run. */
interface BenchmarkConfig {
  /** Shell command executed inside the container to run an exercise's tests. */
  testCommand: string;
  /** Agent to benchmark: 'claude', 'goose', 'aider' or 'codex'. */
  agent: string;
  /** Model name handed to the agent. */
  model: string;
  /** Provider name; only the goose agent reads it (as GOOSE_PROVIDER). */
  provider: string;
  /** When true, log the full commands and truncated output of failed phases. */
  verbose: boolean;
}
/**
 * Lists the available practice exercises.
 *
 * @returns Names of all non-hidden sub-directories of the practice directory
 *          (resolved against the current working directory), sorted.
 * @throws If the practice directory cannot be read.
 */
async function getPracticeExercises(): Promise<string[]> {
  const practiceDir = join(process.cwd(), EXERCISM_PRACTICE_PATH);
  const entries = await readdir(practiceDir, { withFileTypes: true });

  const names: string[] = [];
  for (const entry of entries) {
    // Dot-directories are skipped; plain files are never exercises.
    if (entry.isDirectory() && !entry.name.startsWith('.')) {
      names.push(entry.name);
    }
  }
  return names.sort();
}
/**
 * Finds the test files of one exercise.
 *
 * @param exercisePath Exercise directory, relative to the current working directory.
 * @returns File names (not paths) ending in `.test.ts`. Returns an empty array,
 *          after printing a warning, when the directory cannot be read.
 */
async function getTestFiles(exercisePath: string): Promise<string[]> {
  try {
    const exerciseDir = join(process.cwd(), exercisePath);
    const entries = await readdir(exerciseDir);
    return entries.filter(file => file.endsWith('.test.ts'));
  } catch {
    // Best effort: a missing directory only means no extra read-only mounts.
    console.warn(`Warning: Could not read test files from ${exercisePath}`);
    return [];
  }
}
/**
 * Returns the shell command used to run an exercise's tests.
 * Currently a pass-through of `config.testCommand`.
 */
function buildTestCommand(config: BenchmarkConfig): string {
  return config.testCommand;
}
/**
 * Builds the `docker run` argument vector that launches the selected agent
 * inside the benchmark image, with the exercise directory mounted at /workspace.
 *
 * The image name is always a standalone element of the returned array; callers
 * locate it to splice in additional mounts before it.
 *
 * Every agent now receives the configured model. Previously the aider branch
 * ignored `config.model`, so `--model` on the command line had no effect there.
 *
 * @param config       Run configuration (agent, model, provider).
 * @param exercisePath Exercise directory, relative to the current working directory.
 * @returns Arguments suitable for `spawn`.
 * @throws Error when `config.agent` is not one of claude, goose, aider, codex.
 */
function buildAgentCommand(config: BenchmarkConfig, exercisePath: string): string[] {
  const { agent, model, provider } = config;
  const workspaceMount = `${join(process.cwd(), exercisePath)}:/workspace`;

  // Common skeleton; only the environment flags and the in-container shell
  // command differ between agents.
  const dockerRun = (envArgs: string[], shellCommand: string): string[] => [
    "docker", "run", "--rm", "-i",
    ...envArgs,
    "-v", workspaceMount,
    "-w", "/workspace",
    CLAUDE_CODE_CONTAINER,
    "sh", "-c",
    shellCommand
  ];

  switch (agent) {
    case 'claude':
      return dockerRun(
        ["-e", "ANTHROPIC_API_KEY"],
        `claude --dangerously-skip-permissions --model ${model} -p .docs/instructions.md --system-prompt "${SYSTEM_PROMPT}"`
      );
    case 'goose':
      // goose takes model and provider from environment variables.
      return dockerRun(
        [
          "-e", `GOOSE_MODEL=${model}`,
          "-e", "OPENAI_API_KEY",
          "-e", "ANTHROPIC_API_KEY",
          "-e", "GOOGLE_API_KEY",
          "-e", `GOOSE_PROVIDER=${provider}`,
          "-e", "GOOSE_DISABLE_KEYRING=1"
        ],
        `goose run --with-builtin "developer" -i .docs/instructions.md --system "${SYSTEM_PROMPT}"`
      );
    case 'aider':
      return dockerRun(
        [
          "-e", "OPENAI_API_KEY",
          "-e", "ANTHROPIC_API_KEY",
          "-e", "GOOGLE_API_KEY"
        ],
        `aider --yes-always --no-auto-commits --model ${model} --message "${SYSTEM_PROMPT} $(cat .docs/instructions.md)" --file *.ts --read *.test.ts`
      );
    case 'codex':
      return dockerRun(
        ["-e", "OPENAI_API_KEY"],
        `codex exec --full-auto --skip-git-repo-check -m ${model} "$(cat .docs/instructions.md)"`
      );
    default:
      throw new Error(`Unknown agent: ${agent}`);
  }
}
/**
 * Runs the configured agent against one exercise inside Docker.
 *
 * @param config       Run configuration.
 * @param exercise     Exercise name, used for log lines and the result.
 * @param exercisePath Exercise directory, relative to the current working directory.
 * @returns The phase result; never rejects, failures are reported in the result.
 */
async function runAgentPhase(config: BenchmarkConfig, exercise: string, exercisePath: string): Promise<AgentResult> {
  const startTime = Date.now();
  try {
    const agentArgs = buildAgentCommand(config, exercisePath);

    // Add test files as read-only mounts, inserted just before the image name
    // so they are still parsed as `docker run` options.
    const testFiles = await getTestFiles(exercisePath);
    const mountIndex = agentArgs.indexOf(CLAUDE_CODE_CONTAINER);
    for (const testFile of testFiles) {
      agentArgs.splice(mountIndex, 0, "-v", `${join(process.cwd(), exercisePath, testFile)}:/workspace/${testFile}:ro`);
    }

    if (config.verbose) {
      console.log(`π€ Agent command: ${agentArgs.join(" ")}`);
    }

    // stderr must be piped explicitly (Bun inherits it by default), otherwise
    // the captured STDERR below is always empty.
    const proc = spawn(agentArgs, { stdout: "pipe", stderr: "pipe" });

    // Drain both streams while waiting for exit so a chatty agent cannot stall
    // on a full pipe.
    const [stdout, stderr, exitCode] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited
    ]);
    const duration = Date.now() - startTime;

    if (exitCode === 0) {
      console.log(`π€ ${exercise} - Agent Success (${duration}ms)`);
      return { exercise, success: true, duration, output: stdout };
    }

    console.log(`π€ ${exercise} - Agent Failed (${duration}ms)`);
    if (config.verbose) {
      console.log(` Agent STDOUT: ${stdout.slice(0, 500)}...`);
      console.log(` Agent STDERR: ${stderr.slice(0, 500)}...`);
    }
    return { exercise, success: false, error: `STDOUT: ${stdout}\nSTDERR: ${stderr}`, duration, output: stdout };
  } catch (error) {
    // Reached when the command cannot be built or the process cannot be started.
    const duration = Date.now() - startTime;
    const errorMsg = error instanceof Error ? error.message : String(error);
    console.log(`π€ ${exercise} - Agent Error (${duration}ms): ${errorMsg}`);
    return { exercise, success: false, error: errorMsg, duration };
  }
}
/**
 * Restores the tracked files of an exercise directory to their HEAD state with
 * `git checkout HEAD -- .`. Failures are reported as warnings and never thrown.
 *
 * @param exercisePath Exercise directory, relative to the current working directory.
 * @param verbose      When true, log the start and success of the reset.
 */
async function resetExercise(exercisePath: string, verbose: boolean = false): Promise<void> {
  try {
    if (verbose) {
      console.log(`π Resetting exercise: ${exercisePath}`);
    }

    const fullExercisePath = join(process.cwd(), exercisePath);
    const resetArgs = ["git", "-C", fullExercisePath, "checkout", "HEAD", "--", "."];

    // Pipe stderr explicitly (Bun inherits it by default) so the warning below
    // can include git's own error text.
    const proc = spawn(resetArgs, { stdout: "ignore", stderr: "pipe" });
    const [stderr, exitCode] = await Promise.all([
      new Response(proc.stderr).text(),
      proc.exited
    ]);

    if (exitCode !== 0) {
      console.warn(`Warning: Failed to reset ${exercisePath}: ${stderr}`);
    } else if (verbose) {
      console.log(`β Successfully reset ${exercisePath}`);
    }
  } catch (error) {
    const errorMsg = error instanceof Error ? error.message : String(error);
    console.warn(`Warning: Git reset failed for ${exercisePath}: ${errorMsg}`);
  }
}
/**
 * Runs the exercise's test command inside Docker against whatever the agent
 * left in the exercise directory.
 *
 * @param config       Run configuration (supplies the test command).
 * @param exercise     Exercise name, used for log lines and the result.
 * @param exercisePath Exercise directory, relative to the current working directory.
 * @returns The phase result; never rejects, failures are reported in the result.
 */
async function runTestPhase(config: BenchmarkConfig, exercise: string, exercisePath: string): Promise<AgentResult> {
  const startTime = Date.now();
  try {
    const testCommand = buildTestCommand(config);
    const testArgs: string[] = [
      "docker", "run", "--rm", "-i",
      "-v", `${join(process.cwd(), exercisePath)}:/workspace`,
      "-w", "/workspace",
      CLAUDE_CODE_CONTAINER,
      "sh", "-c", testCommand
    ];

    if (config.verbose) {
      console.log(`π§ͺ Test command: ${testArgs.join(" ")}`);
    }

    // stderr must be piped explicitly (Bun inherits it by default), otherwise
    // the captured STDERR below is always empty.
    const proc = spawn(testArgs, { stdout: "pipe", stderr: "pipe" });

    // Drain both streams while waiting for exit so large test output cannot
    // stall the child on a full pipe.
    const [stdout, stderr, exitCode] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited
    ]);
    const duration = Date.now() - startTime;

    if (exitCode === 0) {
      console.log(`π§ͺ ${exercise} - Test Success (${duration}ms)`);
      return { exercise, success: true, duration, output: stdout };
    }

    console.log(`π§ͺ ${exercise} - Test Failed (${duration}ms)`);
    if (config.verbose) {
      console.log(` Test STDOUT: ${stdout.slice(0, 500)}...`);
      console.log(` Test STDERR: ${stderr.slice(0, 500)}...`);
    }
    return { exercise, success: false, error: `STDOUT: ${stdout}\nSTDERR: ${stderr}`, duration, output: stdout };
  } catch (error) {
    const duration = Date.now() - startTime;
    const errorMsg = error instanceof Error ? error.message : String(error);
    console.log(`π§ͺ ${exercise} - Test Error (${duration}ms): ${errorMsg}`);
    return { exercise, success: false, error: errorMsg, duration };
  }
}
/**
 * Benchmarks one exercise: reset the directory, run the agent, then run the tests.
 *
 * @param config   Run configuration.
 * @param exercise Exercise directory name under the practice directory.
 * @returns Per-phase and overall outcome with durations in milliseconds.
 */
async function runExercise(config: BenchmarkConfig, exercise: string): Promise<TestResult> {
  const startTime = Date.now();
  const exercisePath = join(EXERCISM_PRACTICE_PATH, exercise);
  console.log(`π§ͺ Starting ${exercise}... (Docker)`);

  // Phase 0: Reset exercise to clean state
  await resetExercise(exercisePath, config.verbose);

  // Phase 1: Run AI Agent
  const agentResult = await runAgentPhase(config, exercise, exercisePath);

  // Phase 2: Run Tests (always run, even if agent failed)
  const testResult = await runTestPhase(config, exercise, exercisePath);

  const totalDuration = Date.now() - startTime;
  const overallSuccess = agentResult.success && testResult.success;

  if (overallSuccess) {
    console.log(`β ${exercise} - Overall Success (${totalDuration}ms)`);
  } else {
    console.log(`β ${exercise} - Overall Failed (${totalDuration}ms)`);
    if (!agentResult.success) {
      console.log(` π€ Agent failed: ${agentResult.error?.slice(0, 200)}...`);
    }
    if (!testResult.success) {
      console.log(` π§ͺ Test failed: ${testResult.error?.slice(0, 200)}...`);
    }
  }

  return {
    exercise,
    agentSuccess: agentResult.success,
    testSuccess: testResult.success,
    overallSuccess,
    agentError: agentResult.error,
    testError: testResult.error,
    agentDuration: agentResult.duration,
    testDuration: testResult.duration,
    totalDuration
  };
}
/**
 * Returns the argument following `flag` on the command line, or `undefined`
 * when the flag is absent or is the last argument.
 */
function readFlagValue(flag: string): string | undefined {
  const flagIndex = process.argv.indexOf(flag);
  return flagIndex === -1 ? undefined : process.argv[flagIndex + 1];
}

/**
 * CLI entry point: parses the flags, selects the exercises, runs them one after
 * another and prints a summary.
 *
 * Flags:
 *   --agent <name>     agent to run (default 'claude')
 *   --model <name>     model name (default 'sonnet')
 *   --provider <name>  provider name (default 'openai')
 *   --exercise <x>     a number N (first N exercises), an exercise name, or a
 *                      path whose last segment is the exercise name
 *   --list             print the available exercises and exit
 *   --verbose          log commands and truncated failure output
 *
 * Without --exercise only the first exercise is run.
 */
async function runBenchmark(): Promise<void> {
  const model = readFlagValue('--model') ?? 'sonnet';
  const agent = readFlagValue('--agent') ?? 'claude';
  const provider = readFlagValue('--provider') ?? 'openai';
  const verbose = process.argv.includes('--verbose');

  let specificExercise: string | null = readFlagValue('--exercise') ?? null;
  let exerciseCount: number | null = null;
  if (specificExercise && /^\d+$/.test(specificExercise)) {
    // A purely numeric value means "run the first N exercises".
    exerciseCount = parseInt(specificExercise, 10);
    specificExercise = null;
  } else if (specificExercise && specificExercise.includes('/')) {
    // Accept a path; empty segments are dropped so a trailing slash
    // (e.g. from shell completion) still resolves to the exercise name.
    specificExercise = specificExercise.split('/').filter(Boolean).pop() ?? null;
  }

  const listExercises = process.argv.includes('--list');
  const allExercises = await getPracticeExercises();

  if (listExercises) {
    console.log("π Available Exercism problems:");
    allExercises.forEach((exercise, index) => {
      console.log(` ${(index + 1).toString().padStart(3)}: ${exercise}`);
    });
    return;
  }

  console.log("π Starting Exercism TypeScript benchmark");
  console.log(`π Solving TypeScript problems with ${agent} agent (Docker mode, ${model} model)\n`);

  let exercises: string[];
  if (specificExercise) {
    if (!allExercises.includes(specificExercise)) {
      console.error(`β Specified problem '${specificExercise}' not found`);
      console.log("Use --list option to see available problems");
      // Signal the failure to calling scripts.
      process.exitCode = 1;
      return;
    }
    exercises = [specificExercise];
    console.log(`π― Specified problem: ${specificExercise}\n`);
  } else if (exerciseCount !== null) {
    // Compared against null so that an explicit 0 is honoured instead of
    // falling through to the "first exercise only" default.
    const count = Math.min(exerciseCount, allExercises.length);
    exercises = allExercises.slice(0, count);
    console.log(`π’ Number of problems: ${count} (out of ${allExercises.length})\n`);
  } else {
    exercises = allExercises.slice(0, 1);
    console.log(`π Found problems: ${allExercises.length} (testing only the first one)\n`);
  }

  const results: TestResult[] = [];
  const config: BenchmarkConfig = {
    testCommand: 'yarn && yarn test',
    agent,
    model,
    provider,
    verbose
  };

  // Exercises run strictly one at a time, with a one-second pause between them.
  for (const exercise of exercises) {
    const result = await runExercise(config, exercise);
    results.push(result);
    await new Promise(resolve => setTimeout(resolve, 1000));
  }

  const successCount = results.filter(r => r.overallSuccess).length;
  const totalCount = results.length;
  // Guard the divisions so an empty run reports 0 instead of NaN.
  const successRate = totalCount > 0 ? (successCount / totalCount) * 100 : 0;
  const avgDuration = totalCount > 0
    ? results.reduce((sum, r) => sum + r.totalDuration, 0) / totalCount
    : 0;
  const agentSuccessCount = results.filter(r => r.agentSuccess).length;
  const testSuccessCount = results.filter(r => r.testSuccess).length;

  console.log("\n" + "=".repeat(50));
  console.log("π Benchmark Results");
  console.log("=".repeat(50));
  console.log(`π― Success Rate: ${successRate.toFixed(1)}% (${successCount}/${totalCount})`);
  console.log(`β±οΈ Average Duration: ${avgDuration.toFixed(0)}ms`);
  console.log(`β Overall Success: ${successCount}`);
  console.log(`π€ Agent Success: ${agentSuccessCount}`);
  console.log(`π§ͺ Test Success: ${testSuccessCount}`);
  console.log(`β Failed: ${totalCount - successCount}`);

  console.log("\nπ Detailed Results:");
  for (const result of results) {
    const overallStatus = result.overallSuccess ? "β " : "β";
    const agentStatus = result.agentSuccess ? "π€" : "β";
    const testStatus = result.testSuccess ? "π§ͺ" : "β";
    const duration = `${result.totalDuration}ms`;
    console.log(` ${overallStatus} ${result.exercise.padEnd(25)} ${duration} (${agentStatus}${testStatus})`);
  }

  const failures = results.filter(r => !r.overallSuccess);
  if (failures.length > 0) {
    console.log("\nπ Errors for failed problems:");
    for (const result of failures) {
      console.log(` β ${result.exercise}:`);
      if (result.agentError) {
        console.log(` π€ Agent: ${result.agentError.slice(0, 500)}${result.agentError.length > 500 ? '...' : ''}`);
      }
      if (result.testError) {
        console.log(` π§ͺ Test: ${result.testError.slice(0, 500)}${result.testError.length > 500 ? '...' : ''}`);
      }
    }
  }
}
// Run only when executed directly, not when imported as a module.
if (import.meta.main) {
  runBenchmark().catch((error: unknown) => {
    console.error(error);
    // Make an unexpected failure visible to calling scripts and CI.
    process.exitCode = 1;
  });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image shared by all benchmarked CLI agents (Claude Code, Codex CLI, Goose,
# Aider) and by the test runs. The exercise directory is bind-mounted over
# /workspace at run time, so nothing written to /workspace during the build is
# visible in a running container.
FROM node:22

ARG TZ
ENV TZ="$TZ"

# Install basic development tools and iptables/ipset
RUN apt-get update && apt-get install -y less \
  git \
  procps \
  sudo \
  fzf \
  zsh \
  man-db \
  unzip \
  gnupg2 \
  gh \
  iptables \
  ipset \
  iproute2 \
  dnsutils \
  aggregate \
  ripgrep \
  jq \
  && rm -rf /var/lib/apt/lists/*

# Ensure default node user has access to /usr/local/share
RUN mkdir -p /usr/local/share/npm-global && \
  chown -R node:node /usr/local/share

ARG USERNAME=node

WORKDIR /workspace
RUN mkdir -p /workspace && \
  chown -R node:node /workspace

# Enable corepack for yarn version management (as root)
ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0
RUN corepack enable && corepack prepare yarn@stable --activate

# Set up yarn and corepack environment
ENV YARN_CACHE_FOLDER=/home/node/.yarn/cache
ENV YARN_GLOBAL_FOLDER=/home/node/.yarn/global
RUN mkdir -p /home/node/.yarn/cache /home/node/.yarn/global /home/node/.cache/node/corepack && \
  chown -R node:node /home/node/.yarn /home/node/.cache

# Install global packages
ENV NPM_CONFIG_PREFIX=/usr/local/share/npm-global
ENV PATH=$PATH:/usr/local/share/npm-global/bin

# Set up non-root user
USER node

# Install Claude Code
RUN npm install -g @anthropic-ai/claude-code
RUN mkdir -p /home/node/.claude

# Install Codex CLI(Native)
RUN npm install -g @openai/codex@native
ENV CODEX_RUST=1
# Write the instructions into the user's Codex directory. Writing a bare
# AGENTS.md would land in /workspace, which the run-time bind mount hides.
RUN mkdir -p /home/node/.codex && \
  echo "Solve this TypeScript exercise. Read the test file to understand requirements and implement the solution." > /home/node/.codex/AGENTS.md

# Install Goose CLI
RUN curl -fsSL https://github.com/block/goose/releases/download/stable/download_cli.sh | bash
ENV HOME=/home/node
ENV PATH=$HOME/.local/bin:$PATH

# Install Aider
RUN curl -LsSf https://aider.chat/install.sh | sh
ENV AIDER_GIT=false
ENV AIDER_AUTO_COMMITS=false
ENV AIDER_SHOW_RELEASE_NOTES=false
ENV AIDER_SKIP_SANITY_CHECK_REPO=true
ENV AIDER_CHAT_HISTORY_FILE=""
ENV AIDER_INPUT_HISTORY_FILE=""
>❯ bun run cli-agents-benchmark.ts --verbose --agent goose --provider google --model gemini-2.5-flash --exercise 100
==================================================
π Benchmark Results
==================================================
🎯 Success Rate: 96.0% (96/100)
⏱️ Average Duration: 102.2s
✅ Overall Success: 96
🤖 Agent Success: 100
🧪 Test Success: 96
❌ Failed: 4
π Detailed Results:
β
accumulate 56.1s (π€π§ͺ)
β
acronym 97.3s (π€π§ͺ)
β
all-your-base 76.8s (π€π§ͺ)
β
allergies 64.5s (π€π§ͺ)
β
alphametics 88.2s (π€π§ͺ)
β anagram 62.2s (π€β)
β
armstrong-numbers 60.8s (π€π§ͺ)
β
atbash-cipher 70.6s (π€π§ͺ)
β
bank-account 61.3s (π€π§ͺ)
β
beer-song 88.1s (π€π§ͺ)
β
binary-search 64.7s (π€π§ͺ)
β
binary-search-tree 73.1s (π€π§ͺ)
β
bob 106.4s (π€π§ͺ)
β bowling 169.6s (π€β)
β
circular-buffer 69.3s (π€π§ͺ)
β
clock 79.3s (π€π§ͺ)
β
collatz-conjecture 77.1s (π€π§ͺ)
β
complex-numbers 177.7s (π€π§ͺ)
β
connect 491.3s (π€π§ͺ)
β crypto-square 84.4s (π€β)
β
custom-set 109.4s (π€π§ͺ)
β
darts 70.1s (π€π§ͺ)
β
diamond 138.3s (π€π§ͺ)
β
difference-of-squares 92.7s (π€π§ͺ)
β
diffie-hellman 102.0s (π€π§ͺ)
β
dnd-character 86.3s (π€π§ͺ)
β
eliuds-eggs 69.5s (π€π§ͺ)
β
etl 70.4s (π€π§ͺ)
β
flatten-array 68.9s (π€π§ͺ)
β
food-chain 280.0s (π€π§ͺ)
β
game-of-life 66.6s (π€π§ͺ)
β
gigasecond 84.1s (π€π§ͺ)
β
grade-school 85.9s (π€π§ͺ)
β
grains 77.1s (π€π§ͺ)
β
hamming 72.3s (π€π§ͺ)
β
hello-world 67.5s (π€π§ͺ)
β
house 91.1s (π€π§ͺ)
β
isbn-verifier 71.7s (π€π§ͺ)
β
isogram 68.6s (π€π§ͺ)
β
kindergarten-garden 107.8s (π€π§ͺ)
β
knapsack 70.8s (π€π§ͺ)
β
largest-series-product 72.7s (π€π§ͺ)
β
leap 70.0s (π€π§ͺ)
β
linked-list 71.7s (π€π§ͺ)
β
list-ops 104.4s (π€π§ͺ)
β
luhn 74.0s (π€π§ͺ)
β
matching-brackets 96.8s (π€π§ͺ)
β
matrix 96.1s (π€π§ͺ)
β
minesweeper 81.0s (π€π§ͺ)
β
nth-prime 74.1s (π€π§ͺ)
β
nucleotide-count 70.3s (π€π§ͺ)
β
ocr-numbers 582.5s (π€π§ͺ)
β
palindrome-products 161.3s (π€π§ͺ)
β
pangram 73.4s (π€π§ͺ)
β
pascals-triangle 77.0s (π€π§ͺ)
β
perfect-numbers 78.2s (π€π§ͺ)
β
phone-number 87.4s (π€π§ͺ)
β
pig-latin 121.3s (π€π§ͺ)
β
prime-factors 75.2s (π€π§ͺ)
β
protein-translation 70.7s (π€π§ͺ)
β
proverb 154.3s (π€π§ͺ)
β
pythagorean-triplet 264.7s (π€π§ͺ)
β
queen-attack 92.9s (π€π§ͺ)
β
raindrops 71.5s (π€π§ͺ)
β
rational-numbers 95.0s (π€π§ͺ)
β
react 297.5s (π€π§ͺ)
β
rectangles 96.5s (π€π§ͺ)
β
relative-distance 86.3s (π€π§ͺ)
β
resistor-color 79.2s (π€π§ͺ)
β
resistor-color-duo 72.4s (π€π§ͺ)
β
resistor-color-trio 75.7s (π€π§ͺ)
β
reverse-string 71.3s (π€π§ͺ)
β
rna-transcription 72.0s (π€π§ͺ)
β
robot-name 99.9s (π€π§ͺ)
β robot-simulator 92.0s (π€β)
β
roman-numerals 79.1s (π€π§ͺ)
β
rotational-cipher 73.1s (π€π§ͺ)
β
run-length-encoding 73.5s (π€π§ͺ)
β
saddle-points 94.1s (π€π§ͺ)
β
say 90.3s (π€π§ͺ)
β
scrabble-score 68.9s (π€π§ͺ)
β
secret-handshake 73.8s (π€π§ͺ)
β
series 96.0s (π€π§ͺ)
β
sieve 76.5s (π€π§ͺ)
β
simple-cipher 104.8s (π€π§ͺ)
β
space-age 73.5s (π€π§ͺ)
β
spiral-matrix 77.5s (π€π§ͺ)
β
square-root 65.9s (π€π§ͺ)
β
strain 71.8s (π€π§ͺ)
β
sublist 85.6s (π€π§ͺ)
β
sum-of-multiples 72.6s (π€π§ͺ)
β
tournament 157.6s (π€π§ͺ)
β
transpose 192.5s (π€π§ͺ)
β
triangle 74.9s (π€π§ͺ)
β
twelve-days 99.6s (π€π§ͺ)
β
two-bucket 101.3s (π€π§ͺ)
β
two-fer 74.3s (π€π§ͺ)
β
variable-length-quantity 124.7s (π€π§ͺ)
β
word-count 75.7s (π€π§ͺ)
β
word-search 115.7s (π€π§ͺ)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment