Last active
June 21, 2025 05:08
-
-
Save mikegwhit/0e240ff9e1e48d69de1d98f015685b71 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Chat-session cost estimator, accounting for character-based token growth
 * and optional OpenAI-style cached token discounting.
 *
 * Model: the conversation is split evenly into `turns` turns. Each turn
 * re-sends the full prior history as input (stateless API), so input tokens
 * grow quadratically with the turn count; a `cacheDiscount` fraction of that
 * re-sent history is billed at a reduced cached rate.
 *
 * @param {Object} opts
 * @param {number} opts.turns – number of user↔assistant turns
 * @param {number} opts.totalChars – total characters across the full script
 * @param {number} [opts.charsPerTok=4] – avg chars per token (English ≈ 4)
 * @param {number} [opts.inputCost=0.002] – $ / 1,000 input tokens
 * @param {number} [opts.outputCost=0.008] – $ / 1,000 output tokens
 * @param {number} [opts.outputRatio=0.5] – assistant’s share of token budget
 * @param {number} [opts.cacheDiscount=0.75] – fraction of prior tokens that are
 *        cached (0 = no cache, 0.75 = 75% cached)
 * @param {number} [opts.cachedCostRatio=0.25] – cached-token price as a fraction
 *        of the normal input rate (OpenAI bills cached input at ~25%)
 * @returns {{totalInputTokens:number,
 *            totalOutputTokens:number,
 *            totalCost:number}}
 */
function estimateChatCostByChars({
  turns,
  totalChars,
  charsPerTok = 4,
  inputCost = 0.002, // GPT-4.1 default
  outputCost = 0.008,
  outputRatio = 0.5,
  cacheDiscount = 0.75, // OpenAI caches 75% of prior tokens at reduced rate
  cachedCostRatio = 0.25 // cached tokens billed at 25% of the input rate
}) {
  // `!(x > 0)` also rejects NaN/undefined, which `x <= 0` would let through.
  // Guarding charsPerTok avoids a divide-by-zero → Infinity result.
  if (!(turns > 0) || !(totalChars > 0) || !(charsPerTok > 0)) {
    return { totalInputTokens: 0, totalOutputTokens: 0, totalCost: 0 };
  }

  const totalTokensLinear = totalChars / charsPerTok;
  const totalOutputTokens = totalTokensLinear * outputRatio;
  const baseTokensPerTurn = totalTokensLinear / turns;

  // Fresh (never-before-seen) input per turn is the user's share of the turn.
  const freshTokensPerTurn = baseTokensPerTurn * (1 - outputRatio);
  const freshTokens = freshTokensPerTurn * turns;

  /**
   * Stateless growth: turn i re-sends i turns of history, so total re-sent
   * history tokens are baseTokensPerTurn · Σ₀ⁿ⁻¹ i = baseTokensPerTurn · n(n-1)/2.
   * Computed in closed form instead of a per-turn loop.
   */
  const historyTokens = baseTokensPerTurn * (turns * (turns - 1)) / 2;
  const cachedTokens = historyTokens * cacheDiscount;
  const uncachedTokens = historyTokens - cachedTokens;

  const totalInputTokens = freshTokens + historyTokens;
  const costInput =
    (cachedTokens / 1_000) * inputCost * cachedCostRatio +
    (uncachedTokens / 1_000) * inputCost +
    (freshTokens / 1_000) * inputCost;
  const costOutput = (totalOutputTokens / 1_000) * outputCost;

  return {
    totalInputTokens : Math.round(totalInputTokens),
    totalOutputTokens: Math.round(totalOutputTokens),
    totalCost : +(costInput + costOutput).toFixed(4)
  };
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment