Last active
June 21, 2025 05:08
-
-
Save mikegwhit/0e240ff9e1e48d69de1d98f015685b71 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Chat-session cost estimator, accounting for character-based token growth
 * and optional OpenAI-style cached token discounting.
 *
 * Model: the conversation is split evenly into `turns` turns. Each turn
 * re-sends the full prior history as input (stateless API), so input tokens
 * grow quadratically with the turn count; a `cacheDiscount` fraction of that
 * re-sent history is billed at a reduced cached rate.
 *
 * @param {Object} opts
 * @param {number} opts.turns – number of user↔assistant turns
 * @param {number} opts.totalChars – total characters across the full script
 * @param {number} [opts.charsPerTok=4] – avg chars per token (English ≈ 4)
 * @param {number} [opts.inputCost=0.002] – $ / 1,000 input tokens
 * @param {number} [opts.outputCost=0.008] – $ / 1,000 output tokens
 * @param {number} [opts.outputRatio=0.5] – assistant’s share of token budget
 * @param {number} [opts.cacheDiscount=0.75] – fraction of prior tokens that are
 *        cached (0 = no cache, 0.75 = 75% cached)
 * @param {number} [opts.cachedCostRatio=0.25] – cached-token price as a fraction
 *        of the normal input rate (OpenAI bills cached input at ~25%)
 * @returns {{totalInputTokens:number,
 *            totalOutputTokens:number,
 *            totalCost:number}}
 */
function estimateChatCostByChars({
  turns,
  totalChars,
  charsPerTok = 4,
  inputCost = 0.002, // GPT-4.1 default
  outputCost = 0.008,
  outputRatio = 0.5,
  cacheDiscount = 0.75, // OpenAI caches 75% of prior tokens at reduced rate
  cachedCostRatio = 0.25 // cached tokens billed at 25% of the input rate
}) {
  // `!(x > 0)` also rejects NaN/undefined, which `x <= 0` would let through.
  // Guarding charsPerTok avoids a divide-by-zero → Infinity result.
  if (!(turns > 0) || !(totalChars > 0) || !(charsPerTok > 0)) {
    return { totalInputTokens: 0, totalOutputTokens: 0, totalCost: 0 };
  }

  const totalTokensLinear = totalChars / charsPerTok;
  const totalOutputTokens = totalTokensLinear * outputRatio;
  const baseTokensPerTurn = totalTokensLinear / turns;

  // Fresh (never-before-seen) input per turn is the user's share of the turn.
  const freshTokensPerTurn = baseTokensPerTurn * (1 - outputRatio);
  const freshTokens = freshTokensPerTurn * turns;

  /**
   * Stateless growth: turn i re-sends i turns of history, so total re-sent
   * history tokens are baseTokensPerTurn · Σ₀ⁿ⁻¹ i = baseTokensPerTurn · n(n-1)/2.
   * Computed in closed form instead of a per-turn loop.
   */
  const historyTokens = baseTokensPerTurn * (turns * (turns - 1)) / 2;
  const cachedTokens = historyTokens * cacheDiscount;
  const uncachedTokens = historyTokens - cachedTokens;

  const totalInputTokens = freshTokens + historyTokens;
  const costInput =
    (cachedTokens / 1_000) * inputCost * cachedCostRatio +
    (uncachedTokens / 1_000) * inputCost +
    (freshTokens / 1_000) * inputCost;
  const costOutput = (totalOutputTokens / 1_000) * outputCost;

  return {
    totalInputTokens : Math.round(totalInputTokens),
    totalOutputTokens: Math.round(totalOutputTokens),
    totalCost : +(costInput + costOutput).toFixed(4)
  };
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment