Last active
June 28, 2025 07:05
-
-
Save celsowm/1cb86b7964651edd6a95b3ef1f50c23d to your computer and use it in GitHub Desktop.
JsonStreamParser.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * @typedef {'object' | 'array'} ContainerType
 */
/**
 * @typedef {Object} ParserEvent
 * @property {'objectStart' | 'objectEnd' | 'arrayStart' | 'arrayEnd' | 'key' | 'valueChunk' | 'value'} type
 * @property {(string|number)[]} path
 * @property {*} [value]
 * @property {string} [chunk]
 */
/**
 * Parser states.
 *
 * The numeric order matters: the parse loop keeps running only while the
 * current state is strictly below `DONE`, so every "still parsing" state must
 * have a smaller value than `DONE` and `ERROR`.
 *
 * Frozen so that no consumer of this shared table can alter it at runtime.
 */
const STATE = Object.freeze({
  AWAITING_VALUE: 0,
  IN_OBJECT_AWAITING_KEY: 1,
  IN_OBJECT_AWAITING_COLON: 2,
  IN_OBJECT_AWAITING_VALUE: 3,
  IN_OBJECT_AWAITING_SEPARATOR: 4,
  IN_ARRAY_AWAITING_VALUE: 5,
  IN_ARRAY_AWAITING_SEPARATOR: 6,
  IN_STRING: 7,
  IN_NUMBER: 8,
  IN_LITERAL: 9, // true, false, null
  DONE: 10,
  ERROR: 11,
});
/**
 * Incremental (streaming) JSON parser.
 *
 * Text is fed in arbitrary chunks through `transform()`; the parser emits
 * structural events (`objectStart`, `key`, `valueChunk`, `value`, ...) as soon
 * as they can be determined. Several top-level JSON values may follow each
 * other in the same stream.
 *
 * Two delivery modes exist:
 *  - `'transform'`: events are pushed with `controller.enqueue()`, so an
 *    instance can be used as the transformer of a `TransformStream`.
 *  - `'event'`: events are dispatched as `CustomEvent`s (payload in `detail`)
 *    through `addEventListener` / `removeEventListener` / `dispatchEvent`.
 *
 * Note: a closing `]` or `}` directly after a comma is tolerated.
 */
export class JsonStreamParser {
  // Regular expressions are hoisted so they are not rebuilt for every character.
  static #WHITESPACE = /\s/;
  static #NUMBER_START = /[0-9\-]/;
  static #LITERAL_START = /[tfn]/;
  static #NUMBER_CHAR = /[0-9\.eE\+\-]/;
  static #LITERAL_CHAR = /[a-z]/;
  static #NUMBER_FORMAT = /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/;

  // --- Private fields ---
  #options;
  #mode;
  #lineBuffer;
  #state;
  #buffer;
  #pos;
  #isEscaped;
  #valueBuffer;
  #stringChunks;
  #stack;
  #controller;

  // --- EventTarget public fields (only assigned in 'event' mode) ---
  addEventListener;
  removeEventListener;
  dispatchEvent;

  /**
   * @param {object} [o]
   * @param {boolean} [o.keepFullString=true] - When false, string values are
   *   not accumulated: only `valueChunk` events carry their text and the final
   *   `value` event has `value: null`. Object keys are always kept, because
   *   they are needed to build event paths.
   * @param {boolean} [o.sse=false] - Enable to accept a `text/event-stream`
   *   (Server-Sent Events) input.
   * @param {'transform'|'event'} [o.mode] - When omitted, `'event'` is used if
   *   `sse` is true, otherwise `'transform'`.
   */
  constructor({ keepFullString = true, sse = false, mode } = {}) {
    // Configuration
    this.#options = { keepFullString, sse };
    this.#mode = mode ?? (sse ? 'event' : 'transform');

    // Parser internals
    this.#lineBuffer = '';
    this.#state = STATE.AWAITING_VALUE;
    this.#buffer = '';
    this.#pos = 0;
    this.#isEscaped = false;
    this.#valueBuffer = '';
    this.#stringChunks = [];
    /** @type {{type: ContainerType, keyOrIndex: string|number, parentState: number}[]} */
    this.#stack = [];
    this.#controller = null;

    // In 'event' mode, expose the API of a private EventTarget. A plain
    // EventTarget needs no DOM `document`, so this also works in workers and
    // in server-side runtimes.
    if (this.#mode === 'event') {
      const target = new EventTarget();
      this.addEventListener = target.addEventListener.bind(target);
      this.removeEventListener = target.removeEventListener.bind(target);
      this.dispatchEvent = target.dispatchEvent.bind(target);
    }
  }

  // ───────────────── TransformStream interface ──────────────────

  /**
   * Stores the stream controller used to deliver events in 'transform' mode.
   * @param {TransformStreamDefaultController<ParserEvent>} controller
   */
  start(controller) {
    this.#controller = controller;
  }

  /**
   * Feeds one chunk of text to the parser.
   * @param {string} chunk
   * @param {TransformStreamDefaultController<ParserEvent>} [controller] - Used
   *   only if no controller has been stored yet.
   */
  transform(chunk, controller) {
    if (controller && !this.#controller) this.#controller = controller;
    if (this.#state === STATE.ERROR) return;

    let incoming = chunk;
    let sawDone = false;
    if (this.#options.sse) {
      const extracted = this.#processSSEChunk(incoming);
      incoming = extracted.json;
      sawDone = extracted.done;
    }

    if (incoming) {
      this.#buffer += incoming;
      this.#parse();
    }

    // `[DONE]` is applied only after the JSON that preceded it in the same
    // chunk has been parsed; otherwise that data would be dropped.
    if (sawDone && this.#state !== STATE.ERROR) {
      this.#state = STATE.DONE;
    }
  }

  /**
   * Signals the end of the input. Reports an error if the stream stops in the
   * middle of a JSON value.
   */
  flush() {
    // Numbers and literals have no closing delimiter, so one that ends exactly
    // at the end of the stream is still pending and must be completed here.
    if (this.#state === STATE.IN_NUMBER || this.#state === STATE.IN_LITERAL) {
      this.#completeSimpleValue(this.#state === STATE.IN_NUMBER ? 'number' : 'literal');
    }

    if (this.#state === STATE.AWAITING_VALUE) {
      // Ending between top-level values (possibly after whitespace) is fine.
      if (this.#buffer.trim().length === 0) this.#state = STATE.DONE;
      return;
    }

    // Any remaining state other than DONE is an unexpected end of input.
    // (#emitError itself is a no-op when an error was already reported.)
    if (this.#state !== STATE.DONE) {
      this.#emitError('JSON incompleto no final do fluxo');
    }
  }

  // ───────────────────── shared helpers ────────────────────────────

  /**
   * Delivers one parser event through the channel selected by the mode.
   * @param {ParserEvent} ev
   */
  #enqueue(ev) {
    if (this.#mode === 'event') {
      // In event mode the event detail is the payload itself.
      this.dispatchEvent(new CustomEvent(ev.type, { detail: ev }));
    } else if (this.#controller) {
      this.#controller.enqueue(ev);
    }
  }

  /**
   * Builds the event used to report an error in 'event' mode. Falls back to a
   * plain `Event` carrying an `error` property where `ErrorEvent` is missing.
   * @param {Error} err
   * @returns {Event}
   */
  static #createErrorEvent(err) {
    if (typeof ErrorEvent === 'function') {
      return new ErrorEvent('error', { error: err });
    }
    const event = new Event('error');
    Object.defineProperty(event, 'error', { value: err, enumerable: true });
    return event;
  }

  /**
   * Puts the parser in the ERROR state and reports the error once.
   * @param {string} msg
   */
  #emitError(msg) {
    if (this.#state === STATE.ERROR) return; // avoid cascading errors

    const path = this.#getCurrentPath().join('.') || 'root';
    const err = new Error(`${msg} em path ${path}`);
    this.#state = STATE.ERROR;

    // Clear internal state so that processing stops.
    this.#buffer = '';
    this.#valueBuffer = '';
    this.#stringChunks = [];
    this.#isEscaped = false;

    if (this.#mode === 'event') {
      this.dispatchEvent(JsonStreamParser.#createErrorEvent(err));
    } else if (this.#controller) {
      this.#controller.error(err);
    }
  }

  // ---- Core parsing logic ----

  /**
   * Path made of the key / index currently recorded on each open container.
   * @returns {(string|number)[]}
   */
  #getCurrentPath() {
    return this.#stack
      .filter((frame) => frame.keyOrIndex !== undefined)
      .map((frame) => frame.keyOrIndex);
  }

  /** Consumes as much of the buffer as possible, then drops the consumed part. */
  #parse() {
    while (this.#pos < this.#buffer.length && this.#state < STATE.DONE) {
      const char = this.#buffer[this.#pos];

      // Whitespace is skipped between tokens only. Inside a string it is
      // content; after a number or literal it is the terminator and must reach
      // the scalar handler (otherwise `1` + ` 2` would be read as `12`).
      const insideScalar =
        this.#state === STATE.IN_STRING ||
        this.#state === STATE.IN_NUMBER ||
        this.#state === STATE.IN_LITERAL;
      if (!insideScalar && JsonStreamParser.#WHITESPACE.test(char)) {
        this.#pos++;
        continue;
      }

      this.#processChar(char);
    }

    // Release the already-processed part of the buffer.
    if (this.#pos > 0) {
      this.#buffer = this.#buffer.slice(this.#pos);
      this.#pos = 0;
    }
  }

  /**
   * Dispatches the current character according to the parser state.
   * @param {string} char
   */
  #processChar(char) {
    switch (this.#state) {
      case STATE.AWAITING_VALUE:
      case STATE.IN_OBJECT_AWAITING_VALUE:
      case STATE.IN_ARRAY_AWAITING_VALUE:
        this.#handleAwaitValue(char);
        break;

      case STATE.IN_OBJECT_AWAITING_KEY:
        if (char === '"') {
          this.#pos++;
          this.#state = STATE.IN_STRING;
        } else if (char === '}') {
          this.#closeContainer();
        } else {
          this.#emitError(`Esperava '"' para uma chave ou '}' mas recebeu '${char}'`);
        }
        break;

      case STATE.IN_OBJECT_AWAITING_COLON:
        if (char === ':') {
          this.#pos++;
          this.#state = STATE.IN_OBJECT_AWAITING_VALUE;
        } else {
          this.#emitError(`Esperava ':' mas recebeu '${char}'`);
        }
        break;

      case STATE.IN_OBJECT_AWAITING_SEPARATOR:
        if (char === ',') {
          this.#pos++;
          this.#state = STATE.IN_OBJECT_AWAITING_KEY;
        } else if (char === '}') {
          this.#closeContainer();
        } else {
          this.#emitError(`Esperava ',' ou '}' mas recebeu '${char}'`);
        }
        break;

      case STATE.IN_ARRAY_AWAITING_SEPARATOR:
        if (char === ',') {
          this.#pos++;
          this.#state = STATE.IN_ARRAY_AWAITING_VALUE;
        } else if (char === ']') {
          this.#closeContainer();
        } else {
          this.#emitError(`Esperava ',' ou ']' mas recebeu '${char}'`);
        }
        break;

      case STATE.IN_STRING:
        this.#handleInString();
        break;
      case STATE.IN_NUMBER:
        this.#handleInNumber();
        break;
      case STATE.IN_LITERAL:
        this.#handleInLiteral();
        break;

      default:
        this.#emitError('Estado desconhecido do parser.');
    }
  }

  /**
   * Handles the first character of a value.
   * @param {string} char
   */
  #handleAwaitValue(char) {
    if (char === '"') {
      this.#pos++;
      this.#state = STATE.IN_STRING;
    } else if (char === '{') {
      this.#openContainer('object');
    } else if (char === '[') {
      this.#openContainer('array');
    } else if (JsonStreamParser.#NUMBER_START.test(char)) {
      // Not consumed here: the number handler reads it on the next iteration.
      this.#state = STATE.IN_NUMBER;
    } else if (JsonStreamParser.#LITERAL_START.test(char)) {
      this.#state = STATE.IN_LITERAL;
    } else if (char === ']' && this.#stack.at(-1)?.type === 'array') {
      // Handles the empty array `[]`.
      this.#closeContainer();
    } else {
      this.#emitError(`Caractere inesperado '${char}' ao aguardar um valor`);
    }
  }

  /**
   * Pre-processes a piece of a Server-Sent Events stream.
   *
   * Only complete lines are interpreted; the unterminated tail is buffered for
   * the next call. Payloads of `data:` lines are collected (each followed by a
   * newline) until a `[DONE]` payload is seen; everything after it is ignored.
   *
   * @param {string} chunk
   * @returns {{json: string, done: boolean}} the JSON text extracted from the
   *   chunk and whether a `[DONE]` marker was encountered.
   */
  #processSSEChunk(chunk) {
    const lines = (this.#lineBuffer + chunk).split(/\r?\n/);
    // The last piece is '' when the text ended with a newline, otherwise it is
    // an incomplete line. A dangling '\r' (first half of a CRLF split across
    // chunks) stays in the buffered tail, so it never leaks into a payload.
    this.#lineBuffer = lines.pop() ?? '';

    let json = '';
    let done = false;
    for (const line of lines) {
      if (line.length === 0 || line.startsWith(':')) continue; // keep-alives and comments
      if (!line.startsWith('data:')) continue; // other SSE fields: event:, retry:, id:

      const payload = line.slice(5).trimStart();
      if (payload === '[DONE]') {
        done = true;
        break;
      }
      json += `${payload}\n`;
    }
    return { json, done };
  }

  /**
   * Opens an object or array and emits the matching start event.
   * @param {ContainerType} type
   */
  #openContainer(type) {
    const parentFrame = this.#stack.at(-1);

    // State to return to once this container is closed.
    let parentState = STATE.DONE;
    if (parentFrame) {
      parentState = parentFrame.type === 'object'
        ? STATE.IN_OBJECT_AWAITING_SEPARATOR
        : STATE.IN_ARRAY_AWAITING_SEPARATOR;
    }

    let path = this.#getCurrentPath();
    if (parentFrame?.type === 'array') {
      // The new container occupies the next index of its parent array.
      const newIndex = (parentFrame.keyOrIndex ?? -1) + 1;
      parentFrame.keyOrIndex = newIndex;
      path = [...path, newIndex];
    }

    this.#stack.push({ type, keyOrIndex: undefined, parentState });

    if (type === 'object') {
      this.#enqueue({ type: 'objectStart', path });
      this.#state = STATE.IN_OBJECT_AWAITING_KEY;
    } else {
      this.#enqueue({ type: 'arrayStart', path });
      this.#state = STATE.IN_ARRAY_AWAITING_VALUE;
    }
    this.#pos++;
  }

  /** Closes the innermost container and emits the matching end event. */
  #closeContainer() {
    const frame = this.#stack.pop();
    if (!frame) {
      this.#emitError(`Tentativa de fechar um container que não foi aberto.`);
      return;
    }

    // Computed after the pop so the end event has the same path as the start
    // event (the closed array's own last element index is not part of it).
    const path = this.#getCurrentPath();
    this.#enqueue({ type: frame.type === 'object' ? 'objectEnd' : 'arrayEnd', path });

    // A finished top-level value makes the parser ready for the next one.
    this.#state = frame.parentState === STATE.DONE ? STATE.AWAITING_VALUE : frame.parentState;

    // A completed container is a value: if its parent is an object, clear the
    // parent's key so the next string is recognised as a new key.
    const parentFrame = this.#stack.at(-1);
    if (parentFrame?.type === 'object') {
      parentFrame.keyOrIndex = undefined;
    }
    this.#pos++;
  }

  /**
   * Emits the raw string text between `start` and the current position and
   * optionally stores it for the final decoded value.
   * @param {number} start
   * @param {boolean} retain
   */
  #takeStringChunk(start, retain) {
    const chunk = this.#buffer.substring(start, this.#pos);
    if (!chunk) return;
    this.#emitValueChunk(chunk);
    if (retain) this.#stringChunks.push(chunk);
  }

  /** Consumes string content up to the closing quote or the end of the buffer. */
  #handleInString() {
    // Keys are always retained, even with `keepFullString: false`: they are
    // required to build the path of every nested event.
    const top = this.#stack.at(-1);
    const isKey = top?.type === 'object' && top.keyOrIndex === undefined;
    const retain = this.#options.keepFullString || isKey;

    const start = this.#pos;
    while (this.#pos < this.#buffer.length) {
      const char = this.#buffer[this.#pos];

      if (this.#isEscaped) {
        this.#isEscaped = false;
      } else if (char === '\\') {
        this.#isEscaped = true;
      } else if (char === '"') {
        this.#takeStringChunk(start, retain);
        this.#pos++; // consume the closing quote

        let finalValue = null;
        if (retain) {
          try {
            // JSON.parse decodes the escapes (\n, \uXXXX, ...).
            finalValue = JSON.parse(`"${this.#stringChunks.join('')}"`);
          } catch (e) {
            this.#emitError(`Sequência de string inválida: ${e.message}`);
            return;
          }
        }
        this.#stringChunks = [];
        this.#finalizeValue(finalValue);
        return;
      }
      this.#pos++;
    }

    // The string is not finished yet: hand over the partial chunk.
    this.#takeStringChunk(start, retain);
  }

  /**
   * Accumulates the characters of a number or literal and completes the value
   * once a character that cannot belong to it is seen.
   * @param {'number' | 'literal'} finalType
   */
  #handleSimpleValue(finalType) {
    const charClass = finalType === 'number'
      ? JsonStreamParser.#NUMBER_CHAR
      : JsonStreamParser.#LITERAL_CHAR;

    const start = this.#pos;
    while (this.#pos < this.#buffer.length && charClass.test(this.#buffer[this.#pos])) {
      this.#pos++;
    }
    this.#valueBuffer += this.#buffer.substring(start, this.#pos);

    // Buffer exhausted: the value may continue in the next chunk.
    if (this.#pos === this.#buffer.length) return;

    this.#completeSimpleValue(finalType);
  }

  /**
   * Validates the accumulated number / literal text and emits its value.
   * @param {'number' | 'literal'} finalType
   */
  #completeSimpleValue(finalType) {
    const text = this.#valueBuffer;
    this.#valueBuffer = '';

    if (finalType === 'number') {
      if (!JsonStreamParser.#NUMBER_FORMAT.test(text)) {
        this.#emitError(`Formato de número inválido: '${text}'`);
        return;
      }
    } else if (text !== 'true' && text !== 'false' && text !== 'null') {
      this.#emitError(`Literal inválido encontrado: '${text}'`);
      return;
    }

    let value;
    try {
      value = JSON.parse(text);
    } catch {
      this.#emitError(`Falha ao parsear valor ${finalType}: '${text}'`);
      return;
    }
    this.#finalizeValue(value);
  }

  #handleInNumber() {
    this.#handleSimpleValue('number');
  }

  #handleInLiteral() {
    this.#handleSimpleValue('literal');
  }

  /**
   * Emits a `valueChunk` event for a piece of a string value. Nothing is
   * emitted while the string being read is an object key.
   * @param {string} chunk - Raw text (escape sequences are not decoded).
   */
  #emitValueChunk(chunk) {
    const parentFrame = this.#stack.at(-1);
    const readingKey = parentFrame?.type === 'object' && parentFrame.keyOrIndex === undefined;
    if (readingKey) return;

    let path = this.#getCurrentPath();
    if (parentFrame?.type === 'array') {
      // The array index is advanced only when the value is finalized, so the
      // index of the element being read is "last index + 1".
      const newIndex = (parentFrame.keyOrIndex ?? -1) + 1;
      const containerPath = parentFrame.keyOrIndex !== undefined ? path.slice(0, -1) : path;
      path = [...containerPath, newIndex];
    }
    this.#enqueue({ type: 'valueChunk', path, chunk });
  }

  /**
   * Emits a completed scalar (as a `key` or a `value` event) and moves the
   * parser to the state that follows it.
   * @param {*} value
   */
  #finalizeValue(value) {
    const parent = this.#stack.at(-1);

    // Root value (outside any object or array).
    if (!parent) {
      this.#enqueue({ type: 'value', path: [], value });
      this.#state = STATE.AWAITING_VALUE; // ready for the next top-level value
      return;
    }

    if (parent.type === 'object') {
      if (parent.keyOrIndex === undefined) {
        // No key recorded yet: this scalar IS the key.
        const containerPath = this.#getCurrentPath();
        parent.keyOrIndex = value;
        this.#enqueue({ type: 'key', path: containerPath, value });
        this.#state = STATE.IN_OBJECT_AWAITING_COLON;
      } else {
        // Otherwise it is the value associated with the recorded key.
        this.#enqueue({ type: 'value', path: this.#getCurrentPath(), value });
        parent.keyOrIndex = undefined; // reset for the next key
        this.#state = STATE.IN_OBJECT_AWAITING_SEPARATOR;
      }
      return;
    }

    // Array element: advance the index before building the path.
    parent.keyOrIndex = parent.keyOrIndex === undefined ? 0 : parent.keyOrIndex + 1;
    this.#enqueue({ type: 'value', path: this.#getCurrentPath(), value });
    this.#state = STATE.IN_ARRAY_AWAITING_SEPARATOR;
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment