Skip to content

Instantly share code, notes, and snippets.

@celsowm
Last active June 28, 2025 07:05
Show Gist options
  • Save celsowm/1cb86b7964651edd6a95b3ef1f50c23d to your computer and use it in GitHub Desktop.
Save celsowm/1cb86b7964651edd6a95b3ef1f50c23d to your computer and use it in GitHub Desktop.
JsonStreamParser.js
/**
* @typedef {'object' | 'array'} ContainerType
*/
/**
* @typedef {Object} ParserEvent
* @property {'objectStart' | 'objectEnd' | 'arrayStart' | 'arrayEnd' | 'key' | 'valueChunk' | 'value'} type
* @property {(string|number)[]} path
* @property {*} [value]
* @property {string} [chunk]
*/
/**
 * Parser states.
 *
 * The numeric order is significant: the main parse loop only keeps consuming
 * input while the current state is numerically below `DONE`, so the two
 * terminal states (`DONE`, `ERROR`) must stay last.
 *
 * The table is frozen so no caller can accidentally mutate a shared constant.
 */
const STATE = Object.freeze({
  AWAITING_VALUE: 0, // root level: expecting the start of a value
  IN_OBJECT_AWAITING_KEY: 1, // after '{' or ',': expecting '"' or '}'
  IN_OBJECT_AWAITING_COLON: 2, // after a key: expecting ':'
  IN_OBJECT_AWAITING_VALUE: 3, // after ':': expecting the start of a value
  IN_OBJECT_AWAITING_SEPARATOR: 4, // after a member value: expecting ',' or '}'
  IN_ARRAY_AWAITING_VALUE: 5, // after '[' or ',': expecting a value (or ']')
  IN_ARRAY_AWAITING_SEPARATOR: 6, // after an element: expecting ',' or ']'
  IN_STRING: 7, // between the quotes of a string (key or value)
  IN_NUMBER: 8, // accumulating the characters of a number
  IN_LITERAL: 9, // accumulating true, false or null
  DONE: 10, // terminal: input finished
  ERROR: 11, // terminal: a parse error was reported
});
/**
 * Incremental (streaming) JSON parser.
 *
 * Text is pushed in arbitrary chunks through `transform()`; the parser emits
 * `ParserEvent`s (`objectStart`, `objectEnd`, `arrayStart`, `arrayEnd`, `key`,
 * `valueChunk`, `value`) as soon as they can be determined. Several JSON
 * documents may follow each other in the same stream: after a root value is
 * completed the parser goes back to waiting for a new value.
 *
 * Two delivery modes exist:
 *  - `'transform'`: events are enqueued on the controller received through
 *    `start()` / `transform()`, and errors are reported via `controller.error`.
 *  - `'event'`: events are dispatched as `CustomEvent`s (payload in `detail`)
 *    and errors as an `'error'` event carrying the `Error` in `.error`.
 *
 * Note: a trailing comma before `]` or `}` is accepted by the state machine.
 */
export class JsonStreamParser {
  // --- Character classes and validation patterns (no /g flag: stateless) ---
  static #WHITESPACE = /\s/;
  static #NUMBER_CHAR = /[0-9\.eE\+\-]/;
  static #LITERAL_CHAR = /[a-z]/;
  static #NUMBER_PATTERN = /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/;

  // --- Private fields ---
  #options;
  #mode;
  #lineBuffer;
  #sseDone;
  #state;
  #buffer;
  #pos;
  #isEscaped;
  #valueBuffer;
  #stringChunks;
  #stack;
  #controller;

  // --- EventTarget public fields (only assigned in 'event' mode) ---
  addEventListener;
  removeEventListener;
  dispatchEvent;

  /**
   * @param {object} [o]
   * @param {boolean} [o.keepFullString=true] - When false, string values are
   *   not accumulated (only `valueChunk` events carry their text and the final
   *   `value` event has `value: null`). Object keys are always decoded.
   * @param {boolean} [o.sse=false] - Enable to accept a `text/event-stream`
   *   (Server-Sent Events) input whose `data:` lines carry the JSON.
   * @param {'transform'|'event'} [o.mode] - When omitted, `'event'` is used
   *   if `sse` is true, otherwise `'transform'`.
   */
  constructor({ keepFullString = true, sse = false, mode } = {}) {
    // -- configuration --
    this.#options = { keepFullString, sse };
    this.#mode = mode ?? (sse ? 'event' : 'transform');

    // -- parser internals --
    this.#lineBuffer = '';
    this.#sseDone = false;
    this.#state = STATE.AWAITING_VALUE;
    this.#buffer = '';
    this.#pos = 0;
    this.#isEscaped = false;
    this.#valueBuffer = '';
    this.#stringChunks = [];
    /** @type {{type: ContainerType, keyOrIndex: string|number, parentState: number}[]} */
    this.#stack = [];
    this.#controller = null;

    // -- in event mode, delegate to a private EventTarget --
    if (this.#mode === 'event') {
      // A bare EventTarget needs no DOM, so this also works where `document`
      // does not exist.
      const target = new EventTarget();
      this.addEventListener = target.addEventListener.bind(target);
      this.removeEventListener = target.removeEventListener.bind(target);
      this.dispatchEvent = target.dispatchEvent.bind(target);
    }
  }

  // ----------------- TransformStream transformer interface -----------------

  /**
   * Stores the controller used to enqueue events in 'transform' mode.
   * @param {TransformStreamDefaultController<ParserEvent>} controller
   */
  start(controller) {
    this.#controller = controller;
  }

  /**
   * Feeds one chunk of text into the parser.
   * @param {string} chunk
   * @param {TransformStreamDefaultController<ParserEvent>} [controller]
   */
  transform(chunk, controller) {
    if (controller && !this.#controller) this.#controller = controller;
    // Nothing more to do after an error or after the SSE `[DONE]` marker.
    if (this.#state === STATE.ERROR || this.#sseDone) return;

    let incoming = chunk;
    if (this.#options.sse) {
      incoming = this.#processSSEChunk(incoming);
    }
    if (incoming) {
      this.#buffer += incoming;
      this.#parse();
    }
    // `[DONE]` only takes effect after the data lines that preceded it in the
    // same chunk were parsed; otherwise they would be silently discarded.
    if (this.#sseDone && this.#state !== STATE.ERROR) {
      this.#state = STATE.DONE;
      this.#buffer = '';
    }
  }

  /**
   * Signals the end of the input. Reports an error if the stream stopped in
   * the middle of a JSON document.
   */
  flush() {
    if (this.#state === STATE.DONE || this.#state === STATE.ERROR) return;

    // A root-level number or literal has no closing delimiter: the end of the
    // stream is what terminates it.
    if (this.#stack.length === 0) {
      if (this.#state === STATE.IN_NUMBER) {
        this.#completeSimpleValue('number');
      } else if (this.#state === STATE.IN_LITERAL) {
        this.#completeSimpleValue('literal');
      }
      if (this.#state === STATE.ERROR) return;
    }

    if (this.#state === STATE.AWAITING_VALUE) {
      this.#state = STATE.DONE;
      return;
    }
    // Any other state means the input ended unexpectedly.
    this.#emitError('JSON incompleto no final do fluxo');
  }

  // ----------------------------- common helpers -----------------------------

  /**
   * Delivers a parser event according to the configured mode.
   * @param {ParserEvent} ev
   * @private
   */
  #enqueue(ev) {
    if (this.#mode === 'event') {
      // The event's `detail` is the parser event itself. Fall back to a plain
      // Event when the runtime has no CustomEvent global.
      const event = typeof CustomEvent === 'function'
        ? new CustomEvent(ev.type, { detail: ev })
        : Object.assign(new Event(ev.type), { detail: ev });
      this.dispatchEvent(event);
    } else if (this.#controller) {
      this.#controller.enqueue(ev);
    }
  }

  /**
   * Puts the parser in the ERROR state and reports the failure once.
   * @param {string} msg
   * @private
   */
  #emitError(msg) {
    if (this.#state === STATE.ERROR) return; // avoid cascading errors
    const path = this.#getCurrentPath().join('.') || 'root';
    const err = new Error(`${msg} em path ${path}`);
    this.#state = STATE.ERROR;
    // Clear internal state so that processing stops.
    this.#buffer = '';
    this.#valueBuffer = '';
    this.#stringChunks = [];
    this.#isEscaped = false;

    if (this.#mode === 'event') {
      const init = { error: err, message: err.message };
      // Fall back to a plain Event when the runtime has no ErrorEvent global.
      const event = typeof ErrorEvent === 'function'
        ? new ErrorEvent('error', init)
        : Object.assign(new Event('error'), init);
      this.dispatchEvent(event);
    } else if (this.#controller) {
      this.#controller.error(err);
    }
  }

  // ---------------------------- core parsing logic ----------------------------

  /**
   * Path made of the key/index currently recorded on every open container.
   * @returns {(string|number)[]}
   * @private
   */
  #getCurrentPath() {
    return this.#stack
      .filter((frame) => frame.keyOrIndex !== undefined)
      .map((frame) => frame.keyOrIndex);
  }

  /**
   * Consumes as much of the buffer as possible, then drops the consumed part.
   * @private
   */
  #parse() {
    while (this.#pos < this.#buffer.length && this.#state < STATE.DONE) {
      const char = this.#buffer[this.#pos];
      // Whitespace is skipped between tokens only. Inside a string it is
      // content; inside a number/literal it is the terminator and must reach
      // the value handler (otherwise "1" + "\n2" would be read as 12).
      const insideScalar = this.#state === STATE.IN_STRING
        || this.#state === STATE.IN_NUMBER
        || this.#state === STATE.IN_LITERAL;
      if (!insideScalar && JsonStreamParser.#WHITESPACE.test(char)) {
        this.#pos++;
        continue;
      }
      this.#processChar(char);
    }
    // Release the already processed portion of the buffer.
    if (this.#pos > 0) {
      this.#buffer = this.#buffer.slice(this.#pos);
      this.#pos = 0;
    }
  }

  /**
   * Dispatches the current character according to the parser state.
   * @param {string} char
   * @private
   */
  #processChar(char) {
    switch (this.#state) {
      case STATE.AWAITING_VALUE:
      case STATE.IN_OBJECT_AWAITING_VALUE:
      case STATE.IN_ARRAY_AWAITING_VALUE:
        this.#handleAwaitValue(char);
        break;
      case STATE.IN_OBJECT_AWAITING_KEY:
        if (char === '"') { this.#pos++; this.#state = STATE.IN_STRING; }
        else if (char === '}') { this.#closeContainer(); }
        else { this.#emitError(`Esperava '"' para uma chave ou '}' mas recebeu '${char}'`); }
        break;
      case STATE.IN_OBJECT_AWAITING_COLON:
        if (char === ':') { this.#pos++; this.#state = STATE.IN_OBJECT_AWAITING_VALUE; }
        else { this.#emitError(`Esperava ':' mas recebeu '${char}'`); }
        break;
      case STATE.IN_OBJECT_AWAITING_SEPARATOR:
        if (char === ',') { this.#pos++; this.#state = STATE.IN_OBJECT_AWAITING_KEY; }
        else if (char === '}') { this.#closeContainer(); }
        else { this.#emitError(`Esperava ',' ou '}' mas recebeu '${char}'`); }
        break;
      case STATE.IN_ARRAY_AWAITING_SEPARATOR:
        if (char === ',') { this.#pos++; this.#state = STATE.IN_ARRAY_AWAITING_VALUE; }
        else if (char === ']') { this.#closeContainer(); }
        else { this.#emitError(`Esperava ',' ou ']' mas recebeu '${char}'`); }
        break;
      case STATE.IN_STRING: this.#handleInString(); break;
      case STATE.IN_NUMBER: this.#handleInNumber(); break;
      case STATE.IN_LITERAL: this.#handleInLiteral(); break;
      default: this.#emitError('Estado desconhecido do parser.');
    }
  }

  /**
   * Handles the first character of a value.
   * @param {string} char
   * @private
   */
  #handleAwaitValue(char) {
    if (char === '"') { this.#pos++; this.#state = STATE.IN_STRING; }
    else if (char === '{') { this.#openContainer('object'); }
    else if (char === '[') { this.#openContainer('array'); }
    // Numbers and literals do not consume here: their handler re-reads the
    // same character on the next loop iteration.
    else if (/[0-9\-]/.test(char)) { this.#state = STATE.IN_NUMBER; }
    else if (/[tfn]/.test(char)) { this.#state = STATE.IN_LITERAL; }
    // Handles the empty array `[]`.
    else if (char === ']' && this.#stack.at(-1)?.type === 'array') { this.#closeContainer(); }
    else { this.#emitError(`Caractere inesperado '${char}' ao aguardar um valor`); }
  }

  /**
   * Pre-processes a piece of a Server-Sent Events stream.
   *
   * Complete `data:` lines are concatenated (each followed by a newline) and
   * returned as JSON text; a trailing unfinished line is kept for the next
   * call. A `data: [DONE]` line marks the end of the stream.
   * @param {string} chunk
   * @returns {string} JSON text extracted from this chunk (may be empty).
   * @private
   */
  #processSSEChunk(chunk) {
    const lines = (this.#lineBuffer + chunk).split(/\r?\n/);
    // The last piece is '' when the text ended with '\n'; otherwise it is an
    // unfinished line (possibly ending in a '\r' whose '\n' has not arrived).
    this.#lineBuffer = lines.pop() ?? '';

    let jsonPart = '';
    for (const line of lines) {
      // Ignore keep-alives, comments and other SSE fields (event:, retry:, id:).
      if (line.length === 0 || line.startsWith(':')) continue;
      if (!line.startsWith('data:')) continue;

      const payload = line.slice(5).trimStart();
      if (payload.trimEnd() === '[DONE]') {
        this.#sseDone = true;
        this.#lineBuffer = '';
        break;
      }
      jsonPart += `${payload}\n`;
    }
    return jsonPart;
  }

  /**
   * Opens an object or array and emits its start event.
   * @param {ContainerType} type
   * @private
   */
  #openContainer(type) {
    const parentFrame = this.#stack.at(-1);
    let parentState = STATE.DONE;
    if (parentFrame) {
      parentState = parentFrame.type === 'object'
        ? STATE.IN_OBJECT_AWAITING_SEPARATOR
        : STATE.IN_ARRAY_AWAITING_SEPARATOR;
    }
    if (parentFrame?.type === 'array') {
      // Advance the parent's index BEFORE computing the path, so the path ends
      // with this container's own index and not with its previous sibling's.
      parentFrame.keyOrIndex = (parentFrame.keyOrIndex ?? -1) + 1;
    }
    const path = this.#getCurrentPath();

    this.#stack.push({ type, keyOrIndex: undefined, parentState });
    if (type === 'object') {
      this.#enqueue({ type: 'objectStart', path });
      this.#state = STATE.IN_OBJECT_AWAITING_KEY;
    } else { // array
      this.#enqueue({ type: 'arrayStart', path });
      this.#state = STATE.IN_ARRAY_AWAITING_VALUE;
    }
    this.#pos++;
  }

  /**
   * Closes the innermost container and emits its end event.
   * @private
   */
  #closeContainer() {
    const frame = this.#stack.pop();
    if (!frame) {
      this.#emitError(`Tentativa de fechar um container que não foi aberto.`);
      return;
    }
    // Computed after the pop: the remaining frames describe where the closed
    // container lives, which is the same path its start event carried.
    const path = this.#getCurrentPath();
    if (frame.type === 'object') {
      this.#enqueue({ type: 'objectEnd', path });
    } else { // array
      this.#enqueue({ type: 'arrayEnd', path });
    }

    // When a root-level container finishes, get ready for the next document.
    this.#state = frame.parentState === STATE.DONE
      ? STATE.AWAITING_VALUE
      : frame.parentState;

    // A completed container is a value. If its parent is an object, clear the
    // parent's key so that the next string is recognised as a new key.
    const parentFrame = this.#stack.at(-1);
    if (parentFrame?.type === 'object') {
      parentFrame.keyOrIndex = undefined;
    }
    this.#pos++;
  }

  /**
   * Consumes string content up to the closing quote or the end of the buffer.
   * @private
   */
  #handleInString() {
    const parent = this.#stack.at(-1);
    // Keys must always be decoded (they are part of every path), even when
    // string values are not being accumulated.
    const isKey = parent?.type === 'object' && parent.keyOrIndex === undefined;
    const keepText = this.#options.keepFullString || isKey;

    const start = this.#pos;
    while (this.#pos < this.#buffer.length) {
      const char = this.#buffer[this.#pos];
      if (this.#isEscaped) {
        this.#isEscaped = false;
      } else if (char === '\\') {
        this.#isEscaped = true;
      } else if (char === '"') {
        const lastChunk = this.#buffer.substring(start, this.#pos);
        if (lastChunk) {
          this.#emitValueChunk(lastChunk);
          if (keepText) this.#stringChunks.push(lastChunk);
        }
        this.#pos++; // consume the closing quote

        let finalValue = null;
        if (keepText) {
          try {
            // JSON.parse decodes the escapes (\n, \uXXXX, etc.).
            finalValue = JSON.parse(`"${this.#stringChunks.join('')}"`);
          } catch (e) {
            this.#emitError(`Sequência de string inválida: ${e.message}`);
            return;
          }
        }
        this.#finalizeValue(finalValue, 'string');
        this.#stringChunks = [];
        return;
      }
      this.#pos++;
    }

    // The string has not ended yet: keep the partial chunk.
    const partial = this.#buffer.substring(start, this.#pos);
    if (partial) {
      this.#emitValueChunk(partial);
      if (keepText) this.#stringChunks.push(partial);
    }
  }

  /**
   * Accumulates the characters of a number or literal and completes the value
   * once a character that cannot belong to it is seen.
   * @param {'number' | 'literal'} finalType
   * @private
   */
  #handleSimpleValue(finalType) {
    const charClass = finalType === 'number'
      ? JsonStreamParser.#NUMBER_CHAR
      : JsonStreamParser.#LITERAL_CHAR;
    const start = this.#pos;
    while (this.#pos < this.#buffer.length && charClass.test(this.#buffer[this.#pos])) {
      this.#pos++;
    }
    this.#valueBuffer += this.#buffer.substring(start, this.#pos);

    // Buffer exhausted: the value may continue in the next chunk.
    if (this.#pos === this.#buffer.length) return;

    this.#completeSimpleValue(finalType);
  }

  /**
   * Validates the accumulated number/literal text and emits it as a value.
   * @param {'number' | 'literal'} finalType
   * @private
   */
  #completeSimpleValue(finalType) {
    const raw = this.#valueBuffer;
    this.#valueBuffer = '';

    if (finalType === 'number') {
      if (!JsonStreamParser.#NUMBER_PATTERN.test(raw)) {
        this.#emitError(`Formato de número inválido: '${raw}'`);
        return;
      }
    } else if (raw !== 'true' && raw !== 'false' && raw !== 'null') {
      this.#emitError(`Literal inválido encontrado: '${raw}'`);
      return;
    }

    try {
      this.#finalizeValue(JSON.parse(raw), finalType);
    } catch (e) {
      this.#emitError(`Falha ao parsear valor ${finalType}: '${raw}'`);
    }
  }

  /** @private */
  #handleInNumber() { this.#handleSimpleValue('number'); }

  /** @private */
  #handleInLiteral() { this.#handleSimpleValue('literal'); }

  /**
   * Emits a `valueChunk` event for a piece of a string value. Pieces of object
   * keys are not emitted.
   * @param {string} chunk
   * @private
   */
  #emitValueChunk(chunk) {
    const parentFrame = this.#stack.at(-1);
    if (parentFrame?.type === 'object' && parentFrame.keyOrIndex === undefined) {
      return; // this string is a key
    }

    let path = this.#getCurrentPath();
    if (parentFrame?.type === 'array') {
      // The element is not finalized yet, so the array still records the
      // previous index (if any): replace it with the index being filled.
      const containerPath = parentFrame.keyOrIndex !== undefined ? path.slice(0, -1) : path;
      path = [...containerPath, (parentFrame.keyOrIndex ?? -1) + 1];
    }
    this.#enqueue({ type: 'valueChunk', path, chunk });
  }

  /**
   * Emits a completed scalar as a `key` or `value` event and moves the state
   * machine to whatever must follow it.
   * @param {*} value
   * @param {'string' | 'number' | 'literal'} valueType
   * @private
   */
  #finalizeValue(value, valueType) {
    const parent = this.#stack.at(-1);

    // Root value (outside any object or array).
    if (!parent) {
      this.#enqueue({ type: 'value', path: [], value });
      this.#state = STATE.AWAITING_VALUE; // ready for the next JSON document
      return;
    }

    if (parent.type === 'object') {
      if (parent.keyOrIndex === undefined) {
        // No key recorded yet: this string IS the key.
        const containerPath = this.#getCurrentPath();
        parent.keyOrIndex = value;
        this.#enqueue({ type: 'key', path: containerPath, value });
        this.#state = STATE.IN_OBJECT_AWAITING_COLON;
      } else {
        // Otherwise it is the value associated with the recorded key.
        this.#enqueue({ type: 'value', path: this.#getCurrentPath(), value });
        parent.keyOrIndex = undefined; // reset for the next key
        this.#state = STATE.IN_OBJECT_AWAITING_SEPARATOR;
      }
      return;
    }

    // Array element: advance the index, then report the value under it.
    parent.keyOrIndex = parent.keyOrIndex === undefined ? 0 : parent.keyOrIndex + 1;
    this.#enqueue({ type: 'value', path: this.#getCurrentPath(), value });
    this.#state = STATE.IN_ARRAY_AWAITING_SEPARATOR;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment