Created
March 28, 2024 08:35
-
-
Save SiZapPaaiGwat/0ef3e8b745ba89877cf0dc1b9dbe4d86 to your computer and use it in GitHub Desktop.
bigJsonParse - JSON.parse alternative for big json files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fs from "node:fs"; | |
/**
 * Parses a JSON document like `JSON.parse`, with two extras aimed at very
 * large inputs: arrays can be truncated to a maximum number of items, and the
 * paths of long arrays are collected and reported.
 *
 * Containers are split into raw member substrings by a quote/escape-aware
 * scanner; scalars (strings, numbers, booleans, null) are delegated to
 * `JSON.parse`, so escape sequences are decoded exactly as the standard
 * parser would.
 *
 * @param {string} jsonString - The JSON text to parse.
 * @param {object} [options] - Optional settings (option names keep their
 *   historical spelling for backward compatibility).
 * @param {number} [options.maxArrayItemThershold=Number.MAX_SAFE_INTEGER] -
 *   Maximum number of items kept per array; further items are skipped without
 *   being parsed.
 * @param {number} [options.recordOversizedArrayThershold=50] - Arrays whose
 *   returned (possibly truncated) length exceeds this value have their path
 *   recorded.
 * @returns {{ result: *, oversizedArraysPaths: string[] }} The parsed value
 *   and the paths (e.g. `users[3].tags`, `""` for a root array) of recorded
 *   arrays. Nested arrays are recorded before their parents.
 * @throws {SyntaxError} If a container is unterminated, an object member has
 *   no colon or a non-string key, or a scalar is not valid JSON.
 */
function bigJsonParse(
  jsonString,
  {
    maxArrayItemThershold = Number.MAX_SAFE_INTEGER,
    recordOversizedArrayThershold = 50,
  } = {}
) {
  // Paths of arrays whose length exceeds recordOversizedArrayThershold.
  const oversizedArraysPaths = [];

  // Rejects a container whose text does not end with its closing bracket;
  // the scanner below assumes the last character is that bracket.
  function assertClosedBy(segment, closer) {
    if (segment.length < 2 || !segment.endsWith(closer)) {
      throw new SyntaxError(
        `Unterminated JSON container: expected "${closer}" at the end`
      );
    }
  }

  // Walks the interior of a container (everything between the outer
  // brackets) and reports each "," or ":" that sits outside any string and
  // outside any nested container. `onDelimiter(char, index)` may return true
  // to stop scanning early.
  function scanTopLevel(segment, onDelimiter) {
    const end = segment.length - 1;
    let depth = 0;
    let isInString = false;
    for (let i = 1; i < end; i++) {
      const char = segment[i];
      if (isInString) {
        if (char === "\\") {
          // Skip the escaped character so `\"` and `\\` cannot be mistaken
          // for a closing quote or the start of another escape.
          i++;
        } else if (char === '"') {
          isInString = false;
        }
      } else if (char === '"') {
        isInString = true;
      } else if (char === "[" || char === "{") {
        depth++;
      } else if (char === "]" || char === "}") {
        depth--;
      } else if (depth === 0 && (char === "," || char === ":")) {
        if (onDelimiter(char, i)) {
          return;
        }
      }
    }
  }

  // Parses one JSON value; containers are handled locally, everything else
  // (strings, numbers, booleans, null) goes through JSON.parse.
  function parseValue(segment, currentPath = "") {
    const text = segment.trim();
    if (text.startsWith("{")) {
      return parseObject(text, currentPath);
    }
    if (text.startsWith("[")) {
      return parseArray(text, currentPath);
    }
    return JSON.parse(text);
  }

  // Parses an array, keeping at most maxArrayItemThershold items.
  function parseArray(segment, currentPath) {
    assertClosedBy(segment, "]");
    const array = [];
    const interiorEnd = segment.length - 1;
    let itemStart = 1;
    // True once the item cap is reached (immediately for a cap <= 0).
    let isFull = array.length >= maxArrayItemThershold;

    const addItem = (itemEnd) => {
      // array.length is read before the push, so it is the new item's index.
      array.push(
        parseValue(
          segment.slice(itemStart, itemEnd),
          `${currentPath}[${array.length}]`
        )
      );
    };

    if (!isFull) {
      scanTopLevel(segment, (char, index) => {
        if (char !== ",") {
          return false;
        }
        addItem(index);
        itemStart = index + 1;
        isFull = array.length >= maxArrayItemThershold;
        return isFull; // stop early; the remaining items are never parsed
      });
    }

    // The last item has no trailing comma. A blank tail means an empty array
    // (`[]`, `[ ]`) or a trailing comma, and is ignored.
    if (!isFull && segment.slice(itemStart, interiorEnd).trim() !== "") {
      addItem(interiorEnd);
    }

    if (array.length > recordOversizedArrayThershold) {
      oversizedArraysPaths.push(currentPath);
    }
    return array;
  }

  // Parses an object, recursing into each member value.
  function parseObject(segment, currentPath) {
    assertClosedBy(segment, "}");
    const obj = {};
    const interiorEnd = segment.length - 1;
    let memberStart = 1;
    let colonIndex = -1; // index of the key/value colon of the current member

    const addMember = (memberEnd) => {
      if (colonIndex === -1) {
        throw new SyntaxError(
          `Missing ":" in object member at path "${currentPath}"`
        );
      }
      // JSON.parse decodes escapes in the key and tolerates surrounding
      // whitespace.
      const key = JSON.parse(segment.slice(memberStart, colonIndex));
      if (typeof key !== "string") {
        throw new SyntaxError(
          `Object key must be a string at path "${currentPath}"`
        );
      }
      const newPath = currentPath ? `${currentPath}.${key}` : key;
      const value = parseValue(
        segment.slice(colonIndex + 1, memberEnd),
        newPath
      );
      if (key === "__proto__") {
        // Plain assignment would invoke the prototype setter; define an own
        // property instead, as JSON.parse does.
        Object.defineProperty(obj, key, {
          value,
          writable: true,
          enumerable: true,
          configurable: true,
        });
      } else {
        obj[key] = value;
      }
    };

    scanTopLevel(segment, (char, index) => {
      if (char === ":") {
        // Only the first top-level colon separates key from value.
        if (colonIndex === -1) {
          colonIndex = index;
        }
        return false;
      }
      addMember(index);
      memberStart = index + 1;
      colonIndex = -1;
      return false;
    });

    // The last member has no trailing comma. A blank tail means an empty
    // object or a trailing comma, and is ignored.
    if (segment.slice(memberStart, interiorEnd).trim() !== "") {
      addMember(interiorEnd);
    }
    return obj;
  }

  // Entry point: parse the whole document starting from the root path "".
  const result = parseValue(jsonString);
  return { result, oversizedArraysPaths };
}
// Demo driver: parse example.json, write the parsed result to out.json as
// pretty-printed JSON, and print the paths of the arrays the parser recorded
// as oversized.
const { result, oversizedArraysPaths } = bigJsonParse(
  fs.readFileSync("example.json", "utf8"),
  // Pass an explicit (empty) options object so the parser's defaults apply;
  // the second parameter is destructured, so it must not be undefined unless
  // the callee supplies its own default.
  {}
);
fs.writeFileSync("out.json", JSON.stringify(result, null, 2));
console.log(oversizedArraysPaths);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment