Last active
March 4, 2025 03:25
-
-
Save jcrist/80b84817e9c53a63222bd905aa607b43 to your computer and use it in GitHub Desktop.
Benchmark of msgspec, orjson, pydantic, ... taken from Python discord
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a modified version of `orig_benchmark.py`, using different data to | |
# highlight performance differences. | |
import json | |
import random | |
import string | |
import timeit | |
from statistics import mean, stdev | |
import orjson | |
import simdjson | |
import msgspec | |
import pydantic | |
def describe_json(buf: bytes) -> None:
    """Print a breakdown of the JSON value types found in *buf*.

    Decodes the message with msgspec, tallies every value by its exact
    Python type, and prints each type's count and fraction of the total,
    most frequent first.
    """
    kind_names = {
        dict: "objects",
        list: "arrays",
        str: "strs",
        int: "ints",
        float: "floats",
        bool: "bools",
        type(None): "nulls",
    }
    counts = {typ: 0 for typ in kind_names}

    # Walk the decoded document with an explicit stack; counts are
    # order-independent, so traversal order doesn't matter.
    stack = [msgspec.json.decode(buf)]
    while stack:
        node = stack.pop()
        # type() (not isinstance) keeps bool from being counted as int.
        typ = type(node)
        counts[typ] += 1
        if typ is list:
            stack.extend(node)
        elif typ is dict:
            for key, value in node.items():
                stack.append(key)
                stack.append(value)

    total = sum(counts.values())
    print("JSON Types:")
    rows = [(kind_names[typ], n) for typ, n in counts.items() if n]
    rows.sort(key=lambda row: row[1], reverse=True)
    for kind, count in rows:
        print(f"- {kind}: {count} ({count/total:.2f})")
# Fixed seed so the generated benchmark payload is reproducible.
random.seed(42)


def randstr():
    """Return a random 10-character string drawn from string.printable."""
    picked = random.choices(string.printable, k=10)
    return "".join(picked)
class ItemStruct(msgspec.Struct):
    """msgspec schema for a single (name, value) item owned by a user."""

    name: str
    value: int
class UserStruct(msgspec.Struct):
    """msgspec schema for one synthetic user record in the benchmark payload."""

    username: str
    exp: float
    level: float
    items: list[ItemStruct]
class ItemPydantic(pydantic.BaseModel):
    """Pydantic v2 model mirroring ItemStruct, for the pydantic benchmark."""

    name: str
    value: int
class UserPydantic(pydantic.BaseModel):
    """Pydantic v2 model mirroring UserStruct, for the pydantic benchmark."""

    username: str
    exp: float
    level: float
    items: list[ItemPydantic]
# Number of synthetic users in the encoded payload.
N = 10000


def _random_user() -> UserStruct:
    """Build one synthetic user with 10-19 random items.

    Arguments are evaluated left-to-right, so the RNG call sequence is
    fixed and the payload is fully determined by the seed above.
    """
    return UserStruct(
        randstr(),
        random.random(),
        random.uniform(0, 100),
        [
            ItemStruct(randstr(), random.randint(0, 100))
            for _ in range(random.randrange(10, 20))
        ],
    )


# The JSON message all decoders are benchmarked against.
msg = msgspec.json.encode([_random_user() for _ in range(N)])
# (label, decode callable) pairs; each callable takes the JSON bytes `msg`.
BENCHMARKS = [
    ("stdlib json", json.loads),
    ("orjson", orjson.loads),
    ("simdjson", simdjson.loads),
    # msgspec decoding to plain builtin types (dicts/lists), like the above.
    ("msgspec-dict", msgspec.json.decode),
    # msgspec decoding + validating directly into UserStruct instances.
    ("msgspec-struct", msgspec.json.Decoder(list[UserStruct]).decode),
    # pydantic v2 decoding + validating into UserPydantic models.
    ("pydantic-v2", pydantic.TypeAdapter(list[UserPydantic]).validate_json),
]
# Summarize the payload, then time every decoder against it.
describe_json(msg)
print("")
print("Benchmarks:")

results = {}
for label, decoder in BENCHMARKS:
    # Each sample is the total wall time of 10 decodes; 20 samples per decoder.
    samples = timeit.repeat(
        "loads(msg)",
        repeat=20,
        number=10,
        globals={"loads": decoder, "msg": msg},
    )
    results[label] = samples
    print(f"- {label}: {mean(samples):.2f} ± {stdev(samples):.2f}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a cleaned up version of the original benchmark. Semantically it's the | |
# same, just a bit easier to read. The performance numbers also match those | |
# seen on discord. | |
import json | |
import random | |
import string | |
import timeit | |
from statistics import mean, stdev | |
import orjson | |
import simdjson | |
import msgspec | |
import pydantic | |
def describe_json(buf: bytes) -> None:
    """Print a count and relative frequency of each JSON type in *buf*.

    The message is decoded with msgspec, every value is tallied by its
    exact Python type, and non-zero tallies are printed most frequent
    first together with their fraction of the total value count.
    """
    labels = [
        ("objects", dict),
        ("arrays", list),
        ("strs", str),
        ("ints", int),
        ("floats", float),
        ("bools", bool),
        ("nulls", type(None)),
    ]
    tally = {typ: 0 for _, typ in labels}

    def visit(node):
        # type() (not isinstance) keeps bool from being counted as int.
        kind = type(node)
        tally[kind] += 1
        if kind is dict:
            for key, value in node.items():
                visit(key)
                visit(value)
        elif kind is list:
            for element in node:
                visit(element)

    visit(msgspec.json.decode(buf))

    grand_total = sum(tally.values())
    print("JSON Types:")
    nonzero = [(label, tally[typ]) for label, typ in labels if tally[typ]]
    for label, count in sorted(nonzero, key=lambda row: row[1], reverse=True):
        print(f"- {label}: {count} ({count/grand_total:.2f})")
random.seed(42)  # fixed seed -> reproducible payload across runs


def randstr():
    """Return a 10-character string of random printable characters."""
    chars = random.choices(string.printable, k=10)
    return "".join(chars)
class UserStruct(msgspec.Struct):
    """msgspec schema for one synthetic user record in the benchmark payload."""

    username: str
    exp: float
    level: float
    last_values: list[float]
class UserPydantic(pydantic.BaseModel):
    """Pydantic v2 model mirroring UserStruct, for the pydantic benchmark."""

    username: str
    exp: float
    level: float
    last_values: list[float]
N = 10000  # number of synthetic users in the encoded payload


def _make_user() -> UserStruct:
    """Build one synthetic user with 10-19 random float samples.

    The RNG calls happen in a fixed order, so the payload is fully
    determined by the seed above.
    """
    name = randstr()
    exp = random.random()
    level = random.uniform(0, 100)
    values = [random.uniform(0, 100) for _ in range(random.randrange(10, 20))]
    return UserStruct(name, exp, level, values)


# The JSON message all decoders are benchmarked against.
msg = msgspec.json.encode([_make_user() for _ in range(N)])
# (label, decode callable) pairs; each callable takes the JSON bytes `msg`.
BENCHMARKS = [
    ("stdlib json", json.loads),
    ("orjson", orjson.loads),
    ("simdjson", simdjson.loads),
    # msgspec decoding to plain builtin types (dicts/lists), like the above.
    ("msgspec-dict", msgspec.json.decode),
    # msgspec decoding + validating directly into UserStruct instances.
    ("msgspec-struct", msgspec.json.Decoder(list[UserStruct]).decode),
    # pydantic v2 decoding + validating into UserPydantic models.
    ("pydantic-v2", pydantic.TypeAdapter(list[UserPydantic]).validate_json),
]
# Summarize the payload, then time every decoder against it.
describe_json(msg)
print("")
print("Benchmarks:")

results = {}
for name, decode in BENCHMARKS:
    # Each sample is the total wall time of 10 decodes; 20 samples per decoder.
    timings = timeit.repeat(
        "loads(msg)",
        repeat=20,
        number=10,
        globals={"loads": decode, "msg": msg},
    )
    results[name] = timings
    print(f"- {name}: {mean(timings):.2f} ± {stdev(timings):.2f}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
On the Python discord someone posted a benchmark comparing `msgspec`, `orjson`, `pydantic`, `simdjson`, ... This original benchmark shows `msgspec` decoding and validating JSON to be ~the same performance (or a bit slower) as `orjson` decoding it alone. While nice (msgspec is doing more for the user in the same amount of time), this didn't match performance numbers seen on the msgspec website here. I was curious about the differences - what about the message structure here was leading to different results?
After asking for the benchmark (which was output from an `.ipynb`), I spent some time cleaning it up while changing none of the semantic details. This is `orig_benchmark.py`. I then played around a bit with inspecting the message structure, and arbitrarily changing it to see how that affected the results. A similar message with a different structure is shown in `benchmark.py`.

Results
Running these on my machine:
orig_benchmark.py
benchmark.py
Analysis
The two benchmarks above show different results for the same parsers. The first one shows msgspec performing ~the same as orjson, the second one shows it performing ~2x faster than orjson. What's going on?
There are two main differences in the message structures here:
1. Fewer floats.
JSON is composed of 7 core types (`object`, `array`, `str`, `int`, `float`, `bool`, `null`). When benchmarking individual types for the core parsing routines, `msgspec`'s `float` parser is known to be a bit slower (~15% slower) than orjson's, while the other core type parsing routines are approximately equivalent (we're slightly faster at ints for some reason). The message used in the original benchmark is 70% floats, which in my experience is much higher than average for JSON data. Most JSON APIs I've interacted with are composed of mostly object, int, and str types, so I haven't spent a ton of time optimizing float performance. If float parsing performance is a user priority we could spend more time on this to close the gap.
The second benchmark adjusts the ratios a bit to skew more towards strings/ints. This makes a small difference, but isn't the main reason for the different results.
2. More objects
This is the biggest difference. Decoding into a `msgspec.Struct` type is significantly cheaper than decoding into a `dict`. Messages that have a higher ratio of `object` types will hit this hot path more frequently, resulting in a larger relative speedup. In this case we've replaced the list of ints with a list of small objects.

Conversely, pydantic does much worse here, since allocating a pydantic `BaseModel` is relatively more expensive than allocating a `dict`.
.Closing thoughts
Benchmarks are hard. As we've seen here, the structure of your messages matters a ton to determine how well a certain JSON library will handle it. When benchmarking it's important to ensure the structure of the messages you're using matches those in your real world use case.