This commit is contained in:
Iliyan Angelov
2025-09-19 11:58:53 +03:00
parent 306b20e24a
commit 6b247e5b9f
11423 changed files with 1500615 additions and 778 deletions

View File

@@ -0,0 +1,5 @@
"""
Benchmarks for validation.
This package is *not* public API.
"""

View File

@@ -0,0 +1,30 @@
"""
A benchmark for comparing equivalent validation of `const` and `enum`.
"""
from pyperf import Runner
from jsonschema import Draft202012Validator
value = [37] * 100
const_schema = {"const": list(value)}
enum_schema = {"enum": [list(value)]}
valid = list(value)
invalid = [*valid, 73]
const = Draft202012Validator(const_schema)
enum = Draft202012Validator(enum_schema)
assert const.is_valid(valid)
assert enum.is_valid(valid)
assert not const.is_valid(invalid)
assert not enum.is_valid(invalid)
if __name__ == "__main__":
runner = Runner()
runner.bench_func("const valid", lambda: const.is_valid(valid))
runner.bench_func("const invalid", lambda: const.is_valid(invalid))
runner.bench_func("enum valid", lambda: enum.is_valid(valid))
runner.bench_func("enum invalid", lambda: enum.is_valid(invalid))

View File

@@ -0,0 +1,28 @@
"""
A benchmark for validation of the `contains` keyword.
"""
from pyperf import Runner
from jsonschema import Draft202012Validator
schema = {
"type": "array",
"contains": {"const": 37},
}
validator = Draft202012Validator(schema)
size = 1000
beginning = [37] + [0] * (size - 1)
middle = [0] * (size // 2) + [37] + [0] * (size // 2)
end = [0] * (size - 1) + [37]
invalid = [0] * size
if __name__ == "__main__":
runner = Runner()
runner.bench_func("baseline", lambda: validator.is_valid([]))
runner.bench_func("beginning", lambda: validator.is_valid(beginning))
runner.bench_func("middle", lambda: validator.is_valid(middle))
runner.bench_func("end", lambda: validator.is_valid(end))
runner.bench_func("invalid", lambda: validator.is_valid(invalid))

View File

@@ -0,0 +1,25 @@
"""
A performance benchmark using the example from issue #232.
See https://github.com/python-jsonschema/jsonschema/pull/232.
"""
from pathlib import Path
from pyperf import Runner
from referencing import Registry
from jsonschema.tests._suite import Version
import jsonschema
issue232 = Version(
path=Path(__file__).parent / "issue232",
remotes=Registry(),
name="issue232",
)
if __name__ == "__main__":
issue232.benchmark(
runner=Runner(),
Validator=jsonschema.Draft4Validator,
)

View File

@@ -0,0 +1,12 @@
"""
A performance benchmark using the official test suite.
This benchmarks jsonschema using every valid example in the
JSON-Schema-Test-Suite. It will take some time to complete.
"""
from pyperf import Runner
from jsonschema.tests._suite import Suite
if __name__ == "__main__":
Suite().benchmark(runner=Runner())

View File

@@ -0,0 +1,56 @@
"""
Validating highly nested schemas shouldn't cause exponential time blowups.
See https://github.com/python-jsonschema/jsonschema/issues/1097.
"""
from itertools import cycle
from jsonschema.validators import validator_for
# A "strict" variant of the 2020-12 metaschema: it `$ref`s the real
# metaschema and additionally sets `unevaluatedProperties: False`, so any
# property the referenced metaschema does not account for is rejected.
metaschemaish = {
    "$id": "https://example.com/draft/2020-12/schema/strict",
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$vocabulary": {
        "https://json-schema.org/draft/2020-12/vocab/core": True,
        "https://json-schema.org/draft/2020-12/vocab/applicator": True,
        "https://json-schema.org/draft/2020-12/vocab/unevaluated": True,
        "https://json-schema.org/draft/2020-12/vocab/validation": True,
        "https://json-schema.org/draft/2020-12/vocab/meta-data": True,
        "https://json-schema.org/draft/2020-12/vocab/format-annotation": True,
        "https://json-schema.org/draft/2020-12/vocab/content": True,
    },
    "$dynamicAnchor": "meta",
    "$ref": "https://json-schema.org/draft/2020-12/schema",
    "unevaluatedProperties": False,
}
def nested_schema(levels):
    """
    Produce a schema which validates deeply nested objects and arrays.

    The innermost schema validates an object with a string ``ham``
    property; each additional level wraps it in one more object property,
    drawing wrapper names cyclically from a fixed pool.  ``levels=1`` (or
    fewer) returns the innermost schema unwrapped.
    """
    schema = {"type": "object", "properties": {"ham": {"type": "string"}}}
    wrapper_names = cycle(["foo", "bar", "baz", "quux", "spam", "eggs"])
    for name, _ in zip(wrapper_names, range(levels - 1)):
        schema = {"type": "object", "properties": {name: schema}}
    return schema
# Validate each generated schema against the strict metaschema itself.
validator = validator_for(metaschemaish)(metaschemaish)

if __name__ == "__main__":
    from pyperf import Runner

    runner = Runner()

    flat = nested_schema(levels=1)
    runner.bench_func("not nested", lambda: validator.is_valid(flat))

    for depth in range(1, 11, 3):
        # Bind the schema as a default argument so each lambda keeps its
        # own schema rather than closing over the loop variable.
        nested = nested_schema(levels=depth)
        runner.bench_func(
            f"nested * {depth}",
            lambda nested=nested: validator.is_valid(nested),
        )

View File

@@ -0,0 +1,42 @@
"""
A benchmark which tries to compare the possible slow subparts of validation.
"""
from referencing import Registry
from referencing.jsonschema import DRAFT202012
from rpds import HashTrieMap, HashTrieSet
from jsonschema import Draft202012Validator
schema = {
"type": "array",
"minLength": 1,
"maxLength": 1,
"items": {"type": "integer"},
}
hmap = HashTrieMap()
hset = HashTrieSet()
registry = Registry()
v = Draft202012Validator(schema)
def registry_data_structures():
return hmap.insert("foo", "bar"), hset.insert("foo")
def registry_add():
resource = DRAFT202012.create_resource(schema)
return registry.with_resource(uri="urn:example", resource=resource)
if __name__ == "__main__":
from pyperf import Runner
runner = Runner()
runner.bench_func("HashMap/HashSet insertion", registry_data_structures)
runner.bench_func("Registry insertion", registry_add)
runner.bench_func("Success", lambda: v.is_valid([1]))
runner.bench_func("Failure", lambda: v.is_valid(["foo"]))
runner.bench_func("Metaschema validation", lambda: v.check_schema(schema))

View File

@@ -0,0 +1,35 @@
"""
An unused schema registry should not cause slower validation.
"Unused" here means one where no reference resolution is occurring anyhow.
See https://github.com/python-jsonschema/jsonschema/issues/1088.
"""
from pyperf import Runner
from referencing import Registry
from referencing.jsonschema import DRAFT201909
from jsonschema import Draft201909Validator
registry = Registry().with_resource(
"urn:example:foo",
DRAFT201909.create_resource({}),
)
schema = {"$ref": "https://json-schema.org/draft/2019-09/schema"}
instance = {"maxLength": 4}
no_registry = Draft201909Validator(schema)
with_useless_registry = Draft201909Validator(schema, registry=registry)
if __name__ == "__main__":
runner = Runner()
runner.bench_func(
"no registry",
lambda: no_registry.is_valid(instance),
)
runner.bench_func(
"useless registry",
lambda: with_useless_registry.is_valid(instance),
)

View File

@@ -0,0 +1,106 @@
"""
A benchmark for validation of applicators containing lots of useless schemas.
Signals a small possible optimization to remove all such schemas ahead of time.
"""
from pyperf import Runner
from jsonschema import Draft202012Validator as Validator
NUM_USELESS = 100000
subschema = {"const": 37}
valid = 37
invalid = 12
baseline = Validator(subschema)
# These should be indistinguishable from just `subschema`
by_name = {
"single subschema": {
"anyOf": Validator({"anyOf": [subschema]}),
"allOf": Validator({"allOf": [subschema]}),
"oneOf": Validator({"oneOf": [subschema]}),
},
"redundant subschemas": {
"anyOf": Validator({"anyOf": [subschema] * NUM_USELESS}),
"allOf": Validator({"allOf": [subschema] * NUM_USELESS}),
},
"useless successful subschemas (beginning)": {
"anyOf": Validator({"anyOf": [subschema, *[True] * NUM_USELESS]}),
"allOf": Validator({"allOf": [subschema, *[True] * NUM_USELESS]}),
},
"useless successful subschemas (middle)": {
"anyOf": Validator(
{
"anyOf": [
*[True] * (NUM_USELESS // 2),
subschema,
*[True] * (NUM_USELESS // 2),
],
},
),
"allOf": Validator(
{
"allOf": [
*[True] * (NUM_USELESS // 2),
subschema,
*[True] * (NUM_USELESS // 2),
],
},
),
},
"useless successful subschemas (end)": {
"anyOf": Validator({"anyOf": [*[True] * NUM_USELESS, subschema]}),
"allOf": Validator({"allOf": [*[True] * NUM_USELESS, subschema]}),
},
"useless failing subschemas (beginning)": {
"anyOf": Validator({"anyOf": [subschema, *[False] * NUM_USELESS]}),
"oneOf": Validator({"oneOf": [subschema, *[False] * NUM_USELESS]}),
},
"useless failing subschemas (middle)": {
"anyOf": Validator(
{
"anyOf": [
*[False] * (NUM_USELESS // 2),
subschema,
*[False] * (NUM_USELESS // 2),
],
},
),
"oneOf": Validator(
{
"oneOf": [
*[False] * (NUM_USELESS // 2),
subschema,
*[False] * (NUM_USELESS // 2),
],
},
),
},
"useless failing subschemas (end)": {
"anyOf": Validator({"anyOf": [*[False] * NUM_USELESS, subschema]}),
"oneOf": Validator({"oneOf": [*[False] * NUM_USELESS, subschema]}),
},
}
if __name__ == "__main__":
runner = Runner()
runner.bench_func("baseline valid", lambda: baseline.is_valid(valid))
runner.bench_func("baseline invalid", lambda: baseline.is_valid(invalid))
for group, applicators in by_name.items():
for applicator, validator in applicators.items():
runner.bench_func(
f"{group}: {applicator} valid",
lambda validator=validator: validator.is_valid(valid),
)
runner.bench_func(
f"{group}: {applicator} invalid",
lambda validator=validator: validator.is_valid(invalid),
)

View File

@@ -0,0 +1,32 @@
"""
A benchmark for validation of schemas containing lots of useless keywords.
Checks we filter them out once, ahead of time.
"""
from pyperf import Runner
from jsonschema import Draft202012Validator
NUM_USELESS = 100000
# A schema whose three meaningful keywords sit at the beginning ("not"),
# middle ("type") and end ("minimum"), buried among NUM_USELESS useless
# (unknown, hence ignored) keywords on each side of "type".
schema = dict(
    [
        ("not", {"const": 42}),
        *((str(i), i) for i in range(NUM_USELESS)),
        ("type", "integer"),
        # FIX: this was `range(NUM_USELESS, NUM_USELESS)` — an *empty*
        # range — so "minimum" immediately followed "type" and the
        # "end of schema" case measured nothing different from the middle.
        *((str(i), i) for i in range(NUM_USELESS, 2 * NUM_USELESS)),
        ("minimum", 37),
    ],
)
validator = Draft202012Validator(schema)

valid = 3737
invalid = 12

if __name__ == "__main__":
    runner = Runner()
    cases = (
        ("beginning of schema", 42),   # rejected by the leading `not`
        ("middle of schema", "foo"),   # rejected by `type` partway through
        ("end of schema", 12),         # rejected by the trailing `minimum`
        ("valid", 3737),
    )
    for name, instance in cases:
        runner.bench_func(
            name,
            lambda instance=instance: validator.is_valid(instance),
        )

View File

@@ -0,0 +1,14 @@
from pyperf import Runner
from jsonschema import Draft202012Validator
schema = {
"type": "array",
"minLength": 1,
"maxLength": 1,
"items": {"type": "integer"},
}
if __name__ == "__main__":
Runner().bench_func("validator creation", Draft202012Validator, schema)