Zion Boggan
repos/Oversight/tests/test_jcs_canonical_unit.py
zionboggan.com ↗
124 lines · python
History for this file →
"""
test_jcs_canonical_unit
=======================

Byte-exact fixtures for the JSON Canonicalization Scheme (RFC 8785) port.

Background: the Rust reference uses ``serde_jcs::to_vec`` everywhere it
canonicalizes for signing or hashing. Python was historically on
``json.dumps(sort_keys=True, separators=(",",":")).encode("utf-8")``, which is
byte-identical to JCS for the ASCII-only subset but diverges for any non-ASCII
string value, because Python's default ``ensure_ascii=True`` escapes non-ASCII
as ``\\uXXXX`` while JCS emits raw UTF-8. That divergence was a latent threat
to the "bit-identical / conformance is ground truth" claim: any manifest,
tlog leaf, or evidence bundle containing a non-ASCII character would hash and
sign to different bytes across the two implementations.

These tests pin the JCS algorithm itself on known vectors (so a future
refactor cannot silently regress it), prove the non-ASCII divergence is
closed (the actual bug fix), and prove no regression for the existing
ASCII-only content (so committed fixtures and existing signatures stay valid).
"""
22
 
23
from __future__ import annotations
24
 
25
import json
26
import os
27
import sys
28
from pathlib import Path
29
 
30
ROOT = Path(__file__).resolve().parent.parent
31
sys.path.insert(0, str(ROOT))
32
 
33
from oversight_core.jcs import jcs_dumps
34
 
35
 
36
def test_primitives():
    """Pin the exact bytes produced for scalars and for empty containers."""
    # (input value, expected canonical bytes), checked in this order.
    expectations = (
        (None, b"null"),
        (True, b"true"),
        (False, b"false"),
        (0, b"0"),
        (42, b"42"),
        (-1, b"-1"),
        (9223372036854775807, b"9223372036854775807"),
        ("hello", b'"hello"'),
        ("", b'""'),
        ([], b"[]"),
        ({}, b"{}"),
    )
    for value, encoded in expectations:
        assert jcs_dumps(value) == encoded
48
 
49
 
50
def test_key_sorting_nested():
    """Object keys are emitted sorted at every depth; list order is kept."""
    flat = jcs_dumps({"b": 1, "a": 2})
    assert flat == b'{"a":2,"b":1}'

    nested = jcs_dumps({"z": 1, "a": {"y": 2, "x": 3}})
    assert nested == b'{"a":{"x":3,"y":2},"z":1}'

    # Arrays are positional: the elements must not be reordered.
    array = jcs_dumps([3, 1, 2])
    assert array == b"[3,1,2]"
54
 
55
 
56
def test_string_escapes():
    """Pin the escaped form of quote, backslash, and control characters."""
    # (raw Python string, expected canonical bytes including the quotes).
    vectors = (
        ('a"b', b'"a\\"b"'),
        ("a\\b", b'"a\\\\b"'),
        ("a\nb", b'"a\\nb"'),
        ("a\tb", b'"a\\tb"'),
        ("a\rb", b'"a\\rb"'),
        ("a\bb", b'"a\\bb"'),
        ("a\fb", b'"a\\fb"'),
        # A control character with no short escape is expected as a
        # six-character hex escape.
        ("a\x01b", b'"a\\u0001b"'),
    )
    for raw, encoded in vectors:
        assert jcs_dumps(raw) == encoded
65
 
66
 
67
def test_non_ascii_emits_raw_utf8_not_uXXXX_escape():
    """Non-ASCII string values must appear as raw UTF-8 bytes in the output.

    The vectors cover a two-byte, a three-byte, and a four-byte UTF-8
    sequence; none of them may be replaced by a backslash-u escape.
    """
    vectors = (
        ({"name": "café"}, b'{"name":"caf\xc3\xa9"}'),
        ({"k": "日本"}, b'{"k":"\xe6\x97\xa5\xe6\x9c\xac"}'),
        ({"k": "𝄞"}, b'{"k":"\xf0\x9d\x84\x9e"}'),
    )
    for document, encoded in vectors:
        assert jcs_dumps(document) == encoded
71
 
72
 
73
def test_non_ascii_key_sort_order():
    """A non-ASCII key is expected after the ASCII keys, as raw UTF-8."""
    document = {"ñ": 3, "z": 2, "abc": 1}
    # Expected key order: "abc", "z", then "ñ" (bytes C3 B1).
    expected = b'{"abc":1,"z":2,"\xc3\xb1":3}'
    assert jcs_dumps(document) == expected
76
 
77
 
78
def test_floats_rejected():
    """Floats must raise TypeError, both at top level and nested in a dict."""
    # (payload, failure message raised if jcs_dumps returns normally).
    rejected = (
        (1.0, "jcs_dumps accepted a float"),
        ({"x": 1.5}, "jcs_dumps accepted a nested float"),
    )
    for payload, complaint in rejected:
        try:
            jcs_dumps(payload)
        except TypeError:
            # Expected outcome; move on to the next payload.
            continue
        raise AssertionError(complaint)
89
 
90
 
91
def test_unsupported_types_rejected():
    """Values with no JSON representation must raise TypeError."""
    candidates = (object(), b"bytes", set(), frozenset())
    for candidate in candidates:
        try:
            jcs_dumps(candidate)
        except TypeError:
            pass
        else:
            # Reached only when the call returned instead of raising.
            raise AssertionError(f"jcs_dumps accepted {type(candidate).__name__}")
98
 
99
 
100
def test_ascii_content_byte_identical_to_legacy_sort_keys():
    """ASCII-only documents must encode exactly as the legacy json.dumps path.

    The legacy encoding is ``json.dumps`` with sorted keys and compact
    separators, encoded as UTF-8; ``jcs_dumps`` must return the same bytes
    for every sample.
    """

    def legacy_encode(document):
        # The pre-JCS canonical form used as the comparison baseline.
        text = json.dumps(document, sort_keys=True, separators=(",", ":"))
        return text.encode("utf-8")

    documents = (
        {"event": "register", "file_id": "f0", "n": 3},
        {"a": ["x", "y"], "b": {"c": True, "d": None}},
        {"size": 7, "root": "00" * 32, "signature": "ab" * 64},
    )
    for doc in documents:
        legacy = legacy_encode(doc)
        assert jcs_dumps(doc) == legacy, (
            f"ASCII divergence!\n  legacy: {legacy!r}\n  jcs:    {jcs_dumps(doc)!r}"
        )
111
 
112
 
113
def test_tuple_serializes_like_list():
    """A tuple must be emitted as a JSON array of its elements, in order."""
    triple = (1, 2, 3)
    assert jcs_dumps(triple) == b"[1,2,3]"
115
 
116
 
117
def test_round_trip_through_json_parser():
    """Output must decode as UTF-8 and parse back to an equal document."""
    documents = (
        {"a": 1, "b": [True, None, "x"], "c": {"d": "café"}},
        {"issuer": "Zión@test", "hash": "ab" * 16},
    )
    for original in documents:
        encoded = jcs_dumps(original)
        decoded = json.loads(encoded.decode("utf-8"))
        assert decoded == original