Zion Boggan zionboggan.com ↗

Labeling accuracy: derived confidence, causal ordering, multi-label, FP guards, fail-closed redaction

Implements the approved labeling-accuracy proposal (P1-P7, embeddings deferred).

P7: secret-assignment redaction now fails closed on short escaped JSON string values.
The generic rule counts each backslash escape as a single unit, so a value such as
{"api_key":"a\nz"} (3 units) fell under its 4-unit floor; adds a companion rule and
end-to-end + unit fail-closed tests.

P1: security confidence/tier are derived from independent corroborating signals
(scoreSecurity) instead of constant 0.95/0.84 buckets; every contributing signal
is listed in the evidence text. A single strong signal still anchors at verified/0.95.

P2: afterFailure falls back to ingestion ordinal (node id order) instead of returning
true when timestamps are missing, so a corrector can never precede its failure.
Resolution returns an honest null unless the candidate shares evidence with the failure or is
an explicit acceptance turn, instead of guessing the temporally-nearest node.

P3: securityActions and inferSignals return all matching kinds (capped) instead of
first-match-wins, so multi-class events surface every label.

P4: weak keywords (bare rbac/access-control) require a co-signal to tier above inferred;
adds a checked-in negative corpus as a release gate (zero security/failure/redaction FPs).

P5: extensionless dot-directory path detection was already present; covered by an
existing test, no change needed.

P6: human security-correction backstop emits an inferred-only signal anchored to a
prior unlabeled action, never fabricating strong/verified labels.

Hard rules preserved: zero runtime deps, no LLM judge, no network/telemetry,
labels remain auditable (evidence + node ids).
81d5699   Zion Boggan committed on Jun 15, 2026 (1 week ago)
src/analyze.js +224 -39
@@ -50,6 +50,7 @@ const STOPWORDS = new Set([
'when', 'where', 'which', 'will', 'about', 'agent', 'make', 'made', 'show', 'look',
]);
+const PROCESS_LABEL_CAP = 2;
const CONSTRAINT_PER_NODE_CAP = 3;
const CONSTRAINT_LIST_CAP = 10;
const CONSTRAINT_CLAUSE_MAX = 160;
@@ -106,6 +107,15 @@ const TEST_SKIP_API_RE =
const TEST_SKIP_RE =
/\b(?:disabl|skip|remov|delet|comment(?:ed)? out|drop|turn(?:ed)? off|x?(?:it|describe)\.skip|--no-tests?|--skip-tests?)\w*\b[^.\n]{0,24}\btests?\b|\btests?\b[^.\n]{0,24}\b(?:disabl|skip|remov|delet|comment(?:ed)? out|turn(?:ed)? off)\w*/i;
+// P6: strong human security-correction phrasing. Used as a corroborating co-signal and as
+// the inferred-tier recall backstop (must never mint a strong/verified label by itself).
+const SECURITY_CORRECTION_RE =
+ /\b(?:don'?t|do not|never)\b[^.]{0,30}\b(?:leak|expose|commit|hardcode|hard[- ]?code|push|publish)\b[^.]{0,30}\b(?:secret|secrets|token|tokens|key|keys|credential|credentials|password|passwords|env|api)\b|\b(?:rotate|revoke|regenerate|invalidate)\b[^.]{0,25}\b(?:that|the|this|those|your|my)?\s*(?:secret|token|key|credential|password|pat|api[- ]?key|access token)\b|\bthat'?s? (?:a|the|my|our) (?:secret|credential|api[- ]?key|token|password)\b|\b(?:revert|undo|roll ?back)\b[^.]{0,25}\b(?:the|that|those)?\s*(?:auth|security|permission|access[- ]?control|rbac|credential)\b|\b(?:you|it)\b[^.]{0,20}\b(?:leaked|exposed|hardcoded|hard[- ]?coded|committed)\b[^.]{0,25}\b(?:secret|token|key|credential|password|env)\b/i;
+
+function hasSecurityCorrection(text) {
+ return typeof text === 'string' && text.length <= 4000 && SECURITY_CORRECTION_RE.test(text);
+}
+
export function classifySecuritySurface(file) {
if (!file) return null;
for (const rule of SECURITY_SURFACE_RULES) {
@@ -126,31 +136,87 @@ export function mentionsTestSkip(text) {
);
}
+// P3: return ALL matching kinds per action instead of first-match-wins, so a node that
+// is both a credential leak and a disabled-test (etc.) surfaces every class. Each kind
+// carries its own strong/weak flag and the body that triggered it (for the audit trail).
+// `weak` marks a lone keyword (bare rbac/access-control) that needs a co-signal (P4).
function securityActions(node) {
const out = [];
for (const a of node.actions || []) {
const body = `${a.command || ''} ${a.input || ''}`;
- let kind = null;
- let strong = false;
- if (SECRET_CONTENT_RE.test(body)) {
- kind = 'credential';
- strong = true;
- } else if (a.file && isCredentialFile(a.file)) {
- kind = 'file';
- strong = true;
- } else if (ACCESS_CONTROL_CONTENT_RE.test(body)) {
- kind = 'access-control';
- strong = true;
- } else if (a.command && RISKY_CMD_RE.test(a.command)) {
- kind = 'risky-command';
- } else if (ACCESS_CONTROL_WEAK_RE.test(body)) {
- kind = 'access-control';
+ const kinds = [];
+ if (SECRET_CONTENT_RE.test(body)) kinds.push({ kind: 'credential', strong: true });
+ if (a.file && isCredentialFile(a.file)) kinds.push({ kind: 'file', strong: true });
+ if (ACCESS_CONTROL_CONTENT_RE.test(body)) kinds.push({ kind: 'access-control', strong: true });
+ if (a.command && RISKY_CMD_RE.test(a.command)) kinds.push({ kind: 'risky-command', strong: false });
+ // Weak keyword: only counts when no strong access-control content already fired on this action.
+ if (ACCESS_CONTROL_WEAK_RE.test(body) && !kinds.some((k) => k.kind === 'access-control')) {
+ kinds.push({ kind: 'access-control', strong: false, weak: true });
}
- if (kind) out.push({ action: a, kind, strong });
+ for (const k of kinds) out.push({ action: a, ...k });
}
return out;
}
+// Anchor confidences kept stable so existing tiers/numbers do not regress:
+// one strong signal -> verified / 0.95 (unchanged anchor the suite asserts on)
+// weak-only + cosignal-> high / 0.84
+// inferred backstops -> 0.62-0.70
+const SECURITY_STRONG_BASE = 0.95;
+const SECURITY_WEAK_BASE = 0.84;
+
+// P1: derive a security signal's confidence and tier from how many INDEPENDENT signals
+// corroborate it, instead of a constant two-bucket value. Each contributing signal is
+// listed in the evidence text (with node ids upstream) so the verdict stays auditable.
+// P4: a lone weak keyword (bare rbac/access-control) scores low and lands `inferred`
+// unless a real co-signal (credential content, security surface file, or human security
+// correction) is present.
+function scoreSecurity({ secActs, surface, humanCorrection }) {
+ const signals = [];
+ const strongActs = secActs.filter((s) => s.strong);
+ const weakActs = secActs.filter((s) => !s.strong);
+ const hasStrong = strongActs.length > 0;
+ const hasWeakKeywordOnly = !hasStrong && secActs.some((s) => s.weak);
+
+ if (strongActs.some((s) => s.kind === 'credential')) signals.push('strong credential content');
+ if (strongActs.some((s) => s.kind === 'file')) signals.push('credential filename');
+ if (strongActs.some((s) => s.kind === 'access-control')) signals.push('access-control command');
+ if (weakActs.some((s) => s.kind === 'risky-command')) signals.push('risky command');
+ if (weakActs.some((s) => s.weak)) signals.push('access-control keyword');
+ if (surface) signals.push(`security surface (${surface})`);
+ if (humanCorrection) signals.push('human security correction');
+
+ // Independent corroboration count beyond the primary signal nudges confidence within band.
+ const corroboration = Math.max(0, signals.length - 1);
+
+ let tier;
+ let base;
+ if (hasStrong) {
+ tier = 'verified';
+ base = SECURITY_STRONG_BASE;
+ } else if (hasWeakKeywordOnly) {
+ // P4 co-signal gate: a bare keyword with a real co-signal earns `high`; alone it stays `inferred`.
+ const cosignal = Boolean(surface) || humanCorrection || weakActs.some((s) => s.kind === 'risky-command');
+ if (cosignal) {
+ tier = 'high';
+ base = SECURITY_WEAK_BASE;
+ } else {
+ tier = 'inferred';
+ base = 0.62;
+ }
+ } else {
+ // risky-command (no keyword) or surface-only corroboration
+ tier = 'high';
+ base = SECURITY_WEAK_BASE;
+ }
+
+ // Within-band lift from extra corroboration, clamped to the band ceiling so the
+ // verified anchor (0.95) and existing assertions never move.
+ const ceiling = tier === 'verified' ? 0.95 : tier === 'high' ? 0.9 : 0.7;
+ const confidence = Math.min(ceiling, Math.round((base + 0.02 * corroboration) * 100) / 100);
+ return { tier, confidence, signals };
+}
+
function fileHint(node) {
for (const a of node.actions || []) {
if (a.file) return a.file;
@@ -306,14 +372,18 @@ export function analyzeTree(tree) {
return failure;
};
+ const securityNodeIds = new Set();
tree.nodes.forEach((node, index) => {
const secActs = securityActions(node);
if (secActs.length) {
- const hasStrong = secActs.some((s) => s.strong);
- const tier = hasStrong ? 'verified' : 'high';
- const confidence = hasStrong ? 0.95 : 0.84;
+ // P1: corroborating co-signals -- surface class on a touched file, and a human
+ // security correction that points back at this node -- feed the derived score.
+ const surface = uniq((node.actions || []).map((a) => classifySecuritySurface(a.file))).filter(Boolean)[0] || null;
+ const humanCorrection =
+ node.kind !== 'correction' ? Boolean(nearestSecurityCorrection(tree.nodes, node)) : false;
+ const { tier, confidence, signals } = scoreSecurity({ secActs, surface, humanCorrection });
const targets = uniq(secActs.map((s) => s.action.file || s.action.command || s.action.input)).slice(0, 3);
- const kinds = uniq(secActs.map((s) => s.kind));
+ const kinds = uniq(secActs.map((s) => s.kind)); // P3: every matching class, not first-match-wins
addFailure({
type: 'security_or_privacy_risk',
confidence,
@@ -321,9 +391,10 @@ export function analyzeTree(tree) {
failureNode: node,
correctionNode: node.kind === 'correction' ? null : nearestCorrectionAfter(tree.nodes, node),
resolvedNode: nearestAcceptedAfter(tree.nodes, node, null),
- evidence: `Agent action touched ${kinds.join(', ')}: ${targets.map((t) => `"${truncate(String(t), 80)}"`).join(', ')}`,
+ evidence: `Agent action touched ${kinds.join(', ')} [signals: ${signals.join('; ')}]: ${targets.map((t) => `"${truncate(String(t), 80)}"`).join(', ')}`,
summary: `An agent action touched auth, secrets, or access control near "${truncate(node.title, 90)}".`,
});
+ securityNodeIds.add(node.id);
} else if (node.text.length <= 1200 && SECURITY_INTENT_RE.test(node.text)) {
addFailure({
type: 'security_or_privacy_risk',
@@ -335,6 +406,30 @@ export function analyzeTree(tree) {
evidence: `User stated a security-sensitive intent: "${quote(node.text)}"`,
summary: `A security-sensitive intent was stated near "${truncate(node.title, 90)}".`,
});
+ securityNodeIds.add(node.id);
+ }
+
+ // P6: human-correction security-recall backstop. A human turn with a strong security
+ // correction ("don't leak that", "rotate that key", "revert the auth change") whose
+ // corrected (prior) node carried NO security label catches a real security event whose
+ // action phrasing missed the keyword list. Strictly `inferred` and human-grounded -- it
+ // never fabricates a strong/verified label.
+ if (hasSecurityCorrection(node.text)) {
+ const prior = nearestFailureTarget(node, tree.nodes);
+ const anchor = prior ? prior.target : null;
+ if (anchor && !securityNodeIds.has(anchor.id) && anchor.id !== node.id) {
+ addFailure({
+ type: 'security_or_privacy_risk',
+ confidence: 0.62,
+ tier: 'inferred',
+ failureNode: anchor,
+ correctionNode: node,
+ resolvedNode: nearestAcceptedAfter(tree.nodes, anchor, node),
+ evidence: `Human flagged a security concern about a prior action with no security label [signal: human security correction]: "${quote(node.text)}"`,
+ summary: `A human security correction was raised near "${truncate(anchor.title, 90)}" with no matching action-level signal.`,
+ });
+ securityNodeIds.add(anchor.id);
+ }
}
if (node.status === 'abandoned') {
@@ -681,10 +776,18 @@ function inferSignals(node) {
if (!matched.size && node.kind === 'correction') consider('misunderstood_goal', 0.62);
if (!matched.size) return [];
+ // P3: return all matching process kinds in priority order (capped) instead of
+ // first-match-wins, so a node that is e.g. both scope_drift and ignored_constraint
+ // surfaces both. misunderstood_goal is a fallback-only label and never co-emits.
+ const out = [];
for (const type of SIGNAL_PRIORITY) {
- if (matched.has(type)) return [{ type, confidence: matched.get(type) }];
+ if (type === 'misunderstood_goal') continue;
+ if (matched.has(type)) out.push({ type, confidence: matched.get(type) });
}
- return [];
+ if (!out.length && matched.has('misunderstood_goal')) {
+ return [{ type: 'misunderstood_goal', confidence: matched.get('misunderstood_goal') }];
+ }
+ return out.slice(0, PROCESS_LABEL_CAP);
}
function tsOf(node) {
@@ -692,11 +795,29 @@ function tsOf(node) {
return Number.isFinite(t) ? t : null;
}
+// Ingestion ordinal: node ids are assigned in stream order as `node_NNN` (src/tree.js),
+// so the numeric suffix is a stable parse-time ordinal. This is the causality tiebreak
+// used when timestamps are missing, instead of optimistically returning true (STRUCT-1).
+function ordinalOf(node) {
+ if (!node) return null;
+ if (Number.isFinite(node._ord)) return node._ord;
+ const m = /(\d+)\s*$/.exec(String(node.id || ''));
+ return m ? Number(m[1]) : null;
+}
+
+// P2: when timestamps are present, enforce ts ordering. When either timestamp is
+// missing, fall back to ingestion-ordinal ordering rather than returning true, so
+// timestamp-less adapters still get a real causal ordering and a corrector can never
+// be linked to a failure it preceded in the stream.
function afterFailure(candidate, failureNode) {
const ct = tsOf(candidate);
const ft = tsOf(failureNode);
- if (ct === null || ft === null) return true;
- return ct >= ft;
+ if (ct !== null && ft !== null) return ct >= ft;
+ const co = ordinalOf(candidate);
+ const fo = ordinalOf(failureNode);
+ if (co !== null && fo !== null) return co >= fo;
+ // No timestamp and no ordinal on either side: cannot establish ordering -> fail closed.
+ return false;
}
function actionFiles(node) {
@@ -712,8 +833,17 @@ function sharedFiles(a, b) {
function tokenSet(node) {
const out = new Set();
- for (const raw of String(node.text || '').toLowerCase().match(/[a-z][a-z0-9_-]{2,}/g) || []) {
- if (!STOPWORDS.has(raw)) out.add(raw);
+ const harvest = (s) => {
+ for (const raw of String(s || '').toLowerCase().match(/[a-z][a-z0-9_-]{2,}/g) || []) {
+ if (!STOPWORDS.has(raw)) out.add(raw);
+ }
+ };
+ harvest(node.text);
+ // Include path tokens from this node's action files so a correction that names the
+ // touched surface ("the auth flow") ties back to an edit of `src/auth/session.ts`.
+ // This strengthens semantic linkage (STRUCT-3) without temporal guessing.
+ for (const a of node.actions || []) {
+ if (a.file) harvest(String(a.file).replace(/[\\/.+_-]+/g, ' '));
}
return out;
}
@@ -727,8 +857,25 @@ function tokenOverlap(a, b) {
return hits;
}
+// Distinctive surface tokens: a single shared one between a security-file edit and a
+// correction is a strong semantic tie (e.g. an `auth/session.ts` edit + "fix the auth flow"),
+// where generic token overlap >= 3 would miss the link.
+const SURFACE_TOKENS = new Set([
+ 'auth', 'session', 'login', 'signin', 'signup', 'oauth', 'jwt', 'sso', 'saml',
+ 'secret', 'secrets', 'credential', 'credentials', 'password', 'token', 'apikey',
+ 'rbac', 'permission', 'permissions', 'middleware', 'crypto', 'encrypt', 'decrypt',
+]);
+
+function sharedSurfaceToken(a, b) {
+ const ta = tokenSet(a);
+ const tb = tokenSet(b);
+ for (const t of ta) if (SURFACE_TOKENS.has(t) && tb.has(t)) return true;
+ return false;
+}
+
function sharesEvidence(failureNode, candidate) {
if (sharedFiles(failureNode, candidate)) return true;
+ if (sharedSurfaceToken(failureNode, candidate)) return true;
return tokenOverlap(failureNode, candidate) >= 3;
}
@@ -737,7 +884,7 @@ function nearestFailureTarget(node, nodes) {
(n) => n.status !== 'abandoned' && n.id !== node.id && afterFailure(node, n)
);
if (!earlier.length) return null;
- earlier.sort((a, b) => (tsOf(b) ?? 0) - (tsOf(a) ?? 0));
+ earlier.sort((a, b) => orderAfter(b, a));
const semantic = earlier.find((n) => sharesEvidence(n, node));
if (semantic) return { target: semantic, linkage: 'semantic' };
if (node.parent && node.parent.status !== 'abandoned' && node.parent.id !== node.id && afterFailure(node, node.parent)) {
@@ -746,25 +893,63 @@ function nearestFailureTarget(node, nodes) {
return { target: earlier[0], linkage: 'positional' };
}
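+// Acceptance/confirmation cue: an explicit "looks good / that works / fixed" turn is a
+// semantic resolution even when it shares no tokens or files with the failure.
+// NOTE(review): the `that(?:'?s| is|...)` alternative also matches a bare "that's" / "that is",
+// so a turn such as "that is wrong" counts as an acceptance cue -- confirm this is intended.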
+const ACCEPTANCE_RE =
+ /\b(?:that(?:'?s| is| works| fixed)|works now|looks? good|lgtm|perfect|great|nice|fixed|resolved|that did it|that worked|much better|exactly|correct now)\b/i;
+
+function laterCandidates(nodes, failureNode, anchor, extraExcludeId) {
+ return nodes
+ .filter((n) => n.status !== 'abandoned' && n.id !== failureNode.id && afterFailure(n, anchor))
+ .filter((n) => !extraExcludeId || n.id !== extraExcludeId)
+ .sort(orderAfter);
+}
+
+function orderAfter(a, b) {
+ const ta = tsOf(a);
+ const tb = tsOf(b);
+ if (ta !== null && tb !== null) return ta - tb;
+ return (ordinalOf(a) ?? Infinity) - (ordinalOf(b) ?? Infinity);
+}
+
+// P2: only return a resolution when it actually ties back to the failure -- it shares
+// evidence (a shared file, a shared surface token, or token overlap >= 3) OR it is an
+// explicit acceptance/confirmation turn. Otherwise return null. An honest null beats the
+// temporally-nearest node, which is frequently just "the next thing that happened" and
+// poisons eval candidates.
function nearestAcceptedAfter(nodes, failureNode, correctionNode) {
const anchor = correctionNode || failureNode;
- const later = nodes
- .filter((n) => n.status !== 'abandoned' && n.id !== failureNode.id && afterFailure(n, anchor))
- .filter((n) => !correctionNode || n.id !== correctionNode.id);
+ const later = laterCandidates(nodes, failureNode, anchor, correctionNode?.id);
if (!later.length) return null;
- later.sort((a, b) => (tsOf(a) ?? Infinity) - (tsOf(b) ?? Infinity));
const semantic = later.find((n) => sharesEvidence(failureNode, n));
- return semantic || later[0];
+ if (semantic) return semantic;
+ const accepted = later.find((n) => ACCEPTANCE_RE.test(String(n.text || '')));
+ return accepted || null;
}
+// P2: only treat a later correction as the corrector when it semantically ties back to
+// the failure (shared evidence). A correction that merely happened later, about something
+// else, is not the corrector -- return null and let the signal stand uncorrected.
function nearestCorrectionAfter(nodes, failureNode) {
- const later = nodes.filter(
- (n) => n.status !== 'abandoned' && n.kind === 'correction' && n.id !== failureNode.id && afterFailure(n, failureNode)
- );
+ const later = nodes
+ .filter((n) => n.status !== 'abandoned' && n.kind === 'correction' && n.id !== failureNode.id && afterFailure(n, failureNode))
+ .sort(orderAfter);
if (!later.length) return null;
- later.sort((a, b) => (tsOf(a) ?? Infinity) - (tsOf(b) ?? Infinity));
- const semantic = later.find((n) => sharesEvidence(failureNode, n));
- return semantic || later[0];
+ return later.find((n) => sharesEvidence(failureNode, n)) || null;
+}
+
+// Co-signal lookup for P1: a later human turn that both carries security-correction
+// phrasing and ties back to this node by shared evidence corroborates the signal.
+function nearestSecurityCorrection(nodes, failureNode) {
+ const later = nodes
+ .filter(
+ (n) =>
+ n.status !== 'abandoned' &&
+ n.id !== failureNode.id &&
+ afterFailure(n, failureNode) &&
+ hasSecurityCorrection(n.text)
+ )
+ .sort(orderAfter);
+ return later.find((n) => sharesEvidence(failureNode, n)) || null;
}
function tierRank(tier) {
src/redact.js +5 -0
@@ -26,6 +26,11 @@ export const RULES = [
{ id: 'url-basic-auth', severity: 'medium', re: /\b[a-z][a-z0-9+.-]{0,30}:\/\/[^/\s:@'"`]{2,256}:[^/\s@'"`]{2,256}@[^\s'"`]{1,512}/gi },
{ id: 'bearer-header', severity: 'medium', re: /\bBearer\s+[A-Za-z0-9._+/=-]{20,}\b/g },
{ id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.){4,512}"|'(?:[^'\\]|\\.){4,512}'|`(?:[^`\\]|\\.){4,512}`|[^\s'"`,;){}]{6,512})/gi },
+ // Fail-closed companion: a secret-key assignment whose quoted value contains ANY backslash escape
+ // is redacted even when the value is too short for the generic rule above. That rule counts each
+ // escape (two raw characters, e.g. a literal \n) as ONE unit toward its 4-unit minimum, so a short
+ // escaped value such as "a\nz" (3 units) would otherwise slip the gate. Escaped JSON string values
+ // (literal \n, \t, \", \\) are the common serialized form of a secret.
+ { id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.)*?\\.(?:[^"\\]|\\.)*?"|'(?:[^'\\]|\\.)*?\\.(?:[^'\\]|\\.)*?'|`(?:[^`\\]|\\.)*?\\.(?:[^`\\]|\\.)*?`)/gi },
{ id: 'email', severity: 'soft', re: /\b[A-Za-z0-9._%+-]+@(?!(?:users\.noreply\.github\.com|example\.(?:com|org)))[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g },
{ id: 'ipv4', severity: 'soft', re: /\b(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\b(?!\.\d)/g },
test/treetrace.test.js +263 -0
@@ -1360,3 +1360,266 @@ test('cli: --stdin --from claude is rejected', () => {
assert.throws(() => parseArgs(['--stdin', '--from', 'claude']), /cannot be combined with --from claude/);
});
+
+// ---------------------------------------------------------------------------
+// Labeling-accuracy fixes (proposal P1-P7) + negative-corpus release gate.
+// ---------------------------------------------------------------------------
+
+test('P7: short escaped-JSON secret values fail closed (redaction gate)', () => {
+ // Escape-inflated character counts must never let a short escaped value slip the floor.
+ const cases = [
+ ['short escaped newline', '{"api_key":"a\\nz"}'],
+ ['tiny escaped value', '{"api_key":"x\\ny"}'],
+ ['escaped quote', '{"token":"a\\"b"}'],
+ ['escaped backslash', '{"secret":"a\\\\b"}'],
+ ['spec literal-\\n form', '{"api_key":"line1\\nline2line2line2"}'],
+ ];
+ for (const [label, sample] of cases) {
+ const hits = scanText(sample).map((f) => f.ruleId);
+ assert.ok(hits.includes('secret-assignment'), `${label}: escaped secret must be caught (got ${JSON.stringify(hits)})`);
+ }
+ // Must not over-fire on benign short non-escaped values or placeholders.
+ assert.equal(scanText('{"api_key":"ab"}').length, 0, 'benign short value below floor must stay clean');
+ assert.equal(scanText('{"api_key":"${SECRET}"}').filter((f) => f.ruleId === 'secret-assignment').length, 0, 'placeholder must stay clean');
+});
+
+test('P7: a short escaped-JSON secret leaves no raw value in any artifact end to end', async () => {
+ const rawValue = 'a\\nz';
+ const secretLine = `config is {"api_key":"${rawValue}"}`;
+ const dir = mkdtempSync(join(tmpdir(), 'treetrace-p7-'));
+ const file = join(dir, 'escconv.json');
+ const convo = [{
+ mapping: {
+ r: { message: null, parent: null, children: ['u'] },
+ u: { message: { author: { role: 'user' }, content: { parts: [secretLine] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
+ a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] },
+ },
+ }];
+ writeFileSync(file, JSON.stringify(convo));
+ try {
+ await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);
+ const artifacts = [
+ 'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
+ '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
+ ].filter((f) => existsSync(join(dir, f))).map((f) => readFileSync(join(dir, f), 'utf8')).join('\n');
+ assert.ok(!artifacts.includes(rawValue), 'raw short escaped-JSON secret leaked into an artifact');
+ assert.ok(artifacts.includes('[REDACTED:secret-assignment]'), 'expected a secret-assignment redaction marker');
+ } finally {
+ rmSync(dir, { recursive: true, force: true });
+ }
+});
+
+test('P1: a single strong security signal stays verified at exactly 0.95', () => {
+ const node = {
+ id: 'node_001', text: 'harden auth', title: 'harden auth', kind: 'root', status: 'accepted', parent: null,
+ actions: [{ tool: 'Edit', file: 'src/auth/session.ts', command: null, model: 'm' }],
+ };
+ const sec = analyzeTree({ nodes: [node] }).failures.find((f) => f.type === 'security_or_privacy_risk');
+ assert.ok(sec && sec.tier === 'verified' && sec.confidence === 0.95, 'strong anchor must remain verified/0.95');
+});
+
+test('P1: confidence is derived from corroboration and the contributing signals are in the evidence', () => {
+ // Many independent signals (credential content + credential file + risky cmd + surface) vs one weak keyword.
+ const strong = {
+ id: 'node_001', text: 'deploy', title: 'deploy', kind: 'root', status: 'accepted', parent: null,
+ actions: [{ tool: 'Bash', file: 'src/auth/session.ts', command: '. /srv/app/.env; rm -rf /tmp/x; chmod 777 /etc', input: '. /srv/app/.env; rm -rf /tmp/x; chmod 777 /etc', model: 'm' }],
+ };
+ const strongSec = analyzeTree({ nodes: [strong] }).failures.find((f) => f.type === 'security_or_privacy_risk');
+ assert.equal(strongSec.tier, 'verified');
+ assert.ok(/signals:/.test(strongSec.evidence), 'evidence must list the contributing signals (auditable)');
+ assert.ok(/strong credential content/.test(strongSec.evidence), 'evidence must name the strong credential signal');
+
+ const weak = {
+ id: 'node_001', text: 'edit detector', title: 'x', kind: 'root', status: 'accepted', parent: null,
+ actions: [{ tool: 'Edit', file: 'src/analyze.js', input: 'const ACCESS = /rbac/i;', command: null, model: 'm' }],
+ };
+ const weakSec = analyzeTree({ nodes: [weak] }).failures.find((f) => f.type === 'security_or_privacy_risk');
+ // Derived: the lone-weak-keyword score must be strictly below the strong score.
+ assert.ok(weakSec.confidence < strongSec.confidence, 'lone weak keyword must score below a multi-signal strong event');
+});
+
+test('P2: afterFailure does not link a corrector that precedes its failure when timestamps are missing', () => {
+ // Ingestion ordinal (node id suffix) is the tiebreak: node_001 precedes node_002 in the stream.
+ const failure = {
+ id: 'node_002', text: 'the deck still does not render here', title: 'still broken', kind: 'direction', status: 'accepted', parent: null,
+ actions: [{ tool: 'Edit', file: 'site/deck/index.html', command: null, input: null, model: 'm' }],
+ };
+ const earlier = {
+ id: 'node_001', text: 'no that is wrong redo the deck here please', title: 'redo', kind: 'correction', status: 'accepted', parent: failure,
+ actions: [{ tool: 'Edit', file: 'site/deck/index.html', command: null, input: null, model: 'm' }],
+ };
+ const analysis = analyzeTree({ nodes: [failure, earlier] });
+ for (const f of analysis.failures) {
+ if (!f.correctedByNodeId) continue;
+ const fo = Number(/(\d+)$/.exec(f.firstSeenNodeId)[1]);
+ const co = Number(/(\d+)$/.exec(f.correctedByNodeId)[1]);
+ assert.ok(co >= fo, `failure ${f.id} corrected by an earlier-ordinal node`);
+ }
+});
+
+test('P2: resolvedBy is null when no resolution ties back to the failure, instead of the temporally-nearest node', () => {
+ const failure = {
+ id: 'node_001', text: 'do not hardcode the database url into the config file please', title: 'no hardcoding', kind: 'correction', status: 'accepted', parent: null,
+ ts: '2026-06-12T10:00:00.000Z', actions: [{ tool: 'Edit', file: 'config/db.ts', command: null, input: null, model: 'm' }],
+ };
+ const unrelatedLater = {
+ id: 'node_002', text: 'now lets switch topics entirely and write the marketing landing copy', title: 'marketing', kind: 'direction', status: 'accepted', parent: failure,
+ ts: '2026-06-12T11:00:00.000Z', actions: [{ tool: 'Edit', file: 'site/index.html', command: null, input: null, model: 'm' }],
+ };
+ const analysis = analyzeTree({ nodes: [failure, unrelatedLater] });
+ for (const chain of analysis.correctionChains) {
+ // The unrelated later node shares neither file nor surface token nor acceptance phrasing.
+ assert.notEqual(chain.resolvedNodeId, 'node_002', 'must not resolve to an unrelated temporally-nearest node');
+ }
+});
+
+test('P2: an explicit acceptance turn IS accepted as a resolution even with no shared evidence', () => {
+ // The failure/correction share a file (so they link), but the acceptance turn shares
+ // NOTHING structural with the failure -- only its acceptance phrasing can recover it as
+ // the resolution. This proves the acceptance path, not temporal-nearest guessing.
+ const failure = {
+ id: 'node_001', text: 'the checkout total is off by a cent on tax rounding', title: 'rounding bug', kind: 'direction', status: 'accepted', parent: null,
+ ts: '2026-06-12T10:00:00.000Z', actions: [{ tool: 'Edit', file: 'src/checkout/total.ts', command: null, input: null, model: 'm' }],
+ };
+ const correction = {
+ id: 'node_002', text: 'no the checkout total rounding is still wrong, redo the total calc', title: 'still wrong', kind: 'correction', status: 'accepted', parent: failure,
+ ts: '2026-06-12T10:30:00.000Z', actions: [{ tool: 'Edit', file: 'src/checkout/total.ts', command: null, input: null, model: 'm' }],
+ };
+ const accepted = {
+ id: 'node_003', text: 'perfect, that works now', title: 'works', kind: 'direction', status: 'accepted', parent: correction,
+ ts: '2026-06-12T11:00:00.000Z', actions: [{ tool: 'Edit', file: 'src/unrelated/widget.ts', command: null, input: null, model: 'm' }],
+ };
+ const analysis = analyzeTree({ nodes: [failure, correction, accepted] });
+ // failure + correction share total.ts, so a chain forms; the acceptance turn (node_003)
+ // shares no file/surface with the failure, so only its acceptance phrasing can recover it
+ // as the resolution -- proving the acceptance path, not temporal-nearest guessing.
+ assert.ok(
+ analysis.correctionChains.some((c) => c.resolvedNodeId === 'node_003'),
+ 'the explicit acceptance turn should be recorded as the resolution'
+ );
+});
+
+test('P3: a node that leaks a secret and runs a risky command surfaces both kinds', () => {
+ const node = {
+ id: 'node_001', text: 'deploy', title: 'deploy', kind: 'root', status: 'accepted', parent: null,
+ actions: [{ tool: 'Bash', file: null, command: '. /srv/app/.env; rm -rf /var/data', input: '. /srv/app/.env; rm -rf /var/data', model: 'm' }],
+ };
+ const sec = analyzeTree({ nodes: [node] }).failures.find((f) => f.type === 'security_or_privacy_risk');
+ assert.ok(/credential/.test(sec.evidence) && /risky-command/.test(sec.evidence), `both kinds must appear: ${sec.evidence}`);
+});
+
+test('P3: inferSignals can return multiple process kinds for a multi-class correction', () => {
+ const root = { id: 'node_001', text: 'build a dashboard', title: 'x', kind: 'root', status: 'accepted', parent: null, actions: [] };
+ const corr = {
+ id: 'node_002', kind: 'correction', status: 'accepted', parent: root, actions: [],
+ text: 'no, you ignored what i asked for and this is overbuilt, scrap the web app, keep it minimal',
+ title: 'multi-class correction',
+ };
+ const analysis = analyzeTree({ nodes: [root, corr] });
+ const types = new Set(analysis.failures.map((f) => f.type));
+ assert.ok(types.size >= 2, `expected multiple process labels, got ${[...types].join(', ')}`);
+});
+
+test('P4: a bare rbac keyword with no co-signal stays inferred, never high/verified', () => {
+ const node = {
+ id: 'node_001', text: 'edit detector', title: 'x', kind: 'root', status: 'accepted', parent: null,
+ actions: [{ tool: 'Edit', file: 'src/analyze.js', input: 'const ACCESS_CONTROL_WEAK_RE = /rbac|access-control/i;', command: null, model: 'm' }],
+ };
+ const sec = analyzeTree({ nodes: [node] }).failures.find((f) => f.type === 'security_or_privacy_risk');
+ assert.ok(sec && sec.tier === 'inferred', `lone weak keyword must be inferred (got ${sec && sec.tier})`);
+});
+
+test('P4: a bare rbac keyword WITH a security-surface co-signal earns high tier', () => {
+ const node = {
+ id: 'node_001', text: 'wire up access control', title: 'x', kind: 'root', status: 'accepted', parent: null,
+ actions: [{ tool: 'Edit', file: 'src/rbac/policy.ts', input: 'enable rbac for the route', command: null, model: 'm' }],
+ };
+ const sec = analyzeTree({ nodes: [node] }).failures.find((f) => f.type === 'security_or_privacy_risk');
+ assert.ok(sec && (sec.tier === 'high' || sec.tier === 'verified'), `keyword + surface co-signal should tier up (got ${sec && sec.tier})`);
+});
+
+test('P6: a human security correction backstops a prior action that carried no security label', () => {
+ const prior = {
+ id: 'node_001', text: 'put the deploy config value directly into the deploy script', title: 'deploy config', kind: 'direction', status: 'accepted', parent: null,
+ actions: [{ tool: 'Edit', file: 'deploy.sh', command: null, input: null, model: 'm' }],
+ };
+ const correction = {
+ id: 'node_002', text: 'that is a secret, rotate that key and do not commit it to the deploy script', title: 'rotate', kind: 'correction', status: 'accepted', parent: prior,
+ actions: [{ tool: 'Edit', file: 'deploy.sh', command: null, input: null, model: 'm' }],
+ };
+ const analysis = analyzeTree({ nodes: [prior, correction] });
+ const sec = analysis.failures.find((f) => f.type === 'security_or_privacy_risk');
+ assert.ok(sec, 'human security correction should backstop a missed security event');
+ assert.equal(sec.tier, 'inferred', 'the backstop must be inferred only, never strong/verified');
+ assert.ok(sec.confidence <= 0.7, 'the backstop confidence must stay low');
+});
+
+test('P6: the backstop never fabricates a strong/verified security label from prose alone', () => {
+ const root = { id: 'node_001', text: 'build the cli', title: 'x', kind: 'root', status: 'accepted', parent: null, actions: [] };
+ const correction = {
+ id: 'node_002', text: 'never leak the api secret token again', title: 'no leaks', kind: 'correction', status: 'accepted', parent: root, actions: [],
+ };
+ const analysis = analyzeTree({ nodes: [root, correction] });
+ const strongSec = analysis.failures.filter((f) => f.type === 'security_or_privacy_risk' && (f.tier === 'verified' || f.tier === 'high'));
+ assert.equal(strongSec.length, 0, 'a human-correction backstop must never mint strong/verified labels');
+});
+
+// RELEASE GATE: the negative corpus must produce ZERO security/failure/hallucination false positives.
+test('NEGATIVE CORPUS (release gate): benign inputs produce zero security/failure false positives', () => {
+ const dir = tempProject();
+ // Benign prompts that historically tripped keyword/substring/path false positives.
+ const benign = [
+ 'capture a screenshot with chrome --headless --force-device-scale-factor=1 --screenshot=out.png',
+ 'edit src/ui/semantic-tokens.ts to adjust the design token palette',
+ 'update theme/design-tokens.json and src/lexer/tokenizer.ts for the new theme',
+ 'the access-control documentation mentions rbac as a concept; just explaining it in the readme',
+ 'we use JSON.parse and params.arguments and test.skip in the code, no changes needed',
+ 'add a token field to the response schema and document the bearer header format in the api guide',
+ 'rename the file from auth-helpers.md to authentication-notes.md in the docs folder',
+ 'the password strength meter component needs a tooltip, purely a UI label',
+ ];
+ try {
+ // The benign corpus references real files; create them so any hallucination flag is a
+ // genuine false positive rather than a correct missing-file detection.
+ mkdirSync(join(dir, 'src', 'ui'), { recursive: true });
+ mkdirSync(join(dir, 'src', 'lexer'), { recursive: true });
+ mkdirSync(join(dir, 'theme'), { recursive: true });
+ mkdirSync(join(dir, 'docs'), { recursive: true });
+ writeFileSync(join(dir, 'out.png'), 'x');
+ writeFileSync(join(dir, 'src', 'ui', 'semantic-tokens.ts'), 'export const t = 1;\n');
+ writeFileSync(join(dir, 'src', 'lexer', 'tokenizer.ts'), 'export const t = 1;\n');
+ writeFileSync(join(dir, 'theme', 'design-tokens.json'), '{}');
+ writeFileSync(join(dir, 'auth-helpers.md'), '# notes\n');
+ writeFileSync(join(dir, 'authentication-notes.md'), '# notes\n');
+ writeFileSync(join(dir, 'readme'), 'rbac is a concept\n');
+
+ const nodes = benign.map((text, i) => ({
+ id: `node_${String(i + 1).padStart(3, '0')}`,
+ text, title: text.slice(0, 40), kind: i === 0 ? 'root' : 'direction',
+ status: 'accepted', parent: null,
+ ts: `2026-06-12T${String(10 + i).padStart(2, '0')}:00:00.000Z`,
+ // Benign UI/doc file edits, plus the chrome flag command.
+ actions: i === 0
+ ? [{ tool: 'Bash', file: null, command: 'chrome --headless --force-device-scale-factor=1 --screenshot=out.png', model: 'm' }]
+ : i === 1 ? [{ tool: 'Edit', file: 'src/ui/semantic-tokens.ts', model: 'm' }]
+ : i === 2 ? [{ tool: 'Edit', file: 'theme/design-tokens.json', model: 'm' }]
+ : [],
+ }));
+ for (let k = 1; k < nodes.length; k++) nodes[k].parent = nodes[k - 1];
+
+ const analysis = analyzeTree({ nodes: nodes.map((n) => ({ ...n })) });
+ const secFps = analysis.failures.filter((f) => f.type === 'security_or_privacy_risk');
+ assert.equal(secFps.length, 0, `negative corpus minted security false positives: ${JSON.stringify(secFps.map((f) => f.evidence))}`);
+
+ const halluc = detectHallucinations({ nodes: nodes.map((n) => ({ ...n })) }, dir).hallucinations;
+ assert.equal(halluc.length, 0, `negative corpus minted hallucination false positives: ${JSON.stringify(halluc.map((h) => h.reference))}`);
+
+ // Redaction must not over-fire high/medium on benign prose.
+ for (const text of benign) {
+ const hi = scanText(text).filter((f) => f.severity === 'high' || f.severity === 'medium');
+ assert.equal(hi.length, 0, `redaction over-fired on benign text "${text}": ${JSON.stringify(hi.map((f) => f.ruleId))}`);
+ }
+ } finally {
+ rmSync(dir, { recursive: true, force: true });
+ }
+});