Zion Boggan
repos/TreeTrace/test/treetrace.test.js
zionboggan.com ↗
2536 lines · javascript
History for this file →
1
import { test } from 'node:test';
2
import assert from 'node:assert/strict';
3
import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
4
import { tmpdir } from 'node:os';
5
import { fileURLToPath } from 'node:url';
6
import { dirname, join } from 'node:path';
7
 
8
import { parseSessionFile, parsePlainTranscript, classifySpecialUserText } from '../src/parse.js';
9
import { classifyPrompts } from '../src/extract.js';
10
import { buildTree } from '../src/tree.js';
11
import { scanText, applyDecisions, shadowScan, maskFor, resolveFindings, isGitShaCandidate, patchResiduals } from '../src/redact.js';
12
import { renderMarkdown, promptPack } from '../src/render-md.js';
13
import { renderMermaid, isSummaryByDefault, SUMMARY_NODE_THRESHOLD } from '../src/render-mermaid.js';
14
import { renderJson } from '../src/render-json.js';
15
import { renderHandoff } from '../src/handoff.js';
16
import { renderReportMarkdown, renderTerminalSummary } from '../src/report.js';
17
import {
18
  analyzeTree,
19
  renderFailuresJson,
20
  renderRejectionsJson,
21
  renderLessonsMarkdown,
22
  renderEvalsJsonl,
23
  renderMemoryMarkdown,
24
  isRiskyCommand,
25
  mentionsTestSkip,
26
  SECURITY_INTENT_PARTS,
27
  RISKY_CMD_PARTS,
28
} from '../src/analyze.js';
29
import { main, parseArgs, wrapMermaidDoc } from '../src/cli.js';
30
import { mungePath } from '../src/discover.js';
31
import { sha256, escapeMd } from '../src/util.js';
32
import { detectHallucinations, renderHallucinationsJson } from '../src/hallucinate.js';
33
import { renderSecurityReport, hasSecuritySignal } from '../src/security-report.js';
34
import { spawn } from 'node:child_process';
35
 
36
// Path to the synthetic session fixture, resolved relative to this test file's own directory.
const FIXTURE = join(dirname(fileURLToPath(import.meta.url)), 'fixtures', 'synthetic-session.jsonl');
37
 
38
/**
 * Runs the fixture session through parse -> classify -> build.
 *
 * @returns {Promise<{session: object, nodes: object[], tree: object}>} the parsed
 *   session, the classified prompt nodes, and the tree built from both.
 */
async function fixtureTree() {
  const parsed = await parseSessionFile(FIXTURE, { sessionId: 'fix-001' });
  const classified = classifyPrompts([parsed]);
  const built = buildTree([parsed], classified);
  return { session: parsed, nodes: classified, tree: built };
}
43
 
44
// Parser contract on the synthetic fixture: prompt filtering, title, and session stats.
test('parser: extracts only human prompts, skips tool results/commands/sidechains', async () => {
  const { session } = await fixtureTree();
  assert.equal(session.prompts.length, 5);

  const isCommandEcho = (prompt) => prompt.text.startsWith('<command-name>');
  const mentionsSubagent = (prompt) => prompt.text.includes('subagent');
  assert.ok(!session.prompts.some(isCommandEcho));
  assert.ok(session.prompts.every((prompt) => !mentionsSubagent(prompt)));

  assert.equal(session.title, 'Build a weather dashboard');

  const { stats } = session;
  assert.equal(stats.toolUses, 2);
  assert.equal(stats.interruptions, 1);
  assert.deepEqual(stats.models, ['assistant-model']);
  assert.equal(stats.filesTouched.length, 1);
});
55
 
56
// Classifier contract: four nodes in order, with one nudge folded into the root
// and the scope change flagged as following an interruption.
test('extractor: classification kinds and nudge folding', async () => {
  const { nodes } = await fixtureTree();
  assert.equal(nodes.length, 4);

  const [rootNode, directionNode, correctionNode, scopeNode] = nodes;
  assert.equal(rootNode.kind, 'root');
  assert.equal(rootNode.nudges, 1);
  assert.equal(directionNode.kind, 'direction');
  assert.equal(correctionNode.kind, 'correction');
  assert.equal(scopeNode.kind, 'scope-change');
  assert.equal(scopeNode.afterInterruption, true);
});
66
 
67
// Tree contract on the fixture: one root, four prompts, one correction, and the
// prompt mentioning "leaflet" ends up accepted.
// NOTE(review): the title talks about an abandoned branch, but the only status
// asserted here is 'accepted' on the leaflet node - confirm whether an assertion
// on the abandoned node is missing.
test('tree: fork detection marks rewound branch abandoned', async () => {
  const { tree } = await fixtureTree();
  const leaflet = tree.nodes.find((n) => n.text.includes('leaflet'));
  // Fail with a readable message instead of a TypeError if the fixture changes.
  assert.ok(leaflet, 'fixture should contain a prompt mentioning leaflet');
  assert.equal(leaflet.status, 'accepted');
  assert.equal(tree.roots.length, 1);
  assert.equal(tree.stats.promptCount, 4);
  assert.equal(tree.stats.corrections, 1);
});
75
 
76
// Scans the fixture's scope-change prompt, auto-redacts every finding, and checks
// that neither the key prefix nor the URL password survives.
test('redaction: catches anthropic key and basic-auth URL, masks them', async () => {
  const { tree } = await fixtureTree();
  const scope = tree.nodes.find((node) => node.kind === 'scope-change');
  const findings = scanText(scope.text);

  const rules = new Set(findings.map((finding) => finding.ruleId));
  for (const ruleId of ['anthropic-key', 'url-basic-auth']) {
    assert.ok(rules.has(ruleId), `${ruleId} not in ${[...rules]}`);
  }

  const resolveOptions = { interactive: false, autoRedact: true };
  const { decisions } = await resolveFindings(findings, {}, resolveOptions);
  const cleaned = applyDecisions(scope.text, findings, decisions);
  assert.ok(!cleaned.includes('sk-ant-'), 'key leaked');
  assert.ok(!cleaned.includes('hunter2pass'), 'password leaked');
  assert.ok(cleaned.includes('[REDACTED:'));
});
90
 
91
// The shadow scan reports a secret with no decision, and reports nothing once
// the secret is either explicitly kept or masked out of the text.
test('redaction: shadow scan flags unresolved secrets, passes resolved/kept ones', () => {
  const dirty = 'token ghp_0123456789abcdefghijklmnopqrstuvwxyzAB end';
  assert.equal(shadowScan(dirty, {}).length, 1);

  const findings = scanText(dirty);
  const [finding] = findings;
  const decisionKey = sha256(finding.match);

  const kept = { [decisionKey]: { action: 'keep', ruleId: finding.ruleId } };
  assert.equal(shadowScan(dirty, kept).length, 0);

  const redactDecision = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId };
  const masked = applyDecisions(dirty, findings, { [decisionKey]: redactDecision });
  assert.equal(shadowScan(masked, {}).length, 0);
});
104
 
105
// Table-driven check: each sample, embedded in prose, must trigger its expected rule.
test('redaction: rule coverage on known formats', () => {
  const cases = [
    { sample: 'AKIAIOSFODNN7EXAMPLE', expected: 'aws-access-key' },
    { sample: 'github_pat_11AAAAAAA0123456789abcdefghij', expected: 'github-fine-grained' },
    { sample: 'xoxb-treetrace-example-slack-token-0', expected: 'slack-token' },
    { sample: 'sk_live_abcdefghijklmnop123', expected: 'stripe-live-key' },
    { sample: 'tskey-auth-kFGiAS7CNTRL-abcdef123456', expected: 'tailscale-key' },
    {
      sample: '-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaA==\n-----END OPENSSH PRIVATE KEY-----',
      expected: 'private-key-block',
    },
    {
      sample: 'eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U',
      expected: 'jwt',
    },
    { sample: 'password = "correct-horse-battery"', expected: 'secret-assignment' },
    { sample: 'SECRET="correct horse battery staple"', expected: 'secret-assignment' },
    { sample: 'https://user:p:a:ss@example.com/path', expected: 'url-basic-auth' },
  ];
  cases.forEach(({ sample, expected }) => {
    const hits = scanText(`some text ${sample} more text`).map((finding) => finding.ruleId);
    assert.ok(hits.includes(expected), `${expected} missed in: ${sample} (got ${hits})`);
  });
});
123
 
124
// Quoted secret values containing backslash escapes must still be reported
// under the secret-assignment rule, whatever the quote style.
test('redaction: escaped characters inside quoted secret assignments are still caught', () => {
  const cases = [
    ['escaped newline', '{"api_key":"line1\\nline2line2"}'],
    ['escaped tab', '{"api_key":"col1\\tcol2value"}'],
    ['escaped quote', '{"api_key":"abc\\"defghij"}'],
    ['escaped backslash', '{"api_key":"abc\\\\defghij"}'],
    ['single-quoted escaped newline', "{'password':'line1\\nline2value'}"],
    ['backtick escaped newline', 'const secret = `line1\\nline2value`;'],
  ];
  cases.forEach(([label, sample]) => {
    const hits = scanText(sample).map((finding) => finding.ruleId);
    const caught = hits.includes('secret-assignment');
    assert.ok(
      caught,
      `${label}: escaped secret value should be caught (got ${JSON.stringify(hits)} for ${sample})`
    );
  });
});
141
 
142
// End-to-end: feed a ChatGPT-style export whose user turn contains a secret with a
// literal backslash escape through the CLI, then make sure no generated artifact
// still contains the value.
test('redaction: end-to-end escaped-JSON secret leaves no raw value in any artifact', async () => {
  const rawValue = 'line1\\nline2line2line2';
  // JSON-encoded artifacts would carry the backslash doubled, so the plain
  // substring check alone cannot see a leak there; check this form as well.
  const jsonEscapedValue = JSON.stringify(rawValue).slice(1, -1);
  const secretLine = `config is {"api_key":"${rawValue}"}`;
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-esc-'));
  const file = join(dir, 'escconv.json');
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: [secretLine] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] },
    },
  }];
  try {
    // Written inside the try so the temp dir is removed even if the write fails.
    writeFileSync(file, JSON.stringify(convo));
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);
    const artifacts = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ].filter((f) => existsSync(join(dir, f))).map((f) => readFileSync(join(dir, f), 'utf8')).join('\n');
    assert.ok(!artifacts.includes(rawValue), 'raw escaped-JSON secret value leaked into an artifact');
    assert.ok(!artifacts.includes(jsonEscapedValue), 'JSON-escaped form of the secret value leaked into an artifact');
    assert.ok(artifacts.includes('[REDACTED:secret-assignment]'), 'expected a secret-assignment redaction marker');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
167
 
168
// 64-char lower/upper hex and a 32-char prefix must all hit the hex-token rule;
// auto-redaction must then remove the value and leave the shadow scan clean.
test('redaction: bare hex tokens (32+ chars) are detected, lower and upper case', async () => {
  const lower = '6881f8290266f4cc939959917f893a2a88787eb24bbcb6b9c37594c72bf448c3';
  const variants = [lower, lower.toUpperCase(), lower.slice(0, 32)];
  variants.forEach((hex) => {
    const hits = scanText(`my key is session_hex=${hex} ok`).map((finding) => finding.ruleId);
    assert.ok(hits.includes('hex-token'), `hex-token missed for ${hex} (got ${hits})`);
  });

  const assignment = `session_hex=${lower}`;
  const findings = scanText(assignment);
  const { decisions } = await resolveFindings(findings, {}, { interactive: false, autoRedact: true });
  const cleaned = applyDecisions(assignment, findings, decisions);
  assert.ok(!cleaned.includes(lower), 'raw hex leaked after redaction');
  assert.equal(shadowScan(cleaned, {}).length, 0, 'shadow scan should be clean after hex redaction');
});
182
 
183
// A mixed lowercase/digit token with no uppercase letters must still be reported
// as high-entropy when it appears mid-sentence.
test('redaction: high-entropy lowercase-and-digit token (no uppercase) is caught in prose', () => {
  const token = 'abcdefg0123456789hijklmnop4567qrstuv';
  const findings = scanText(`the access token is ${token} now`);
  const hits = findings.map((finding) => finding.ruleId);
  assert.ok(hits.includes('high-entropy-token'), `high-entropy token missed (got ${hits})`);
});
188
 
189
// False-positive guard: a UUID, a long path, and a long constant name must not
// be reported by the high-entropy rule.
test('redaction: uuids and long lowercase identifiers are not flagged as high-entropy', () => {
  const benignSamples = [
    '8400e29b-1d4f-4a6c-9b2e-7f3a1c5d8e90',
    'src/components/dashboard/widgets/chartwidget',
    'MAX_RETRY_ATTEMPTS_BEFORE_GIVING_UP_2',
  ];
  benignSamples.forEach((benign) => {
    const entropyHits = scanText(benign).filter((finding) => finding.ruleId === 'high-entropy-token');
    assert.equal(entropyHits.length, 0, `false positive high-entropy flag on ${benign}`);
  });
});
199
 
200
// isGitShaCandidate(match, text, index): 40- and 64-hex values count as git object
// ids only when the surrounding text looks like git output; 32-hex never does.
test('redaction: git object hashes are classified as candidates only in a git context', () => {
  const sha1 = '0123456789abcdef0123456789abcdef01234567';
  const sha256hex = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef';
  const hex32 = '0123456789abcdef0123456789abcdef';

  // Contexts that should be treated as git output.
  assert.ok(isGitShaCandidate(sha1, `commit ${sha1}`, 7), 'commit <sha1> should be a candidate');
  assert.ok(isGitShaCandidate(sha256hex, `git tree ${sha256hex}`, 9), 'git tree <sha256> should be a candidate');
  assert.ok(isGitShaCandidate(sha1, `${sha1} fix the parser\n`, 0), 'oneline sha should be a candidate');

  // Contexts that must not be treated as git output.
  assert.ok(!isGitShaCandidate(sha1, `token=${sha1} end`, 6), 'token= context is not git');
  assert.ok(!isGitShaCandidate(sha256hex, `session_hex=${sha256hex}`, 12), 'session_hex= context is not git');
  assert.ok(!isGitShaCandidate(hex32, `commit ${hex32}`, 7), '32-hex is not a git object id');
});
210
 
211
// With keepGitShas the git-context hash is kept (under the git-commit-sha rule)
// while the session_hex value is still redacted; without it both are redacted.
test('redaction: --keep-git-shas keeps git hashes but stays fail-closed for other hex', async () => {
  const sha1 = '0123456789abcdef0123456789abcdef01234567';
  const secret = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef';
  const text = `commit ${sha1}\nmy key is session_hex=${secret} ok`;
  const findings = scanText(text);

  const findingFor = (value) => findings.find((finding) => finding.match === value);
  const git = findingFor(sha1);
  const sec = findingFor(secret);
  assert.ok(git && git.gitShaCandidate, 'git sha must be flagged as a candidate');
  assert.ok(sec && !sec.gitShaCandidate, 'session_hex secret must NOT be a git candidate');

  const shaKey = sha256(sha1);
  const secretKey = sha256(secret);
  const baseOptions = { interactive: false, autoRedact: true };

  const { decisions } = await resolveFindings(findings, {}, { ...baseOptions, keepGitShas: true });
  assert.equal(decisions[shaKey].action, 'keep', 'git object hash should be kept');
  assert.equal(decisions[shaKey].ruleId, 'git-commit-sha', 'kept under git-commit-sha rule');
  assert.equal(decisions[secretKey].action, 'redact', 'non-git hex must still be redacted');

  const { decisions: d2 } = await resolveFindings(findings, {}, { ...baseOptions });
  assert.equal(d2[shaKey].action, 'redact', 'default must redact git sha too (fail-closed)');

  const cleaned = applyDecisions(text, findings, decisions);
  assert.ok(cleaned.includes(sha1), 'kept git sha should survive in output');
  assert.ok(!cleaned.includes(secret), 'non-git secret must be redacted');
  assert.equal(shadowScan(cleaned, decisions).length, 0, 'shadow scan must be clean after keep + redact');
});
234
 
235
// End-to-end: a ChatGPT-style export containing a lowercase and an uppercase hex
// secret is run through the CLI; no generated artifact may contain either value.
test('redaction: end-to-end hex secret leaves no raw hex in any artifact', async () => {
  const lower = '6881f8290266f4cc939959917f893a2a88787eb24bbcb6b9c37594c72bf448c3';
  const upper = lower.toUpperCase();
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-hex-'));
  const file = join(dir, 'hexconv.json');
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: [`my key is session_hex=${lower} and HEX=${upper} ok`] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['got it'] }, create_time: 2.0 }, parent: 'u', children: [] },
    },
  }];
  try {
    // Written inside the try so the temp dir is removed even if the write fails.
    writeFileSync(file, JSON.stringify(convo));
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);
    const artifacts = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ].filter((f) => existsSync(join(dir, f))).map((f) => readFileSync(join(dir, f), 'utf8')).join('\n');
    assert.ok(!artifacts.includes(lower), 'lowercase hex secret leaked into an artifact');
    assert.ok(!artifacts.includes(upper), 'uppercase hex secret leaked into an artifact');
    assert.ok(artifacts.includes('[REDACTED:hex-token]'), 'expected a hex-token redaction marker');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
261
 
262
// A single 12 MiB run of one character must scan without throwing, be reported
// as oversized, and leave nothing for the shadow scan once redacted.
test('redaction: a single 12MB token completes without throwing and stays safe', () => {
  const giantLength = 12 * 1024 * 1024;
  const giant = 'A'.repeat(giantLength);
  const text = `prefix ${giant} suffix`;

  let findings;
  const runScan = () => {
    findings = scanText(text);
  };
  assert.doesNotThrow(runScan, 'oversized token must not overflow the regex stack');
  assert.ok(findings.some((finding) => finding.ruleId === 'oversized-token'), 'oversized token should be flagged');

  const normal = scanText('store ghp_0123456789abcdefghijklmnopqrstuvwxyzAB and more');
  assert.ok(normal.some((finding) => finding.ruleId === 'github-token'), 'normal-size secrets still caught alongside the guard');

  // The helper's `decisions` property holds the redacted text, not a decision map.
  const { decisions: redactedText } = applyDecisionsRoundTrip(text, findings);
  assert.equal(shadowScan(redactedText, {}).length, 0, 'oversized token should be cleaned after redaction');
});
273
 
274
/**
 * Builds a redact-everything decision map for `findings` and applies it to `text`.
 *
 * @param {string} text - the text the findings were produced from.
 * @param {object[]} findings - findings to redact; each is keyed by sha256 of its match.
 * @returns {{decisions: *}} object whose `decisions` property is whatever
 *   applyDecisions returns for the text (callers pass it to shadowScan as text).
 */
function applyDecisionsRoundTrip(text, findings) {
  const redactAll = Object.fromEntries(
    findings.map((finding) => [
      sha256(finding.match),
      { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId },
    ])
  );
  const redacted = applyDecisions(text, findings, redactAll);
  return { decisions: redacted };
}
279
 
280
// A provider key broken across a newline must still be reported as openai-key,
// and redacting that finding must leave nothing for the shadow scan.
test('redaction: split provider tokens are caught before shadow scan', () => {
  const dirty = 'token sk-proj-abcdefghijklmnop\nqrstu1234567890ABCDE end';
  const findings = scanText(dirty);
  const hit = findings.find((f) => f.ruleId === 'openai-key');
  // Serialize the findings so a failure shows their content rather than "[object Object]".
  assert.ok(hit, `openai-key missed in ${JSON.stringify(findings)}`);
  const masked = applyDecisions(dirty, findings, {
    [sha256(hit.match)]: {
      action: 'redact',
      replacement: '[REDACTED:openai-key]',
      ruleId: 'openai-key',
    },
  });
  assert.equal(shadowScan(masked, {}).length, 0);
  assert.ok(!masked.includes('sk-proj-'));
});
294
 
295
// A key split by a space, with a short first fragment, must still be reported
// as anthropic-key and be fully removed by redaction.
test('redaction: whitespace-split secret below the length floor is caught', () => {
  const dirty = 'store key sk-ant-api03-AAAA BBBBCCCCDDDDEEEEFFFFGGGG into the vault';
  const findings = scanText(dirty);
  const hit = findings.find((finding) => finding.ruleId === 'anthropic-key');
  assert.ok(hit, `split anthropic-key missed: ${JSON.stringify(findings)}`);

  const decision = { action: 'redact', replacement: '[REDACTED:anthropic-key]', ruleId: 'anthropic-key' };
  const masked = applyDecisions(dirty, findings, { [sha256(hit.match)]: decision });
  const stillPresent = /sk-ant-api03-AAAA/.test(masked);
  assert.ok(!stillPresent, `secret not redacted: ${masked}`);
  assert.equal(shadowScan(masked, {}).length, 0);
});
306
 
307
// ReDoS guard: scanning a 60k-character URL-like string must finish within 2 seconds.
test('redaction: scan stays fast on long benign input (ReDoS guard)', () => {
  const big = 'http://' + 'a'.repeat(60000);
  // performance.now() is monotonic, so a wall-clock adjustment during the scan
  // cannot skew the measured duration the way Date.now() could.
  const start = performance.now();
  scanText(big);
  const elapsedMs = performance.now() - start;
  assert.ok(elapsedMs < 2000, 'scan should stay linear on long input');
});
313
 
314
// Ordinary engineering prose (short commit id, semver, file names) may yield
// only 'soft' findings, never anything harder.
test('redaction: benign text produces no high/medium findings', () => {
  const benign =
    'Refactor the parser in src/parse.js to handle commit 3f2a1b9 and bump to v2.1.0-beta.3. The README.md needs a section on CONTRIBUTING.';
  const isHard = (finding) => finding.severity !== 'soft';
  const hard = scanText(benign).filter(isHard);
  assert.deepEqual(hard, []);
});
320
 
321
// escapeMd must turn <, > and & into their HTML entities.
test('escapeMd neutralizes HTML-sensitive characters', () => {
  const raw = 'a<script>b</script>&c>';
  const expected = 'a&lt;script&gt;b&lt;/script&gt;&amp;c&gt;';
  assert.equal(escapeMd(raw), expected);
});
324
 
325
// A project name carrying HTML must appear escaped in the markdown heading and
// never in raw form.
test('rendering escapes injection in project name and content', async () => {
  const { tree } = await fixtureTree();
  const hostileName = 'x</summary></details><script>alert(1)</script>';
  const md = renderMarkdown(tree, { projectName: hostileName });

  const escapedHeading = '# Prompt Tree: x&lt;/summary&gt;&lt;/details&gt;&lt;script&gt;';
  const rawHeading = 'Prompt Tree: x</summary>';
  assert.ok(md.includes(escapedHeading), 'project name not escaped');
  assert.ok(!md.includes(rawHeading), 'raw HTML in project name');
});
331
 
332
// Smoke test across the renderers on the analyzed fixture tree: markdown, JSON,
// prompt pack, handoff, and report each expose their expected headings/fields.
test('renderers: markdown, json, handoff are consistent and footer-credited', async () => {
  const { tree } = await fixtureTree();
  analyzeTree(tree);
  const renderOptions = { projectName: 'demo' };

  // Markdown tree.
  const md = renderMarkdown(tree, { ...renderOptions });
  assert.ok(md.startsWith('# Prompt Tree: demo'));
  for (const marker of ['## Goal', '## Reusable Prompt Pack', '[treetrace]']) {
    assert.ok(md.includes(marker));
  }

  // JSON export.
  const json = renderJson(tree, { ...renderOptions });
  const nodesWithParent = tree.nodes.filter((node) => node.parent);
  assert.equal(json.schemaVersion, '0.3');
  assert.equal(json.nodes.length, tree.nodes.length);
  assert.equal(json.edges.length, nodesWithParent.length);
  assert.ok(json.nodes.every((node) => node.id && node.kind && typeof node.text === 'string'));
  assert.ok(json.analysis.failureSignals >= 1);
  assert.ok(json.correctionChains.length >= 1);
  assert.ok(json.nodes.some((node) => Array.isArray(node.failureSignals)));

  // Prompt pack.
  const pack = promptPack(tree.nodes);
  assert.ok(pack.includes('1.'));

  // Handoff document.
  const handoff = renderHandoff(tree, { ...renderOptions });
  for (const heading of ['## Original goal', '## Constraints', '## Lessons']) {
    assert.ok(handoff.includes(heading));
  }

  // Report.
  const report = renderReportMarkdown(tree, { projectName: 'demo', generatedAt: '2026-01-01T00:00:00.000Z' });
  assert.ok(report.startsWith('# TreeTrace Report - demo'));
  for (const fragment of ['## Output map', '## Artifacts', 'TREETRACE_REPORT.md']) {
    assert.ok(report.includes(fragment));
  }
});
364
 
365
// When a version option is supplied, both the markdown tree and the report
// must contain the "v<version>" stamp.
test('rendering: markdown footer stamps the tool version when provided', async () => {
  const { tree } = await fixtureTree();
  const version = '0.4.0';
  const stamp = 'v0.4.0';

  const md = renderMarkdown(tree, { projectName: 'demo', version });
  assert.ok(md.includes(stamp), 'PROMPT_TREE.md footer should stamp the version');

  const report = renderReportMarkdown(tree, { projectName: 'demo', version, generatedAt: '2026-01-01T00:00:00.000Z' });
  assert.ok(report.includes(stamp), 'TREETRACE_REPORT.md footer should stamp the version');
});
372
 
373
// Each analysis renderer, run on the fixture tree, yields non-empty output in
// its expected shape: failures JSON, lessons markdown, evals JSONL, memory markdown.
test('analysis renderers produce failures, lessons, evals, and memory', async () => {
  const { tree } = await fixtureTree();

  const failureDoc = renderFailuresJson(tree, { projectName: 'demo', generatedAt: '2026-01-01T00:00:00.000Z' });
  assert.equal(failureDoc.schemaVersion, '0.3');
  assert.ok(failureDoc.failures.length >= 1);
  assert.ok(failureDoc.correctionChains.length >= 1);

  const lessons = renderLessonsMarkdown(tree, { projectName: 'demo' });
  assert.ok(lessons.includes('# Lessons'));
  assert.ok(/\[node_\w+/.test(lessons), 'lessons should inline node ids in brackets');

  // One JSON document per line.
  const evalLines = renderEvalsJsonl(tree).trim().split('\n');
  const evals = evalLines.map((line) => JSON.parse(line));
  assert.ok(evals.length >= 1);
  assert.ok(evals.every((entry) => entry.source === 'treetrace' && entry.sourceNodeIds.length >= 1));

  const memory = renderMemoryMarkdown(tree, { projectName: 'demo' });
  assert.ok(!memory.includes('TreeTrace Agent Memory'), 'H1 title removed in diet');
  assert.ok(memory.includes('## Constraints'), 'compact constraints header');
  assert.ok(!memory.includes('Keep TreeTrace local-first'));
});
393
 
394
// A two-line plain transcript with no corrections must analyze to zero failure signals.
test('analysis: tiny transcript without corrections does not invent failures', () => {
  const transcript = 'User: build a tiny CLI\nAssistant: done';
  const session = parsePlainTranscript(transcript, 'tiny');
  const nodes = classifyPrompts([session]);
  const tree = buildTree([session], nodes);

  const { summary, failures } = analyzeTree(tree);
  assert.equal(summary.totalFailureSignals, 0);
  assert.deepEqual(failures, []);
});
402
 
403
// An Edit on an auth file, followed by a correction, must yield a verified
// security signal attributed to the acting model and the correcting node.
test('analysis: a security-sensitive agent action produces a verified, model-attributed signal', () => {
  const authEdit = { tool: 'Edit', file: 'src/auth/session.ts', command: null, model: 'claude-sonnet-4-6' };
  const root = {
    id: 'node_001',
    text: 'Add rate limiting to checkout',
    title: 'Add rate limiting to checkout',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [authEdit],
  };
  const correction = {
    id: 'node_002',
    text: 'check the existing auth flow first',
    title: 'check the existing auth flow first',
    kind: 'correction',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const analysis = analyzeTree({ nodes: [root, correction] });
  const sec = analysis.failures.find((failure) => failure.type === 'security_or_privacy_risk');
  assert.ok(sec, 'expected a verified security signal from the auth-file edit');
  assert.equal(sec.tier, 'verified');
  assert.equal(sec.model, 'claude-sonnet-4-6');
  assert.equal(sec.correctedByNodeId, 'node_002');
  assert.ok(sec.evidence.includes('session.ts'));

  const { summary } = analysis;
  assert.deepEqual(summary.models, ['claude-sonnet-4-6']);
  assert.ok(summary.tierCounts.verified >= 1);
});
423
 
424
// A Bash action that sources an env file and exports an API key must yield a
// verified security signal whose evidence mentions "credential".
test('analysis: a credential-handling Bash action produces a verified security signal', () => {
  const deployCommand =
    'set -a; . /srv/app/.env; export CLOUDFLARE_API_KEY="$DEPLOY_API_KEY"; wrangler pages deploy site';
  const root = {
    id: 'node_001',
    text: 'deploy the marketing site',
    title: 'deploy the marketing site',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{
      tool: 'Bash',
      file: null,
      command: deployCommand,
      input: deployCommand,
      model: 'claude-opus-4-8',
    }],
  };

  const analysis = analyzeTree({ nodes: [root] });
  const sec = analysis.failures.find((failure) => failure.type === 'security_or_privacy_risk');
  assert.ok(sec, 'expected a security signal from the credential-handling deploy');
  assert.equal(sec.tier, 'verified');
  assert.ok(/credential/.test(sec.evidence), 'evidence should name the credential kind');
  assert.ok(analysis.summary.tierCounts.verified >= 1);
});
442
 
443
// False-positive guard: a chrome flag that merely starts with --force must not
// produce any security signal.
test('analysis: benign --force-* chrome flag does not mint a verified security signal', () => {
  const screenshot = {
    tool: 'Bash',
    file: null,
    command: 'chrome --headless --force-device-scale-factor=1 --screenshot=out.png',
    model: 'm',
  };
  const root = {
    id: 'node_001',
    text: 'capture a screenshot of the page',
    title: 'capture a screenshot',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [screenshot],
  };

  const { failures } = analyzeTree({ nodes: [root] });
  const sec = failures.filter((failure) => failure.type === 'security_or_privacy_risk');
  assert.equal(sec.length, 0, '--force-device-scale-factor must not fire as a security risk');
});
453
 
454
// False-positive guard: editing files whose names merely contain "token" must
// not produce a verified security signal.
test('analysis: a token-named UI file does not mint a verified credential signal', () => {
  const tokenNamedFiles = ['src/ui/semantic-tokens.ts', 'src/lexer/tokenizer.ts', 'theme/design-tokens.json'];
  const isVerifiedSecurity = (failure) =>
    failure.type === 'security_or_privacy_risk' && failure.tier === 'verified';

  tokenNamedFiles.forEach((file) => {
    const root = {
      id: 'node_001',
      text: 'edit the theme',
      title: 'edit the theme',
      kind: 'root',
      status: 'accepted',
      parent: null,
      actions: [{ tool: 'Edit', file, command: null, model: 'm' }],
    };
    const { failures } = analyzeTree({ nodes: [root] });
    const verified = failures.filter(isVerifiedSecurity);
    assert.equal(verified.length, 0, `${file} must not produce a verified credential signal`);
  });
});
466
 
467
// An edit whose input only contains the word "rbac" may produce security
// signals, but none of them may be verified or reach 0.95 confidence.
test('analysis: a bare rbac keyword in a non-credential edit is down-tiered below verified', () => {
  const detectorEdit = {
    tool: 'Edit',
    file: 'src/analyze.js',
    input: 'const ACCESS = /rbac/i;',
    command: null,
    model: 'm',
  };
  const root = {
    id: 'node_001',
    text: 'edit the detector',
    title: 'edit the detector',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [detectorEdit],
  };

  const { failures } = analyzeTree({ nodes: [root] });
  const sec = failures.filter((failure) => failure.type === 'security_or_privacy_risk');
  const isDownTiered = (failure) => failure.tier !== 'verified' && failure.confidence < 0.95;
  assert.ok(sec.every(isDownTiered), 'bare rbac keyword must not be verified/0.95');
});
477
 
478
// Regression guard for the down-tiering rules: an auth-file edit stays verified
// at 0.95 and a command sourcing an env file stays verified.
test('analysis: a real credential file and a real secret command still verify at 0.95', () => {
  const firstSecuritySignal = (node) =>
    analyzeTree({ nodes: [node] }).failures.find((failure) => failure.type === 'security_or_privacy_risk');

  const fileNode = {
    id: 'node_001',
    text: 'harden auth',
    title: 'harden auth',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Edit', file: 'src/auth/session.ts', command: null, model: 'm' }],
  };
  const fileSec = firstSecuritySignal(fileNode);
  assert.ok(fileSec && fileSec.tier === 'verified' && fileSec.confidence === 0.95, 'a genuine auth file must stay verified');

  const envCommand = '. /srv/app/.env; wrangler pages deploy';
  const cmdNode = {
    id: 'node_001',
    text: 'deploy',
    title: 'deploy',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Bash', file: null, command: envCommand, input: envCommand, model: 'm' }],
  };
  const cmdSec = firstSecuritySignal(cmdNode);
  assert.ok(cmdSec && cmdSec.tier === 'verified', 'a genuine credential command must stay verified');
});
493
 
494
// A prompt that only talks about updating a PAT, with no agent action, must
// yield an inferred security signal and show up in the memory's Security section.
test('analysis: a PAT-update prompt produces an inferred security signal even with no action', () => {
  const root = {
    id: 'node_001',
    text: 'build the cli',
    title: 'build the cli',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  const intent = {
    id: 'node_002',
    text: 'I updated the PAT in the master access ref doc',
    title: 'I updated the PAT',
    kind: 'direction',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const analysis = analyzeTree({ nodes: [root, intent] });
  const sec = analysis.failures.find(
    (failure) => failure.type === 'security_or_privacy_risk' && failure.firstSeenNodeId === 'node_002'
  );
  assert.ok(sec, 'expected an inferred security signal from the PAT-update prompt');
  assert.equal(sec.tier, 'inferred');

  const memory = renderMemoryMarkdown({ nodes: [root, intent] });
  assert.ok(memory.includes('## Security'), 'memory should list the security section');
  assert.ok(/stated intent/.test(memory), 'memory should tag the stated intent');
});
508
 
509
// False-positive guard: a long pasted document that lists security categories
// in passing must not produce a security signal.
test('analysis: a long pasted spec listing security categories does not over-fire as intent', () => {
  const root = {
    id: 'node_001',
    text: 'build the cli',
    title: 'build the cli',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  // Filler before and after a single sentence that names security categories.
  const seed = [
    'Here is the full product spec to read and react to. '.repeat(20),
    'The detector flags when an agent changed auth logic, touched secrets, modified access control, or disabled tests. ',
    'More pitch copy about water, compute, investors, and the cloud. '.repeat(20),
  ].join('');
  const pitch = {
    id: 'node_002',
    text: seed,
    title: 'pasted spec',
    kind: 'checkpoint',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const { failures } = analyzeTree({ nodes: [root, pitch] });
  const sec = failures.filter((failure) => failure.type === 'security_or_privacy_risk');
  assert.equal(sec.length, 0, 'a long pasted spec should not mint a stated-intent security signal');
});
520
 
521
// The text between "## Constraints" and "## Lessons" in the memory output must
// list each directive from the prompt and must not claim there are none.
test('analysis: the constraints section extracts directive requirements and never reports none when constraints exist', () => {
  const root = {
    id: 'node_001',
    text: 'build the cli',
    title: 'build the cli',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  const rule = {
    id: 'node_002',
    text: 'no em dashes and do not add inline code comments, and keep it Apache licensed',
    title: 'no em dashes',
    kind: 'direction',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const memory = renderMemoryMarkdown({ nodes: [root, rule] });
  const sectionStart = memory.indexOf('## Constraints');
  const sectionEnd = memory.indexOf('## Lessons');
  const block = memory.slice(sectionStart, sectionEnd);

  const expectations = [
    [/no em dashes/i, 'em-dash constraint should be listed'],
    [/inline code comments/i, 'inline-comment constraint should be listed'],
    [/apache/i, 'license constraint should be listed'],
  ];
  for (const [pattern, message] of expectations) {
    assert.ok(pattern.test(block), message);
  }
  assert.ok(!/No explicit constraints were flagged/.test(block), 'must not claim none when constraints exist');
});
535
 
536
// A prompt that only expresses approval must not cause a Constraints section
// to appear in the memory output.
test('analysis: a benign descriptive prompt with no directive yields no false constraints', () => {
  const root = {
    id: 'node_001',
    text: 'build the cli',
    title: 'build the cli',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  const benign = {
    id: 'node_002',
    text: 'I like where we stand so far and I think this looks good to me',
    title: 'looks good',
    kind: 'direction',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const memory = renderMemoryMarkdown({ nodes: [root, benign] });
  const hasConstraints = memory.includes('## Constraints');
  assert.ok(!hasConstraints, 'benign descriptive text should not mint constraints');
});
545
 
546
// A turn that reports a lost file and asks for recovery must register as a bad
// path, be warned about in "## Bad paths", and not be echoed as the next work;
// the earlier forward-looking direction should be suggested instead.
test('analysis: a destructive-then-recovery turn yields a known bad path and is not the preferred next work', () => {
  const root = {
    id: 'node_001',
    text: 'build the marketing deck',
    title: 'build the marketing deck',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  const direction = {
    id: 'node_002',
    text: 'Also you can send an agent out to develop these sections',
    title: 'send an agent out to develop these sections',
    kind: 'direction',
    status: 'accepted',
    parent: root,
    actions: [],
  };
  const apology = 'Also messed up the deck file in the P:/ it is gone I am sorry can you bring it back';
  const mishap = {
    id: 'node_003',
    text: apology,
    title: apology,
    kind: 'direction',
    status: 'accepted',
    parent: direction,
    actions: [{ tool: 'Write', file: 'P:/deck/index.html' }],
  };
  const nodes = [root, direction, mishap];

  const analysis = analyzeTree({ nodes });
  const bad = analysis.failures.filter((failure) => failure.type === 'abandoned_path');
  assert.ok(bad.length >= 1, 'destructive-then-recovery should produce a bad-path entry');

  const memory = renderMemoryMarkdown({ nodes });

  const badStart = memory.indexOf('## Bad paths');
  const badEnd = memory.indexOf('## Security');
  const badBlock = memory.slice(badStart, badEnd);
  assert.ok(!/No abandoned paths were detected/.test(badBlock), 'must not claim no abandoned paths when a destructive event occurred');
  assert.ok(/recover|destructive/i.test(badBlock), 'bad-path entry should warn about the destructive event');

  const nextBlock = memory.slice(memory.indexOf('## Next'));
  assert.ok(!/messed up the deck/i.test(nextBlock), 'preferred next work must not parrot the apology turn');
  assert.ok(/develop these sections/i.test(nextBlock), 'preferred next work should point at the real forward direction');
});
570
 
571
// A correction node with no actions must produce at least one failure, none of them in the
// 'verified' tier, and the summary's verified count must be zero.
test('analysis: a keyword-only correction stays in the inferred or confirmed tier, not verified', () => {
  const root = { id: 'node_001', text: 'build a dashboard', title: 'build a dashboard', kind: 'root', status: 'accepted', parent: null, actions: [] };
  const corr = { id: 'node_002', text: 'no, that is overbuilt, keep it minimal', title: 'no, that is overbuilt', kind: 'correction', status: 'accepted', parent: root, actions: [] };
  const analysis = analyzeTree({ nodes: [root, corr] });
  assert.ok(analysis.failures.length >= 1);
  assert.ok(analysis.failures.every((f) => f.tier !== 'verified'));
  assert.equal(analysis.summary.tierCounts.verified, 0);
});

// The benign node_002 must not be the first-seen node of any failure, and no node may be the
// first-seen node for more than one distinct failure type.
test('analysis: a single benign prompt does not yield multiple failure types', () => {
  const root = {
    id: 'node_001', text: 'build the marketing deck', title: 'build the marketing deck',
    kind: 'root', status: 'accepted', parent: null, ts: '2026-06-12T14:00:00.000Z', actions: [],
  };
  const benign = {
    id: 'node_002', text: 'and slide an agent to make the decks mobile friendly too please',
    title: 'make the decks mobile friendly', kind: 'direction', status: 'accepted', parent: root,
    ts: '2026-06-12T14:52:00.000Z', actions: [],
  };
  // Long text that repeats several trigger-like phrases 40 times.
  const longPaste = {
    id: 'node_003',
    text: 'ok sounds good i agree. ' + 'do not overbuild it, it is too much, try again later if it keeps failing. '.repeat(40),
    title: 'long strategy paste', kind: 'checkpoint', status: 'accepted', parent: benign,
    ts: '2026-06-12T12:52:00.000Z', actions: [],
  };
  const analysis = analyzeTree({ nodes: [root, benign, longPaste] });
  const benignFailures = analysis.failures.filter((f) => f.firstSeenNodeId === 'node_002');
  assert.equal(benignFailures.length, 0, 'a benign request should not mint failures from wording alone');
  for (const id of ['node_001', 'node_002', 'node_003']) {
    const types = analysis.failures.filter((f) => f.firstSeenNodeId === id).map((f) => f.type);
    assert.ok(new Set(types).size <= 1, `node ${id} emitted multiple failure types: ${types.join(', ')}`);
  }
});

// node_003 comes last in the array but carries an older timestamp than node_002; the terminal
// summary, the handoff and the agent memory must all name node_002's direction as the latest.
test('analysis: latest accepted direction is chronological, not insertion order', () => {
  const root = {
    id: 'node_001', text: 'pick a research topic', title: 'pick a research topic',
    kind: 'root', status: 'accepted', parent: null, ts: '2026-01-01T00:00:00.000Z', actions: [],
  };
  const newest = {
    id: 'node_002', text: 'lets dig into Amazon Nova and the Karunanidhi essay direction',
    title: 'Amazon Nova and Karunanidhi', kind: 'direction', status: 'accepted', parent: root,
    ts: '2026-03-01T00:00:00.000Z', actions: [],
  };
  const stale = {
    id: 'node_003', text: 'lets explore the Seoul travel itinerary in depth for the trip',
    title: 'Seoul travel itinerary', kind: 'direction', status: 'accepted', parent: newest,
    ts: '2026-02-01T00:00:00.000Z', actions: [],
  };
  const nodes = [root, newest, stale];
  const tree = { nodes, stats: { promptCount: 3, sessionCount: 2 } };
  const summary = renderTerminalSummary(tree, { projectName: 'demo' });
  assert.ok(/Amazon Nova/i.test(summary), 'terminal summary should name the chronologically newest direction');
  assert.ok(!/Seoul/i.test(summary.split('Latest accepted direction:')[1] || ''), 'must not name the stale Seoul session as latest');

  const handoff = renderHandoff(tree, { projectName: 'demo' });
  // Text between the "Where things stand" heading and the next "##" marker.
  const stand = handoff.split('## Where things stand')[1].split('##')[0];
  assert.ok(/Amazon Nova/i.test(stand), 'handoff should name the chronologically newest accepted direction');

  const memory = renderMemoryMarkdown(tree, { projectName: 'demo' });
  const next = memory.slice(memory.indexOf('## Next'));
  assert.ok(/Amazon Nova/i.test(next), 'agent memory should point at the chronologically newest direction');
});

// The correction node here is timestamped before the node it hangs off. Any failure or
// correction chain that links a corrector (or resolver) must not point to an earlier timestamp.
test('analysis: a corrector is never linked with an earlier timestamp than its failure', () => {
  const failure = {
    id: 'node_001', text: 'i do not see the deck, just the index file showing text',
    title: 'deck not rendering', kind: 'direction', status: 'accepted', parent: null,
    ts: '2026-06-12T14:06:20.000Z',
    actions: [{ tool: 'Edit', file: 'site/deck/index.html', command: null, input: null, model: 'claude-opus-4-8' }],
  };
  const earlier = {
    id: 'node_002', text: 'no that is wrong, the deck still does not work, redo it instead',
    title: 'still broken', kind: 'correction', status: 'accepted', parent: failure,
    ts: '2026-06-12T12:52:00.000Z',
    actions: [{ tool: 'Edit', file: 'site/deck/index.html', command: null, input: null, model: 'claude-opus-4-8' }],
  };
  const analysis = analyzeTree({ nodes: [failure, earlier] });
  const byId = { node_001: failure, node_002: earlier };
  for (const f of analysis.failures) {
    // Failures without a linked corrector have nothing to compare.
    if (!f.correctedByNodeId) continue;
    const ft = new Date(byId[f.firstSeenNodeId].ts).getTime();
    const ct = new Date(byId[f.correctedByNodeId].ts).getTime();
    assert.ok(ct >= ft, `failure ${f.id} corrected by an earlier-timestamped node`);
  }
  for (const c of analysis.correctionChains) {
    const ft = new Date(byId[c.failureNodeId].ts).getTime();
    const ct = new Date(byId[c.correctionNodeId].ts).getTime();
    assert.ok(ct >= ft, `chain ${c.id} links a corrector that precedes its failure`);
    if (c.resolvedNodeId) {
      const rt = new Date(byId[c.resolvedNodeId].ts).getTime();
      assert.ok(rt >= ft, `chain ${c.id} resolves before its failure`);
    }
  }
});

// Runs the CLI on the shared fixture with --redact-auto, then checks that every artifact is
// written, that failures.json/evals.jsonl have the expected shape, and that no artifact
// contains the 'sk-ant-' or 'hunter2pass' strings.
test('cli: default run writes analysis artifacts with redaction', async () => {
  // Single list used for both the existence check and the leak scan.
  const artifacts = [
    'TREETRACE_REPORT.md',
    'PROMPT_TREE.md',
    '.treetrace/tree.json',
    '.treetrace/failures.json',
    '.treetrace/lessons.md',
    '.treetrace/evals.jsonl',
    '.treetrace/agent-memory.md',
  ];
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-'));
  try {
    await main(['--file', FIXTURE, '--dir', dir, '--redact-auto', '--quiet']);
    for (const file of artifacts) {
      assert.ok(existsSync(join(dir, file)), `${file} missing`);
    }
    const failures = JSON.parse(readFileSync(join(dir, '.treetrace/failures.json'), 'utf8'));
    assert.equal(failures.schemaVersion, '0.3');
    assert.ok(failures.failures.length >= 1);

    const evalLine = readFileSync(join(dir, '.treetrace/evals.jsonl'), 'utf8').trim().split('\n')[0];
    assert.equal(JSON.parse(evalLine).source, 'treetrace');

    const exported = artifacts.map((file) => readFileSync(join(dir, file), 'utf8')).join('\n');
    assert.ok(!exported.includes('sk-ant-'), 'anthropic key leaked');
    assert.ok(!exported.includes('hunter2pass'), 'basic-auth password leaked');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// Passing --analysis and --report together must still write every analysis and report artifact.
test('cli: --analysis combined with --report writes both analysis files and the reports', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-both-'));
  try {
    await main(['--file', FIXTURE, '--dir', dir, '--analysis', '--report', '--redact-auto', '--quiet']);
    for (const file of [
      'TREETRACE_REPORT.md', 'PROMPT_TREE.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ]) {
      assert.ok(existsSync(join(dir, file)), `${file} missing when --analysis and --report combined`);
    }
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// Importing with --from copilot must record 'copilot-chat' as project.sourceType in tree.json.
test('cli: a copilot import records a per-adapter sourceType, not claude-code-jsonl', async () => {
  const fixture = join(dirname(fileURLToPath(import.meta.url)), 'fixtures', 'adapters', 'copilot-chatsession.json');
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-src-'));
  try {
    await main(['--from', 'copilot', '--file', fixture, '--dir', dir, '--redact-auto', '--quiet']);
    const tree = JSON.parse(readFileSync(join(dir, '.treetrace/tree.json'), 'utf8'));
    assert.equal(tree.project.sourceType, 'copilot-chat', 'sourceType should reflect the copilot adapter');
    assert.notEqual(tree.project.sourceType, 'claude-code-jsonl');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// The --dir target is a nested path that does not exist yet; the run must create it and write
// both PROMPT_TREE.md and .treetrace/tree.json inside it.
test('cli: creates the output directory and .treetrace subdirectory when missing', async () => {
  const base = mkdtempSync(join(tmpdir(), 'treetrace-'));
  const dir = join(base, 'does', 'not', 'exist', 'yet');
  try {
    assert.ok(!existsSync(dir), 'target dir should not exist before the run');
    await main(['--file', FIXTURE, '--dir', dir, '--redact-auto', '--quiet']);
    assert.ok(existsSync(join(dir, 'PROMPT_TREE.md')), 'PROMPT_TREE.md missing');
    assert.ok(existsSync(join(dir, '.treetrace', 'tree.json')), '.treetrace/tree.json missing');
  } finally {
    rmSync(base, { recursive: true, force: true });
  }
});

// Scanning text containing "security-risk" must yield no findings other than 'soft' ones.
test('redaction: the literal phrase "security-risk" is not a false-positive secret', () => {
  for (const phrase of ['security-risk', 'skip the security-risk step']) {
    const hard = scanText(phrase).filter((f) => f.severity !== 'soft');
    assert.deepEqual(hard, [], `"${phrase}" should not match any secret rule (got ${JSON.stringify(hard)})`);
  }
});

// A ghp_-prefixed token embedded in a sentence must produce a 'github-token' finding.
test('redaction: a real-format GitHub token is caught', () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const hits = scanText(`set the remote with ${token} now`).map((f) => f.ruleId);
  assert.ok(hits.includes('github-token'), `github-token missed (got ${hits})`);
});

// Writes a three-line session whose assistant turn runs a Bash command containing a token,
// confirms the parser captures the raw token, then runs the CLI with --redact-auto and checks
// that no artifact contains the token or a ghp_ prefix and that a redaction marker is present.
test('redaction: a token inside a Bash action body is redacted end to end', async () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const lines = [
    { type: 'summary', summary: 'wire up the remote', leafUuid: 'b3' },
    {
      parentUuid: null, isSidechain: false, type: 'user', userType: 'external', uuid: 'b1',
      sessionId: 'leak-001', timestamp: '2026-06-01T10:00:00.000Z', cwd: '/tmp/demo', gitBranch: 'main', version: '2.1.0',
      message: { role: 'user', content: 'Point the git remote at my fork.' },
    },
    {
      parentUuid: 'b1', isSidechain: false, type: 'assistant', uuid: 'b2', sessionId: 'leak-001',
      timestamp: '2026-06-01T10:00:30.000Z',
      message: {
        role: 'assistant', model: 'assistant-model', usage: { input_tokens: 100, output_tokens: 50 },
        content: [
          { type: 'text', text: 'Setting the remote.' },
          { type: 'tool_use', id: 'g1', name: 'Bash', input: { command: `git push --force origin main && git remote set-url origin https://x:${token}@github.com/me/fork.git` } },
        ],
      },
    },
  ];
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-leak-'));
  const session = join(dir, 'session.jsonl');
  try {
    // Written inside the try so the temp directory is removed even if the write fails.
    writeFileSync(session, lines.map((l) => JSON.stringify(l)).join('\n') + '\n');
    const parsed = await parseSessionFile(session, { sessionId: 'leak-001' });
    const action = parsed.prompts[0].actions.find((a) => a.tool === 'Bash');
    assert.ok(action, 'expected a captured Bash action');
    assert.ok(action.command.includes(token), 'fixture should carry the raw token before redaction');
    assert.ok(typeof action.input === 'string' && action.input.includes(token), 'input summary should carry the command');

    await main(['--file', session, '--dir', dir, '--redact-auto', '--quiet']);
    const exported = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ].map((f) => readFileSync(join(dir, f), 'utf8')).join('\n');
    assert.ok(!exported.includes(token), 'GitHub token leaked from an action body into output');
    assert.ok(!/ghp_[0-9A-Za-z]/.test(exported), 'a partial GitHub token prefix leaked from an action body into output');
    assert.ok(exported.includes('[REDACTED:'), 'expected a redaction marker where the action-body token was');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// The handoff brief must keep a shell "&&" verbatim (no "&amp;&amp;") while still not
// emitting a literal "<script>" tag taken from prompt text.
test('handoff: command operators are not HTML-escaped in the brief', () => {
  const root = {
    id: 'node_001', text: 'run rm -rf build && mkdir build to reset the workspace',
    title: 'reset the workspace', kind: 'root', status: 'accepted', parent: null, actions: [],
  };
  const handoff = renderHandoff({ nodes: [root], stats: { promptCount: 1, sessionCount: 1 } }, { projectName: 'demo' });
  assert.ok(handoff.includes('rm -rf build && mkdir build'), 'command should keep raw && in the handoff brief');
  assert.ok(!handoff.includes('&amp;&amp;'), 'handoff must not HTML-escape && to &amp;&amp;');
  const inject = {
    id: 'node_001', text: 'do not run <script>alert(1)</script> ever',
    title: 'no scripts', kind: 'root', status: 'accepted', parent: null, actions: [],
  };
  const handoff2 = renderHandoff({ nodes: [inject], stats: { promptCount: 1, sessionCount: 1 } }, { projectName: 'demo' });
  assert.ok(!handoff2.includes('<script>'), 'angle-bracket tags should still be neutralized in the handoff brief');
});

// A pasted "User:/Assistant:" transcript yields one prompt per User turn; text without any
// markers must throw an error mentioning "turn markers".
test('plain transcript fallback parses User:/Assistant: markers', () => {
  const session = parsePlainTranscript(
    'User: build me a snake game in python\nAssistant: sure, here is the code...\nUser: make the snake blue\nAssistant: done',
    'pasted'
  );
  assert.equal(session.prompts.length, 2);
  assert.equal(session.prompts[1].text, 'make the snake blue');
  assert.throws(() => parsePlainTranscript('no markers here at all'), /turn markers/);
});

// Checks the label returned for a slash-command wrapper, a system reminder, a
// context-continuation notice, and an ordinary prompt.
test('special user text classification', () => {
  assert.equal(classifySpecialUserText('<command-name>/foo</command-name>'), 'command');
  assert.equal(classifySpecialUserText('<system-reminder>x</system-reminder>'), 'meta');
  assert.equal(
    classifySpecialUserText('This session is being continued from a previous conversation that ran out of context.'),
    'compact-continuation'
  );
  assert.equal(classifySpecialUserText('build me an app'), 'prompt');
});

// In these samples every "/", "." and "_" in the path is expected to become "-".
test('discover: path munging matches Claude Code storage layout', () => {
  assert.equal(mungePath('/home/dev/weatherapp'), '-home-dev-weatherapp');
  assert.equal(mungePath('/home/dev/weatherapp/api'), '-home-dev-weatherapp-api');
  assert.equal(mungePath('/home/u.ser/my_app'), '-home-u-ser-my-app');
});

/**
 * Creates a throwaway project directory under the OS temp dir for tests that need a
 * project root on disk. It contains a package.json (name 'demo', dependency on express)
 * and a single source file, src/real.js.
 *
 * The caller owns the directory and is expected to remove it when done.
 *
 * @returns {string} Path of the newly created directory.
 * @throws Rethrows any filesystem error after removing the partially built directory.
 */
function tempProject() {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-feat-'));
  try {
    writeFileSync(join(dir, 'package.json'), JSON.stringify({ name: 'demo', dependencies: { express: '^4.0.0' } }));
    mkdirSync(join(dir, 'src'), { recursive: true });
    writeFileSync(join(dir, 'src', 'real.js'), 'export const real = 1;\n');
  } catch (err) {
    // The caller never receives the path on failure, so clean up here to avoid a leaked dir.
    rmSync(dir, { recursive: true, force: true });
    throw err;
  }
  return dir;
}

// Against a temp project holding src/real.js and an express dependency: the missing file and
// the undeclared package must be flagged, while the real file, package.json, the declared
// dependency and the node builtin must not. Every finding must carry an eval candidate target.
test('hallucinations: flags only the invented file and import, not the real ones', () => {
  const dir = tempProject();
  try {
    const root = {
      id: 'node_001', kind: 'root', status: 'accepted', parent: null,
      text: 'Open src/real.js and src/imaginary.js to wire the feature.',
      title: 'wire the feature',
      actions: [{
        tool: 'Edit', file: 'src/real.js',
        input: "import express from 'express';\nimport ghostlib from 'ghostlib-does-not-exist';\nimport { readFileSync } from 'node:fs';",
        command: null, model: 'm',
      }],
    };
    const tree = { nodes: [root] };
    const result = detectHallucinations(tree, dir);
    const files = result.hallucinations.filter((h) => h.category === 'hallucinated_file_or_path').map((h) => h.reference);
    const imports = result.hallucinations.filter((h) => h.category === 'hallucinated_import_or_package').map((h) => h.reference);

    assert.ok(files.includes('src/imaginary.js'), `invented file should be flagged (got ${files})`);
    assert.ok(!files.includes('src/real.js'), 'the real file must not be flagged');
    assert.ok(!files.some((f) => /package\.json/.test(f)), 'the real package.json must not be flagged');

    assert.ok(imports.includes('ghostlib-does-not-exist'), `invented import should be flagged (got ${imports})`);
    assert.ok(!imports.includes('express'), 'a declared dependency must not be flagged');
    assert.ok(!imports.includes('fs') && !imports.includes('node:fs'), 'a node builtin must not be flagged');

    for (const h of result.hallucinations) {
      assert.ok(h.evalCandidate && h.evalCandidate.target, 'each hallucination should carry an eval candidate');
    }
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// A path that is absent on disk but is the target of a Write action in the same node must
// not be reported as a hallucinated file.
test('hallucinations: a file created during the session is not flagged', () => {
  const dir = tempProject();
  try {
    const root = {
      id: 'node_001', kind: 'root', status: 'accepted', parent: null,
      text: 'Create src/brandnew.js and then reference src/brandnew.js again.',
      title: 'create new file',
      actions: [{ tool: 'Write', file: 'src/brandnew.js', input: 'export const n = 1;', command: null, model: 'm' }],
    };
    const result = detectHallucinations({ nodes: [root] }, dir);
    const files = result.hallucinations.filter((h) => h.category === 'hallucinated_file_or_path').map((h) => h.reference);
    assert.ok(!files.includes('src/brandnew.js'), 'a file the agent created this session must not be flagged');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// Missing extensionless paths under .github/ and .husky/ must be flagged as files, while the
// dotted code symbols JSON.parse and test.skip in the same prompt must not.
test('hallucinations: extensionless files under dot-directories are flagged when missing', () => {
  const dir = tempProject();
  try {
    const root = {
      id: 'node_001', kind: 'root', status: 'accepted', parent: null,
      text: 'Open .github/CODEOWNERS and .github/workflows/ci and .husky/pre-commit, and reference JSON.parse and test.skip.',
      title: 'review config',
      actions: [],
    };
    const result = detectHallucinations({ nodes: [root] }, dir);
    const files = result.hallucinations.filter((h) => h.category === 'hallucinated_file_or_path').map((h) => h.reference);
    assert.ok(files.includes('.github/CODEOWNERS'), `dot-directory path should be flagged (got ${files})`);
    assert.ok(files.includes('.github/workflows/ci'), 'nested dot-directory path should be flagged');
    assert.ok(files.includes('.husky/pre-commit'), 'hyphenated dot-directory path should be flagged');
    assert.ok(!files.includes('JSON.parse') && !files.includes('test.skip'), 'dotted code symbols must not be flagged');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// The identifier "process.env" in prompt text must not be reported as a missing file.
test('hallucinations: process.env is not flagged as a missing file', () => {
  const dir = tempProject();
  try {
    const root = {
      id: 'node_001', kind: 'root', status: 'accepted', parent: null,
      text: 'Read the API key from process.env instead of hardcoding it.',
      title: 'use env var', actions: [],
    };
    const result = detectHallucinations({ nodes: [root] }, dir);
    const files = result.hallucinations.filter((h) => h.category === 'hallucinated_file_or_path').map((h) => h.reference);
    assert.ok(!files.includes('process.env'), `process.env must not be flagged as a file (got ${files})`);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// A require('./...') of a file that does not exist must not produce a "." import finding,
// but the missing path itself (with or without the leading "./") must be flagged as a file.
test('hallucinations: a relative require is not flagged as an import, but the missing file is', () => {
  const dir = tempProject();
  try {
    const root = {
      id: 'node_001', kind: 'root', status: 'accepted', parent: null,
      text: 'Wire it up.', title: 'wire',
      actions: [{ tool: 'Edit', file: 'src/index.js', input: "const limiter = require('./middleware/rateLimit.js');", command: null, model: 'm' }],
    };
    const result = detectHallucinations({ nodes: [root] }, dir);
    const imports = result.hallucinations.filter((h) => h.category === 'hallucinated_import_or_package').map((h) => h.reference);
    const files = result.hallucinations.filter((h) => h.category === 'hallucinated_file_or_path').map((h) => h.reference);
    assert.ok(!imports.includes('.'), 'a relative require must not be reduced to a "." import');
    assert.ok(files.includes('./middleware/rateLimit.js') || files.includes('middleware/rateLimit.js'), `the missing relative file should still be flagged (got ${files})`);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// First half: a session that edits src/auth/login.js, runs "rm -rf build" and is corrected
// about disabling tests must register a security signal and show each item in the report.
// Second half: a README-only edit must register no signal and render "None detected.".
test('security report: surfaces real signals and omits benign sessions', () => {
  const dir = tempProject();
  try {
    const root = {
      id: 'node_001', kind: 'root', status: 'accepted', parent: null,
      text: 'harden the login flow', title: 'harden the login flow',
      actions: [
        { tool: 'Edit', file: 'src/auth/login.js', input: 'export function login() {}', command: null, model: 'claude-opus-4-8' },
        { tool: 'Bash', file: null, command: 'rm -rf build', input: 'rm -rf build', model: 'claude-opus-4-8' },
      ],
    };
    const correction = {
      id: 'node_002', kind: 'correction', status: 'accepted', parent: root,
      text: 'no, do not disable the tests in the auth suite, keep them running',
      title: 'do not disable tests', actions: [],
    };
    const tree = { nodes: [root, correction] };
    assert.ok(hasSecuritySignal(tree, dir), 'expected a security signal for the auth edit');
    const report = renderSecurityReport(tree, dir, { projectName: 'demo', generatedAt: '2026-01-01T00:00:00.000Z' });

    assert.ok(report.startsWith('# TreeTrace Security Report - demo'));
    assert.ok(/auth: .*src\/auth\/login\.js/.test(report), 'auth surface and file should be listed');
    assert.ok(/rm -rf build/.test(report), 'risky command should be listed');
    assert.ok(/disable the tests|disable or skip tests/i.test(report), 'test-skip signal should appear');
    assert.ok(/do not disable the tests/i.test(report), 'the human correction should surface as an eval/memory candidate');

    // The benign case edits README.md, so create it on disk first.
    writeFileSync(join(dir, 'README.md'), '# demo\n');
    const benign = {
      id: 'node_001', kind: 'root', status: 'accepted', parent: null,
      text: 'add a markdown table to the README', title: 'add a table',
      actions: [{ tool: 'Edit', file: 'README.md', input: '| a | b |', command: null, model: 'm' }],
    };
    const benignTree = { nodes: [benign] };
    assert.ok(!hasSecuritySignal(benignTree, dir), 'benign session should have no security signal');
    const benignReport = renderSecurityReport(benignTree, dir, { projectName: 'demo', generatedAt: '2026-01-01T00:00:00.000Z' });
    assert.ok(/None detected\./.test(benignReport), 'benign report should state nothing was found');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// Feeds a ChatGPT-style export whose user turn contains a hex secret and a GitHub token through
// the CLI with --security and --redact-auto; hallucinations.json must contain neither secret
// but must still mention the invented file.
test('security report and hallucinations.json do not leak injected secrets via the CLI', async () => {
  const dir = tempProject();
  const hex = '6881f8290266f4cc939959917f893a2a88787eb24bbcb6b9c37594c72bf448c3';
  const ghToken = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: [
        `edit src/imaginary.js, my key is session_hex=${hex} and token ${ghToken}`,
      ] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] },
    },
  }];
  const file = join(dir, 'leaky.json');
  try {
    // Written inside the try so the temp project is removed even if the write fails.
    writeFileSync(file, JSON.stringify(convo));
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--security', '--redact-auto', '--quiet']);
    const hall = readFileSync(join(dir, '.treetrace/hallucinations.json'), 'utf8');
    assert.ok(!hall.includes(hex), 'hex secret leaked into hallucinations.json');
    assert.ok(!hall.includes(ghToken), 'github token leaked into hallucinations.json');
    assert.ok(/imaginary\.js/.test(hall), 'the invented file should still be detected');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

// Runs the CLI binary as a child process and checks its exit status: 2 for an unknown option,
// 3 when pointed at an empty directory.
test('cli: structured exit codes for CI consumers', async () => {
  const bin = join(dirname(fileURLToPath(import.meta.url)), '..', 'bin', 'treetrace.js');
  const run = (args) =>
    new Promise((resolve, reject) => {
      // process.execPath runs the same Node binary as the test runner, independent of PATH.
      const child = spawn(process.execPath, [bin, ...args], { stdio: ['ignore', 'ignore', 'pipe'] });
      let stderr = '';
      child.stderr.on('data', (d) => { stderr += d; });
      // Without this, a failed spawn would leave the promise pending until the test times out.
      child.on('error', reject);
      child.on('close', (code) => resolve({ code, stderr }));
    });
  const empty = mkdtempSync(join(tmpdir(), 'treetrace-exit-'));
  try {
    const usage = await run(['--bogus']);
    assert.equal(usage.code, 2, `bad option should exit 2 (got ${usage.code}): ${usage.stderr}`);
    const nodata = await run(['--dir', empty]);
    assert.equal(nodata.code, 3, `nothing-to-trace should exit 3 (got ${nodata.code}): ${nodata.stderr}`);
  } finally {
    rmSync(empty, { recursive: true, force: true });
  }
});

// Starts the CLI in "mcp" mode as a child process, writes four newline-delimited JSON-RPC
// requests to its stdin, and checks the responses read back from stdout: initialize,
// tools/list, a valid tools/call, and a tools/call for an unknown tool.
test('mcp: initialize, tools/list, and tools/call return well-formed JSON-RPC', async () => {
  const dir = tempProject();
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: ['build a cli and do not add dependencies'] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: ['u2'] },
      u2: { message: { author: { role: 'user' }, content: { parts: ['no, that is wrong, keep it minimal'] }, create_time: 3.0 }, parent: 'a', children: [] },
    },
  }];
  const file = join(dir, 'mcp.json');
  const bin = join(dirname(fileURLToPath(import.meta.url)), '..', 'bin', 'treetrace.js');
  try {
    // Written inside the try so the temp project is removed even if the write fails.
    writeFileSync(file, JSON.stringify(convo));
    const responses = await new Promise((resolveP, rejectP) => {
      // process.execPath runs the same Node binary as the test runner, independent of PATH.
      const child = spawn(process.execPath, [bin, 'mcp', '--from', 'chatgpt', '--file', file, '--dir', dir], {
        stdio: ['pipe', 'pipe', 'ignore'],
      });
      const expectedIds = [1, 2, 3, 99];
      let buf = '';
      let done = false;
      let timer = null;

      // Stops the child and settles the promise exactly once. Parsing is wrapped so that
      // malformed output rejects the promise instead of throwing from a timer callback.
      const finish = () => {
        if (done) return;
        done = true;
        clearTimeout(timer);
        child.stdin.end();
        child.kill();
        try {
          resolveP(buf.split('\n').filter(Boolean).map((l) => JSON.parse(l)));
        } catch (err) {
          rejectP(err);
        }
      };

      // True once every expected request id appears on a complete (newline-terminated) line.
      const allAnswered = () => {
        const complete = buf.slice(0, buf.lastIndexOf('\n') + 1);
        const seen = new Set();
        for (const line of complete.split('\n')) {
          if (!line) continue;
          try {
            seen.add(JSON.parse(line).id);
          } catch {
            // Not decidable yet; finish() reports the parse error if it persists.
            return false;
          }
        }
        return expectedIds.every((id) => seen.has(id));
      };

      child.stdout.on('data', (d) => {
        buf += d;
        if (allAnswered()) finish();
      });
      child.on('error', (err) => {
        done = true;
        clearTimeout(timer);
        rejectP(err);
      });
      const send = (o) => child.stdin.write(JSON.stringify(o) + '\n');
      send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} });
      send({ jsonrpc: '2.0', id: 2, method: 'tools/list', params: {} });
      send({ jsonrpc: '2.0', id: 3, method: 'tools/call', params: { name: 'lessons', arguments: {} } });
      send({ jsonrpc: '2.0', id: 99, method: 'tools/call', params: { name: 'nope', arguments: {} } });
      // Upper bound on the wait; the assertions below report any response that never arrived.
      timer = setTimeout(finish, 2000);
    });

    const init = responses.find((r) => r.id === 1);
    assert.ok(init && init.jsonrpc === '2.0', 'initialize must be JSON-RPC 2.0');
    assert.equal(init.result.serverInfo.name, 'treetrace');
    assert.ok(init.result.protocolVersion, 'initialize must advertise a protocol version');

    const list = responses.find((r) => r.id === 2);
    const names = list.result.tools.map((t) => t.name).sort();
    assert.deepEqual(names, ['eval_candidates', 'handoff', 'lessons', 'rejections_summary', 'security_summary', 'tree']);

    const call = responses.find((r) => r.id === 3);
    assert.ok(call.result && Array.isArray(call.result.content), 'tools/call must return content array');
    assert.equal(call.result.content[0].type, 'text');
    assert.ok(/# Lessons/.test(call.result.content[0].text), 'lessons tool should return the lessons markdown');

    const bad = responses.find((r) => r.id === 99);
    assert.ok(bad.error && bad.error.code === -32602, 'unknown tool should return a JSON-RPC error');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

import { recordedCwd } from '../src/discover.js';
1100
 
1101
// Each sample hides a secret behind a different assignment syntax: JSON with
// double or single quotes, backtick-quoted values, and a value that contains
// a real newline. Every one must produce a `secret-assignment` finding.
test('redaction: JSON-style, quoted, backtick, and multiline secret assignments are caught', () => {
  const samples = [
    '{"api_key":"supersecretvalue"}',
    '{"client_secret":"correcthorsebattery"}',
    '{"access_token":"correct-horse-battery"}',
    "{'api_key':'correcthorsebattery'}",
    'const password = `correct horse battery staple`;',
    'api_key: `correct-horse-battery-staple`',
    'API_KEY="line1\nline2line2line2"',
  ];
  samples.forEach((sample) => {
    const ruleIds = scanText(sample).map((finding) => finding.ruleId);
    const caught = ruleIds.includes('secret-assignment');
    assert.ok(caught, `secret-assignment missed in: ${JSON.stringify(sample)} (got ${ruleIds})`);
  });
});
1116
 
1117
// A repetitive, low-entropy value assigned to a secret-looking key must still
// be reported under the `secret-assignment` rule.
test('redaction: generic secret-key assignment is caught even with a low-entropy value', () => {
  const ruleIds = scanText('password: "hunter2hunter2"').map(({ ruleId }) => ruleId);
  const caught = ruleIds.includes('secret-assignment');
  assert.ok(caught, 'low-entropy generic secret should still be a finding');
});
1122
 
1123
// Placeholder-style values must not produce any finding whose severity is
// something other than 'soft'.
test('redaction: placeholder secret assignments are not flagged', () => {
  const placeholders = ['token: null', 'password: ""', 'secret: "${SECRET}"', 'api_key: <your-key>', 'token=true'];
  placeholders.forEach((benign) => {
    const nonSoft = scanText(benign).filter(({ severity }) => severity !== 'soft');
    assert.deepEqual(nonSoft, [], `${benign} should not flag (got ${JSON.stringify(nonSoft)})`);
  });
});
1129
 
1130
// End-to-end run of the CLI over a ChatGPT-style export whose user turn holds
// a JSON-style secret and a backtick-quoted secret. Neither raw value may
// appear in any artifact that was written, and a redaction marker must.
test('redaction: a JSON-style secret leaves no raw value in any artifact end to end', async () => {
  const secret = 'supersecretvalue';
  const back = 'correct-horse-battery-staple';
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-json-secret-'));
  const file = join(dir, 'conv.json');
  const userText = `config is {"api_key":"${secret}"} and password = \`${back}\``;
  const mapping = {
    r: { message: null, parent: null, children: ['u'] },
    u: {
      message: { author: { role: 'user' }, content: { parts: [userText] }, create_time: 1.0 },
      parent: 'r',
      children: ['a'],
    },
    a: {
      message: { author: { role: 'assistant' }, content: { parts: ['done'] }, create_time: 2.0 },
      parent: 'u',
      children: [],
    },
  };
  writeFileSync(file, JSON.stringify([{ mapping }]));
  try {
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);
    const candidates = [
      'PROMPT_TREE.md',
      'TREETRACE_REPORT.md',
      '.treetrace/tree.json',
      '.treetrace/failures.json',
      '.treetrace/lessons.md',
      '.treetrace/evals.jsonl',
      '.treetrace/agent-memory.md',
    ];
    // Only artifacts that were actually produced are read back.
    const written = candidates.filter((name) => existsSync(join(dir, name)));
    const artifacts = written.map((name) => readFileSync(join(dir, name), 'utf8')).join('\n');
    assert.ok(!artifacts.includes(secret), 'JSON-style secret value leaked into an artifact');
    assert.ok(!artifacts.includes(back), 'backtick secret value leaked into an artifact');
    assert.ok(artifacts.includes('[REDACTED:secret-assignment]'), 'expected a secret-assignment redaction marker');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1156
 
1157
// A stored "keep" decision for a token is overridden in both non-interactive
// modes (with and without autoRedact) and honored only in interactive mode.
test('redaction: a prior keep decision is ignored under --redact-auto and non-TTY auto mode', async () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const text = `Use token ${token} for setup`;
  const findings = scanText(text);
  const prior = { [sha256(token)]: { action: 'keep', ruleId: 'github-token' } };
  const resolveWith = (mode) => resolveFindings(findings, prior, mode);
  const redactWith = (resolution) => applyDecisions(text, findings, resolution.decisions);

  const autoRun = await resolveWith({ interactive: false, autoRedact: true });
  assert.equal(autoRun.overriddenKeeps, 1, 'auto mode should override a prior keep');
  const autoOutput = redactWith(autoRun);
  assert.ok(!autoOutput.includes(token), 'raw token leaked under --redact-auto despite re-redaction');
  assert.equal(shadowScan(autoOutput, autoRun.decisions).length, 0, 'shadow scan should be clean after override');

  const nonTtyRun = await resolveWith({ interactive: false, autoRedact: false });
  assert.equal(nonTtyRun.overriddenKeeps, 1, 'non-TTY auto mode should override a prior keep');
  assert.ok(!redactWith(nonTtyRun).includes(token), 'raw token leaked in non-TTY auto mode');

  const interactiveRun = await resolveWith({ interactive: true, autoRedact: false });
  assert.equal(interactiveRun.overriddenKeeps, 0, 'interactive mode should honor a deliberate keep');
  assert.ok(redactWith(interactiveRun).includes(token), 'interactive keep should be honored');
});
1177
 
1178
// Seeds .treetrace/redactions.json with a "keep" for the token, runs the CLI
// with --redact-auto, then checks that no written artifact contains the token
// and that the stored decision has been rewritten to "redact".
test('cli: a preseeded keep cannot leak a secret under --redact-auto', async () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-keep-'));
  const file = join(dir, 'conv.json');
  const mapping = {
    r: { message: null, parent: null, children: ['u'] },
    u: {
      message: { author: { role: 'user' }, content: { parts: [`Use token ${token} for setup`] }, create_time: 1.0 },
      parent: 'r',
      children: ['a'],
    },
    a: {
      message: { author: { role: 'assistant' }, content: { parts: ['done'] }, create_time: 2.0 },
      parent: 'u',
      children: [],
    },
  };
  writeFileSync(file, JSON.stringify([{ mapping }]));
  mkdirSync(join(dir, '.treetrace'), { recursive: true });
  const decisionsPath = join(dir, '.treetrace', 'redactions.json');
  const seeded = { [sha256(token)]: { action: 'keep', ruleId: 'github-token' } };
  writeFileSync(decisionsPath, JSON.stringify(seeded));
  try {
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);
    const candidates = [
      'PROMPT_TREE.md',
      'TREETRACE_REPORT.md',
      '.treetrace/tree.json',
      '.treetrace/failures.json',
      '.treetrace/agent-memory.md',
    ];
    const written = candidates.filter((name) => existsSync(join(dir, name)));
    const artifacts = written.map((name) => readFileSync(join(dir, name), 'utf8')).join('\n');
    assert.ok(!artifacts.includes(token), 'preseeded keep leaked a raw token under --redact-auto');
    const stored = JSON.parse(readFileSync(decisionsPath, 'utf8'));
    assert.equal(stored[sha256(token)].action, 'redact', 'overridden keep should persist as redact');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1205
 
1206
// Spawns the MCP server over stdio with a preseeded "keep" decision for the
// token and checks that the `handoff` tool response does not contain it.
test('mcp: a preseeded keep cannot leak a token in handoff', async () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-mcp-keep-'));
  const file = join(dir, 'conv.json');
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: [`Use token ${token} for setup, do not add dependencies`] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: ['u2'] },
      u2: { message: { author: { role: 'user' }, content: { parts: ['no, keep it minimal'] }, create_time: 3.0 }, parent: 'a', children: [] },
    },
  }];
  writeFileSync(file, JSON.stringify(convo));
  mkdirSync(join(dir, '.treetrace'), { recursive: true });
  writeFileSync(join(dir, '.treetrace', 'redactions.json'), JSON.stringify({ [sha256(token)]: { action: 'keep', ruleId: 'github-token' } }));
  const bin = join(dirname(fileURLToPath(import.meta.url)), '..', 'bin', 'treetrace.js');
  try {
    const responses = await new Promise((resolveP, rejectP) => {
      const child = spawn('node', [bin, 'mcp', '--from', 'chatgpt', '--file', file, '--dir', dir], { stdio: ['pipe', 'pipe', 'ignore'] });
      let buf = '';
      child.stdout.on('data', (d) => { buf += d; });
      child.on('error', rejectP);
      const send = (o) => child.stdin.write(JSON.stringify(o) + '\n');
      send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} });
      send({ jsonrpc: '2.0', id: 2, method: 'tools/call', params: { name: 'handoff', arguments: {} } });
      // The server is given a fixed window to answer, then stopped.
      setTimeout(() => {
        child.stdin.end();
        child.kill();
        // A throw inside this timer callback would escape the promise and
        // leave it pending, so a parse failure is turned into a rejection.
        try {
          resolveP(buf.split('\n').filter(Boolean).map((l) => JSON.parse(l)));
        } catch (err) {
          rejectP(new Error(`MCP server wrote a non-JSON line to stdout: ${err.message}`, { cause: err }));
        }
      }, 2500);
    });
    const call = responses.find((r) => r.id === 2);
    assert.ok(call && call.result, 'handoff tool should return a result');
    assert.ok(!JSON.stringify(call).includes(token), 'MCP handoff leaked a token despite a preseeded keep');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1244
 
1245
// Drives the MCP server over stdio with four requests: a tool call with an
// unexpected argument, a valid tool call, a request with an explicit
// `id: null`, and a JSON array (batch). Each must get the expected reply.
test('mcp: extra tool arguments return -32602', async () => {
  const dir = tempProject();
  const file = join(dir, 'conv.json');
  writeFileSync(file, JSON.stringify([{ mapping: {
    r: { message: null, parent: null, children: ['u'] },
    u: { message: { author: { role: 'user' }, content: { parts: ['build a cli'] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
    a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] },
  } }]));
  const bin = join(dirname(fileURLToPath(import.meta.url)), '..', 'bin', 'treetrace.js');
  try {
    const responses = await new Promise((resolveP, rejectP) => {
      const child = spawn('node', [bin, 'mcp', '--from', 'chatgpt', '--file', file, '--dir', dir], { stdio: ['pipe', 'pipe', 'ignore'] });
      let buf = '';
      child.stdout.on('data', (d) => { buf += d; });
      child.on('error', rejectP);
      const send = (o) => child.stdin.write(JSON.stringify(o) + '\n');
      send({ jsonrpc: '2.0', id: 1, method: 'tools/call', params: { name: 'lessons', arguments: { unexpected: true } } });
      send({ jsonrpc: '2.0', id: 2, method: 'tools/call', params: { name: 'lessons', arguments: {} } });
      send({ jsonrpc: '2.0', id: null, method: 'ping' });
      send([{ jsonrpc: '2.0', id: 9, method: 'ping' }]);
      // The server is given a fixed window to answer, then stopped.
      setTimeout(() => {
        child.stdin.end();
        child.kill();
        // A throw inside this timer callback would escape the promise and
        // leave it pending, so a parse failure is turned into a rejection.
        try {
          resolveP(buf.split('\n').filter(Boolean).map((l) => JSON.parse(l)));
        } catch (err) {
          rejectP(new Error(`MCP server wrote a non-JSON line to stdout: ${err.message}`, { cause: err }));
        }
      }, 2500);
    });
    const bad = responses.find((r) => r.id === 1);
    assert.ok(bad && bad.error && bad.error.code === -32602, 'extra arguments should return -32602');
    const ok = responses.find((r) => r.id === 2);
    assert.ok(ok && ok.result, 'empty arguments should succeed');
    const idNull = responses.find((r) => r.id === null && r.result);
    assert.ok(idNull, 'explicit id:null request should receive a response');
    const batch = responses.find((r) => r.id === null && r.error && /batch/.test(r.error.message));
    assert.ok(batch, 'batch arrays should return a clear error');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1279
 
1280
// Starting the MCP server with --stdin in argv must reject with a message
// matching /does not support --stdin/.
test('mcp: treetrace mcp --stdin is rejected clearly', async () => {
  const { startMcpServer } = await import('../src/mcp.js');
  const startWithStdin = () => startMcpServer({ argv: ['mcp', '--stdin'], version: '0.0.0' });
  await assert.rejects(startWithStdin, /does not support --stdin/, 'mcp --stdin should be rejected at startup');
});
1288
 
1289
// Absolute paths and `../` paths mentioned in a node's text must produce no
// hallucination entries at all.
test('hallucinations: absolute paths outside the project are out of scope, not an oracle', () => {
  const dir = tempProject();
  try {
    const referencesIn = (text) => {
      const tree = { nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] };
      return detectHallucinations(tree, dir).hallucinations.map((entry) => entry.reference);
    };
    const absoluteRefs = referencesIn('see /definitely/not/here.zzz and /etc/shadow.bak');
    assert.deepEqual(absoluteRefs, [], 'absolute paths outside the project must not be flagged or statted');
    const escapingRefs = referencesIn('see ../escape.js');
    assert.deepEqual(escapingRefs, [], 'a ../ path escaping the project is out of scope');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1301
 
1302
// Missing relative paths (bare and `./`-prefixed) are reported under the
// reference text as written; a path containing `real.js` must not be reported.
test('hallucinations: relative missing paths inside the project are flagged', () => {
  const dir = tempProject();
  try {
    const singleNode = (text) => ({ nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] });
    const anyHallucination = (text, matches) => detectHallucinations(singleNode(text), dir).hallucinations.some(matches);

    const bareFlagged = anyHallucination('open src/missing.js', (h) => h.reference === 'src/missing.js');
    assert.ok(bareFlagged, 'bare missing path should be flagged');

    const dotSlashFlagged = anyHallucination('open ./src/missing.js', (h) => h.reference === './src/missing.js');
    assert.ok(dotSlashFlagged, './ missing path should be flagged');

    const realFlagged = anyHallucination('open src/real.js', (h) => h.reference.includes('real.js'));
    assert.ok(!realFlagged, 'real file must not be flagged');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1313
 
1314
// An `Edit` action on a file that does not exist is reported; a `Write`
// action on a file that does not exist is not.
test('hallucinations: an Edit to a nonexistent file is flagged, a Write to a new file is not', () => {
  const dir = tempProject();
  try {
    const treeWithAction = (text, tool, target) => ({
      nodes: [{
        id: 'n1',
        kind: 'root',
        status: 'accepted',
        parent: null,
        text,
        title: 't',
        actions: [{ tool, file: target, input: 'x', command: null }],
      }],
    });
    const isReported = (tree, reference) => detectHallucinations(tree, dir).hallucinations.some((h) => h.reference === reference);

    const edit = treeWithAction('edit src/ghost.js', 'Edit', 'src/ghost.js');
    assert.ok(isReported(edit, 'src/ghost.js'), 'Edit to a nonexistent file should still be flagged');

    const write = treeWithAction('create src/created.js', 'Write', 'src/created.js');
    assert.ok(!isReported(write, 'src/created.js'), 'Write to a new file should be suppressed');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1325
 
1326
// Dotted identifiers such as `JSON.parse` must yield no
// `hallucinated_file_or_path` entries, while `src/missing.ts` still must.
test('hallucinations: dotted code symbols are not flagged as missing file paths', () => {
  const dir = tempProject();
  try {
    const pathReferences = (text) => {
      const tree = { nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] };
      return detectHallucinations(tree, dir).hallucinations
        .filter((entry) => entry.category === 'hallucinated_file_or_path')
        .map((entry) => entry.reference);
    };
    const symbols = ['JSON.parse', 'params.arguments', 'params.name', 'test.skip', 'describe.skip', 'obj.method', 'array.length'];
    symbols.forEach((sym) => {
      const refs = pathReferences(sym);
      assert.deepEqual(refs, [], `code symbol "${sym}" should not be flagged as a missing path (got ${JSON.stringify(refs)})`);
    });
    const real = pathReferences('open src/missing.ts');
    assert.ok(real.includes('src/missing.ts'), 'a genuinely missing path with a known extension must still be flagged');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1344
 
1345
// Covers three phases: extensionless names are reported while missing, stop
// being reported once the files are created, and prose mentions of code
// symbols or README/CONTRIBUTING are not reported.
test('hallucinations: missing extensionless files and local paths are flagged, existing ones are not', () => {
  const dir = tempProject();
  try {
    const flagged = (text) => {
      const tree = { nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] };
      return detectHallucinations(tree, dir).hallucinations
        .filter((entry) => entry.category === 'hallucinated_file_or_path')
        .map((entry) => entry.reference);
    };

    // Phase 1: nothing has been created yet, so each mention is missing.
    assert.ok(flagged('open Dockerfile').includes('Dockerfile'), 'a missing Dockerfile should be flagged');
    assert.ok(flagged('open .env').includes('.env'), 'a missing .env should be flagged');
    assert.ok(flagged('open Makefile').includes('Makefile'), 'a missing Makefile should be flagged');
    assert.ok(flagged('open src/route').includes('src/route'), 'a missing extensionless local path should be flagged');

    // Phase 2: once the files exist they must no longer be reported.
    writeFileSync(join(dir, 'Dockerfile'), 'FROM node:20\n');
    writeFileSync(join(dir, '.env'), 'X=1\n');
    const dockerfileStillFlagged = flagged('open Dockerfile and .env').includes('Dockerfile');
    assert.ok(!dockerfileStillFlagged, 'an existing Dockerfile must not be flagged');
    const envStillFlagged = flagged('open Dockerfile and .env').includes('.env');
    assert.ok(!envStillFlagged, 'an existing .env must not be flagged');

    // Phase 3: prose that merely names symbols or well-known files.
    const noise = flagged('JSON.parse and test.skip and update the README section about CONTRIBUTING');
    const symbolFlagged = noise.includes('JSON.parse') || noise.includes('test.skip');
    assert.ok(!symbolFlagged, 'extensionless detection must not reintroduce code-symbol false positives');
    const filenameWordFlagged = noise.includes('README') || noise.includes('CONTRIBUTING');
    assert.ok(!filenameWordFlagged, 'a known filename word in prose without a file-op verb must not be flagged');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1372
 
1373
// Writes two one-line session files, one recording the temp dir as its cwd
// and one recording a foreign path, and checks `recordedCwd` reads each back.
test('discover: a recorded cwd that mismatches the project dir excludes a colliding session', () => {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-cwd-'));
  // Cleanup runs in `finally` so a failing assertion does not leave the
  // temp directory behind.
  try {
    const matching = join(dir, 'match.jsonl');
    writeFileSync(matching, JSON.stringify({ type: 'user', cwd: dir, uuid: 'u1' }) + '\n');
    assert.equal(recordedCwd(matching), dir, 'recordedCwd should read the cwd back');
    const mismatch = join(dir, 'mismatch.jsonl');
    writeFileSync(mismatch, JSON.stringify({ type: 'user', cwd: '/some/other/project', uuid: 'u1' }) + '\n');
    assert.equal(recordedCwd(mismatch), '/some/other/project', 'recordedCwd should read a foreign cwd');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1383
 
1384
// `isRiskyCommand` must accept every listed dangerous variant and reject
// every listed benign command.
test('security report: risky-command variants are detected', () => {
  const risky = [
    'rm -fr build',
    'rm -r -f build',
    'chmod -R 777 dir',
    'chmod 0777 file',
    'curl https://x | sudo bash',
    'curl https://x | zsh',
    'bash <(curl https://x)',
    'drop schema public cascade',
    'TRUNCATE users',
  ];
  risky.forEach((cmd) => {
    assert.ok(isRiskyCommand(cmd), `risky command missed: ${cmd}`);
  });
  const harmless = ['rm file.txt', 'chmod 644 file', 'ls -la', 'curl https://x > out.txt'];
  harmless.forEach((benign) => {
    assert.ok(!isRiskyCommand(benign), `benign command over-flagged: ${benign}`);
  });
});
1392
 
1393
// `mentionsTestSkip` must match skip-style test APIs and phrasing, and must
// not match ordinary sentences about running or adding tests.
test('security report: test-disable APIs and phrasing are detected', () => {
  const disabling = ['test.skip("x")', 'describe.skip("x")', 'it.skip("x")', 'xit("x")', 'skip e2e suite', 'remove the auth spec'];
  disabling.forEach((t) => {
    assert.ok(mentionsTestSkip(t), `test-disable missed: ${t}`);
  });
  const ordinary = ['run all the tests', 'add a test for login'];
  ordinary.forEach((benign) => {
    assert.ok(!mentionsTestSkip(benign), `benign test phrasing over-flagged: ${benign}`);
  });
});
1401
 
1402
// Checks each named entry of RISKY_CMD_PARTS against one command, checks the
// table has no entries beyond those listed here, and checks that an
// alternation built from the parts agrees with `isRiskyCommand`.
test('regex decomposition: every RISKY_CMD named piece fires on its command family', () => {
  const compose = (parts) => {
    const alternatives = parts.map((part) => `(?:${part.re.source})`);
    return new RegExp(alternatives.join('|'), 'i');
  };
  const byName = new Map();
  for (const part of RISKY_CMD_PARTS) byName.set(part.name, part.re);

  const positives = {
    rm_rf_combined: 'rm -rf build',
    rm_r_then_f: 'rm -r -f build',
    rm_f_then_r: 'rm -f -r build',
    chmod_world_writable: 'chmod -R 777 dir',
    curl_pipe_shell: 'curl https://x | sudo bash',
    shell_process_substitution: 'bash <(curl https://x)',
    no_verify: 'git commit --no-verify',
    force: 'git push --force',
    drop_table: 'DROP TABLE users',
    drop_schema: 'drop schema public cascade',
    truncate: 'TRUNCATE users',
  };
  const expected = Object.entries(positives);
  expected.forEach(([name, cmd]) => {
    const re = byName.get(name);
    assert.ok(re, `unknown piece ${name}`);
    assert.ok(re.test(cmd), `piece ${name} missed its command: ${cmd}`);
  });
  assert.equal(RISKY_CMD_PARTS.length, expected.length, 'piece count drifted');

  const composed = compose(RISKY_CMD_PARTS);
  const riskySamples = Object.values(positives).concat(['rm -fr /tmp', 'chmod 0777 f']);
  riskySamples.forEach((cmd) => {
    assert.equal(composed.test(cmd), isRiskyCommand(cmd), `composed != isRiskyCommand for: ${cmd}`);
  });

  const benignSamples = ['rm file.txt', 'chmod 644 file', 'ls -la', 'curl https://x > out.txt', '--force-with-lease'];
  benignSamples.forEach((benign) => {
    assert.equal(composed.test(benign), isRiskyCommand(benign), `benign mismatch: ${benign}`);
    assert.ok(!composed.test(benign), `benign over-flagged: ${benign}`);
  });
});
1433
 
1434
// Checks each named entry of SECURITY_INTENT_PARTS against one phrase, checks
// the table has no entries beyond those listed here, and checks an
// alternation built from the parts against positive and benign phrases.
test('regex decomposition: every SECURITY_INTENT named piece fires on its phrasing family', () => {
  const compose = (parts) => {
    const alternatives = parts.map((part) => `(?:${part.re.source})`);
    return new RegExp(alternatives.join('|'), 'i');
  };
  const byName = new Map();
  for (const part of SECURITY_INTENT_PARTS) byName.set(part.name, part.re);

  const positives = {
    credential_lifecycle: 'please rotate the api key',
    pat_lifecycle: 'the pat was rotated yesterday',
    email_change: 'change the email to a public contact',
    do_not_expose: 'never expose the token',
    expose_us: 'this could expose us',
    leak_list: 'audit for leak anything',
    audit_repos: 'do a full audit of the repo',
    commit_history_audit: 'the commit history needs an audit',
    relicensing: 'relicense the project to MIT',
    disable_tests: 'skip the auth test',
    access_control_change: 'tighten the auth flow',
  };
  const expected = Object.entries(positives);
  expected.forEach(([name, phrase]) => {
    const re = byName.get(name);
    assert.ok(re, `unknown piece ${name}`);
    assert.ok(re.test(phrase), `piece ${name} missed its phrase: ${phrase}`);
  });
  assert.equal(SECURITY_INTENT_PARTS.length, expected.length, 'piece count drifted');

  const composed = compose(SECURITY_INTENT_PARTS);
  Object.values(positives).forEach((phrase) => {
    assert.ok(composed.test(phrase), `composed missed: ${phrase}`);
  });
  const benignPhrases = ['a normal sentence about the weather', 'use the api carefully', 'email me later'];
  benignPhrases.forEach((benign) => {
    assert.ok(!composed.test(benign), `benign security phrasing over-flagged: ${benign}`);
  });
});
1462
 
1463
// Each argv below either ends on a value-taking option or follows one with
// another flag; `parseArgs` must throw one of the accepted messages.
test('cli: value-taking options reject a missing value or a flag-shaped value', () => {
  const acceptedMessage = /requires a value|requires at least|expects a date|unknown --from/;
  const invalidArgvs = [
    ['--dir'],
    ['--out', '--redact-auto'],
    ['--report-file', '--quiet'],
    ['--from'],
    ['--since'],
  ];
  invalidArgvs.forEach((args) => {
    assert.throws(() => parseArgs(args), acceptedMessage, `expected ${JSON.stringify(args)} to throw`);
  });
});
1468
 
1469
// `--since` throws on a non-date string and accepts an ISO calendar date.
test('cli: --since requires a real date and rejects garbage', () => {
  const parseSince = (value) => () => parseArgs(['--since', value]);
  assert.throws(parseSince('not-a-date'), /expects a date/);
  assert.doesNotThrow(parseSince('2026-06-01'));
});
1473
 
1474
// Combining --stdin with --from claude must throw at argument parsing.
test('cli: --stdin --from claude is rejected', () => {
  const parseConflicting = () => parseArgs(['--stdin', '--from', 'claude']);
  assert.throws(parseConflicting, /cannot be combined with --from claude/);
});
1477
 
1478
 
1479
 
1480
// Short JSON values that contain an escape sequence (backslash-n, escaped
// quote, escaped backslash) must still be reported as `secret-assignment`;
// a two-character plain value and a `${SECRET}` placeholder must not.
test('P7: short escaped-JSON secret values fail closed (redaction gate)', () => {
  const cases = [
    { label: 'short escaped newline', sample: '{"api_key":"a\\nz"}' },
    { label: 'tiny escaped value', sample: '{"api_key":"x\\ny"}' },
    { label: 'escaped quote', sample: '{"token":"a\\"b"}' },
    { label: 'escaped backslash', sample: '{"secret":"a\\\\b"}' },
    { label: 'spec literal-\\n form', sample: '{"api_key":"line1\\nline2line2line2"}' },
  ];
  cases.forEach(({ label, sample }) => {
    const ruleIds = scanText(sample).map((finding) => finding.ruleId);
    assert.ok(ruleIds.includes('secret-assignment'), `${label}: escaped secret must be caught (got ${JSON.stringify(ruleIds)})`);
  });

  const shortValueFindings = scanText('{"api_key":"ab"}');
  assert.equal(shortValueFindings.length, 0, 'benign short value below floor must stay clean');

  const placeholderHits = scanText('{"api_key":"${SECRET}"}').filter((finding) => finding.ruleId === 'secret-assignment');
  assert.equal(placeholderHits.length, 0, 'placeholder must stay clean');
});
1495
 
1496
// End-to-end run of the CLI over an export whose user turn holds a short
// secret containing a literal backslash-n. The value must not survive in any
// written artifact, in either its raw or its JSON-escaped spelling.
test('P7: a short escaped-JSON secret leaves no raw value in any artifact end to end', async () => {
  const rawValue = 'a\\nz';
  // Inside a JSON artifact the backslash is itself escaped, so a leaked value
  // would be spelled differently there than in a markdown artifact.
  const jsonEscapedValue = JSON.stringify(rawValue).slice(1, -1);
  const secretLine = `config is {"api_key":"${rawValue}"}`;
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-p7-'));
  const file = join(dir, 'escconv.json');
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: [secretLine] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] },
    },
  }];
  writeFileSync(file, JSON.stringify(convo));
  try {
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);
    const artifacts = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ].filter((f) => existsSync(join(dir, f))).map((f) => readFileSync(join(dir, f), 'utf8')).join('\n');
    assert.ok(!artifacts.includes(rawValue), 'raw short escaped-JSON secret leaked into an artifact');
    assert.ok(!artifacts.includes(jsonEscapedValue), 'JSON-escaped form of the short secret leaked into an artifact');
    assert.ok(artifacts.includes('[REDACTED:secret-assignment]'), 'expected a secret-assignment redaction marker');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1521
 
1522
// A single Edit to src/auth/session.ts must yield a security finding with
// tier 'verified' and confidence exactly 0.95.
test('P1: a single strong security signal stays verified at exactly 0.95', () => {
  const authEdit = { tool: 'Edit', file: 'src/auth/session.ts', command: null, model: 'm' };
  const node = {
    id: 'node_001',
    text: 'harden auth',
    title: 'harden auth',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [authEdit],
  };
  const { failures } = analyzeTree({ nodes: [node] });
  const sec = failures.find((failure) => failure.type === 'security_or_privacy_risk');
  assert.ok(sec && sec.tier === 'verified' && sec.confidence === 0.95, 'strong anchor must remain verified/0.95');
});
1530
 
1531
// Compares the security finding of a multi-signal Bash action against the
// finding of a lone keyword inside an Edit: the former must be 'verified',
// must list its signals in the evidence, and must outscore the latter.
test('P1: confidence is derived from corroboration and the contributing signals are in the evidence', () => {
  const strong = {
    id: 'node_001', text: 'deploy', title: 'deploy', kind: 'root', status: 'accepted', parent: null,
    actions: [{ tool: 'Bash', file: 'src/auth/session.ts', command: '. /srv/app/.env; rm -rf /tmp/x; chmod 777 /etc', input: '. /srv/app/.env; rm -rf /tmp/x; chmod 777 /etc', model: 'm' }],
  };
  const strongSec = analyzeTree({ nodes: [strong] }).failures.find((f) => f.type === 'security_or_privacy_risk');
  // Guard first so a missing finding fails with a message, not a TypeError.
  assert.ok(strongSec, 'multi-signal node must produce a security_or_privacy_risk finding');
  assert.equal(strongSec.tier, 'verified');
  assert.ok(/signals:/.test(strongSec.evidence), 'evidence must list the contributing signals (auditable)');
  assert.ok(/strong credential content/.test(strongSec.evidence), 'evidence must name the strong credential signal');

  const weak = {
    id: 'node_001', text: 'edit detector', title: 'x', kind: 'root', status: 'accepted', parent: null,
    actions: [{ tool: 'Edit', file: 'src/analyze.js', input: 'const ACCESS = /rbac/i;', command: null, model: 'm' }],
  };
  const weakSec = analyzeTree({ nodes: [weak] }).failures.find((f) => f.type === 'security_or_privacy_risk');
  assert.ok(weakSec, 'lone weak keyword must still produce a security_or_privacy_risk finding');
  assert.ok(weakSec.confidence < strongSec.confidence, 'lone weak keyword must score below a multi-signal strong event');
});
1548
 
1549
// Neither node carries a timestamp, and the correction has the lower ordinal
// id. Any failure that reports a corrector must not point at a node whose
// trailing id number is smaller than the failure's own.
test('P2: afterFailure does not link a corrector that precedes its failure when timestamps are missing', () => {
  const deckEdit = () => ({ tool: 'Edit', file: 'site/deck/index.html', command: null, input: null, model: 'm' });
  const failure = {
    id: 'node_002',
    text: 'the deck still does not render here',
    title: 'still broken',
    kind: 'direction',
    status: 'accepted',
    parent: null,
    actions: [deckEdit()],
  };
  const earlier = {
    id: 'node_001',
    text: 'no that is wrong redo the deck here please',
    title: 'redo',
    kind: 'correction',
    status: 'accepted',
    parent: failure,
    actions: [deckEdit()],
  };
  const ordinalOf = (nodeId) => Number(/(\d+)$/.exec(nodeId)[1]);

  const analysis = analyzeTree({ nodes: [failure, earlier] });
  analysis.failures
    .filter((f) => f.correctedByNodeId)
    .forEach((f) => {
      const failureOrdinal = ordinalOf(f.firstSeenNodeId);
      const correctorOrdinal = ordinalOf(f.correctedByNodeId);
      assert.ok(correctorOrdinal >= failureOrdinal, `failure ${f.id} corrected by an earlier-ordinal node`);
    });
});
1566
 
1567
// The later node edits a different file and changes topic, so no correction
// chain may name it as the resolution.
test('P2: resolvedBy is null when no resolution ties back to the failure, instead of the temporally-nearest node', () => {
  const failure = {
    id: 'node_001',
    text: 'do not hardcode the database url into the config file please',
    title: 'no hardcoding',
    kind: 'correction',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T10:00:00.000Z',
    actions: [{ tool: 'Edit', file: 'config/db.ts', command: null, input: null, model: 'm' }],
  };
  const unrelatedLater = {
    id: 'node_002',
    text: 'now lets switch topics entirely and write the marketing landing copy',
    title: 'marketing',
    kind: 'direction',
    status: 'accepted',
    parent: failure,
    ts: '2026-06-12T11:00:00.000Z',
    actions: [{ tool: 'Edit', file: 'site/index.html', command: null, input: null, model: 'm' }],
  };
  const { correctionChains } = analyzeTree({ nodes: [failure, unrelatedLater] });
  correctionChains.forEach((chain) => {
    assert.notEqual(chain.resolvedNodeId, 'node_002', 'must not resolve to an unrelated temporally-nearest node');
  });
});
1581
 
1582
// The third turn says the fix works but edits an unrelated file; at least one
// correction chain must still record it as the resolving node.
test('P2: an explicit acceptance turn IS accepted as a resolution even with no shared evidence', () => {
  const editOf = (target) => ({ tool: 'Edit', file: target, command: null, input: null, model: 'm' });
  const failure = {
    id: 'node_001',
    text: 'the checkout total is off by a cent on tax rounding',
    title: 'rounding bug',
    kind: 'direction',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T10:00:00.000Z',
    actions: [editOf('src/checkout/total.ts')],
  };
  const correction = {
    id: 'node_002',
    text: 'no the checkout total rounding is still wrong, redo the total calc',
    title: 'still wrong',
    kind: 'correction',
    status: 'accepted',
    parent: failure,
    ts: '2026-06-12T10:30:00.000Z',
    actions: [editOf('src/checkout/total.ts')],
  };
  const accepted = {
    id: 'node_003',
    text: 'perfect, that works now',
    title: 'works',
    kind: 'direction',
    status: 'accepted',
    parent: correction,
    ts: '2026-06-12T11:00:00.000Z',
    actions: [editOf('src/unrelated/widget.ts')],
  };
  const { correctionChains } = analyzeTree({ nodes: [failure, correction, accepted] });
  const resolvedByAcceptance = correctionChains.some((chain) => chain.resolvedNodeId === 'node_003');
  assert.ok(resolvedByAcceptance, 'the explicit acceptance turn should be recorded as the resolution');
});
1601
 
1602
// One Bash action both sources a .env file and runs `rm -rf`; the security
// finding's evidence must mention both "credential" and "risky-command".
test('P3: a node that leaks a secret and runs a risky command surfaces both kinds', () => {
  const node = {
    id: 'node_001', text: 'deploy', title: 'deploy', kind: 'root', status: 'accepted', parent: null,
    actions: [{ tool: 'Bash', file: null, command: '. /srv/app/.env; rm -rf /var/data', input: '. /srv/app/.env; rm -rf /var/data', model: 'm' }],
  };
  const sec = analyzeTree({ nodes: [node] }).failures.find((f) => f.type === 'security_or_privacy_risk');
  // Guard first so a missing finding fails with a message, not a TypeError.
  assert.ok(sec, 'node must produce a security_or_privacy_risk finding');
  assert.ok(/credential/.test(sec.evidence) && /risky-command/.test(sec.evidence), `both kinds must appear: ${sec.evidence}`);
});
1610
 
1611
// A single correction whose wording hits several complaint classes should yield
// at least two distinct failure types.
test('P3: inferSignals can return multiple process kinds for a multi-class correction', () => {
  const goal = { id: 'node_001', text: 'build a dashboard', title: 'x', kind: 'root', status: 'accepted', parent: null, actions: [] };
  const pushback = {
    id: 'node_002',
    kind: 'correction',
    status: 'accepted',
    parent: goal,
    actions: [],
    text: 'no, you ignored what i asked for and this is overbuilt, scrap the web app, keep it minimal',
    title: 'multi-class correction',
  };

  const { failures } = analyzeTree({ nodes: [goal, pushback] });
  const labels = new Set();
  for (const failure of failures) labels.add(failure.type);

  assert.ok(labels.size >= 2, `expected multiple process labels, got ${[...labels].join(', ')}`);
});
1622
 
1623
// The keyword appears only inside edited source text for src/analyze.js; the
// resulting security failure must exist and sit at the 'inferred' tier.
test('P4: a bare rbac keyword with no co-signal stays inferred, never high/verified', () => {
  const edit = {
    tool: 'Edit',
    file: 'src/analyze.js',
    input: 'const ACCESS_CONTROL_WEAK_RE = /rbac|access-control/i;',
    command: null,
    model: 'm',
  };
  const node = {
    id: 'node_001', text: 'edit detector', title: 'x', kind: 'root', status: 'accepted', parent: null,
    actions: [edit],
  };

  const { failures } = analyzeTree({ nodes: [node] });
  const sec = failures.find(({ type }) => type === 'security_or_privacy_risk');
  const tier = sec && sec.tier;
  assert.ok(sec && tier === 'inferred', `lone weak keyword must be inferred (got ${tier})`);
});
1631
 
1632
// Same keyword, but the edited file lives under src/rbac/: the failure is
// expected to be tiered 'high' or 'verified'.
test('P4: a bare rbac keyword WITH a security-surface co-signal earns high tier', () => {
  const edit = {
    tool: 'Edit',
    file: 'src/rbac/policy.ts',
    input: 'enable rbac for the route',
    command: null,
    model: 'm',
  };
  const node = {
    id: 'node_001', text: 'wire up access control', title: 'x', kind: 'root', status: 'accepted', parent: null,
    actions: [edit],
  };

  const { failures } = analyzeTree({ nodes: [node] });
  const sec = failures.find(({ type }) => type === 'security_or_privacy_risk');
  const tier = sec && sec.tier;
  assert.ok(sec && ['high', 'verified'].includes(tier), `keyword + surface co-signal should tier up (got ${tier})`);
});
1640
 
1641
// A correction phrased in security terms follows an edit to deploy.sh; the
// analysis must report a security failure, but only at low confidence.
test('P6: a human security correction backstops a prior action that carried no security label', () => {
  // Both turns edit the same script; each gets its own action object.
  const touchDeployScript = () => [{ tool: 'Edit', file: 'deploy.sh', command: null, input: null, model: 'm' }];

  const prior = {
    id: 'node_001',
    text: 'put the deploy config value directly into the deploy script',
    title: 'deploy config',
    kind: 'direction',
    status: 'accepted',
    parent: null,
    actions: touchDeployScript(),
  };
  const correction = {
    id: 'node_002',
    text: 'that is a secret, rotate that key and do not commit it to the deploy script',
    title: 'rotate',
    kind: 'correction',
    status: 'accepted',
    parent: prior,
    actions: touchDeployScript(),
  };

  const { failures } = analyzeTree({ nodes: [prior, correction] });
  const sec = failures.find(({ type }) => type === 'security_or_privacy_risk');

  assert.ok(sec, 'human security correction should backstop a missed security event');
  assert.equal(sec.tier, 'inferred', 'the backstop must be inferred only, never strong/verified');
  assert.ok(sec.confidence <= 0.7, 'the backstop confidence must stay low');
});
1656
 
1657
// With no actions anywhere in the tree, a security-flavoured correction must
// not produce any 'high' or 'verified' security failure.
test('P6: the backstop never fabricates a strong/verified security label from prose alone', () => {
  const root = { id: 'node_001', text: 'build the cli', title: 'x', kind: 'root', status: 'accepted', parent: null, actions: [] };
  const correction = {
    id: 'node_002',
    text: 'never leak the api secret token again',
    title: 'no leaks',
    kind: 'correction',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const { failures } = analyzeTree({ nodes: [root, correction] });
  const isStrongSecurity = ({ type, tier }) =>
    type === 'security_or_privacy_risk' && (tier === 'verified' || tier === 'high');
  const strongSec = failures.filter(isStrongSecurity);

  assert.equal(strongSec.length, 0, 'a human-correction backstop must never mint strong/verified labels');
});
1666
 
1667
// Release gate: a corpus of harmless prompts must produce no security failures,
// no hallucination findings, and no high/medium redaction hits.
test('NEGATIVE CORPUS (release gate): benign inputs produce zero security/failure false positives', () => {
  const dir = tempProject();
  const benign = [
    'capture a screenshot with chrome --headless --force-device-scale-factor=1 --screenshot=out.png',
    'edit src/ui/semantic-tokens.ts to adjust the design token palette',
    'update theme/design-tokens.json and src/lexer/tokenizer.ts for the new theme',
    'the access-control documentation mentions rbac as a concept; just explaining it in the readme',
    'we use JSON.parse and params.arguments and test.skip in the code, no changes needed',
    'add a token field to the response schema and document the bearer header format in the api guide',
    'rename the file from auth-helpers.md to authentication-notes.md in the docs folder',
    'the password strength meter component needs a tooltip, purely a UI label',
  ];
  // Tool activity for the first three prompts; every later prompt has none.
  const actionsByIndex = [
    [{ tool: 'Bash', file: null, command: 'chrome --headless --force-device-scale-factor=1 --screenshot=out.png', model: 'm' }],
    [{ tool: 'Edit', file: 'src/ui/semantic-tokens.ts', model: 'm' }],
    [{ tool: 'Edit', file: 'theme/design-tokens.json', model: 'm' }],
  ];

  try {
    // Materialise the directories and files the prompts mention inside `dir`.
    for (const segments of [['src', 'ui'], ['src', 'lexer'], ['theme'], ['docs']]) {
      mkdirSync(join(dir, ...segments), { recursive: true });
    }
    const seedFiles = [
      [['out.png'], 'x'],
      [['src', 'ui', 'semantic-tokens.ts'], 'export const t = 1;\n'],
      [['src', 'lexer', 'tokenizer.ts'], 'export const t = 1;\n'],
      [['theme', 'design-tokens.json'], '{}'],
      [['auth-helpers.md'], '# notes\n'],
      [['authentication-notes.md'], '# notes\n'],
      [['readme'], 'rbac is a concept\n'],
    ];
    for (const [segments, content] of seedFiles) {
      writeFileSync(join(dir, ...segments), content);
    }

    // Linear chain: each prompt's parent is the prompt before it.
    const nodes = [];
    benign.forEach((text, i) => {
      nodes.push({
        id: `node_${String(i + 1).padStart(3, '0')}`,
        text,
        title: text.slice(0, 40),
        kind: i === 0 ? 'root' : 'direction',
        status: 'accepted',
        parent: i === 0 ? null : nodes[i - 1],
        ts: `2026-06-12T${String(10 + i).padStart(2, '0')}:00:00.000Z`,
        actions: actionsByIndex[i] || [],
      });
    });
    // Each analyzer receives its own shallow copies of the nodes.
    const snapshot = () => nodes.map((n) => ({ ...n }));

    const { failures } = analyzeTree({ nodes: snapshot() });
    const secFps = failures.filter((f) => f.type === 'security_or_privacy_risk');
    assert.equal(secFps.length, 0, `negative corpus minted security false positives: ${JSON.stringify(secFps.map((f) => f.evidence))}`);

    const halluc = detectHallucinations({ nodes: snapshot() }, dir).hallucinations;
    assert.equal(halluc.length, 0, `negative corpus minted hallucination false positives: ${JSON.stringify(halluc.map((h) => h.reference))}`);

    for (const text of benign) {
      const hi = scanText(text).filter(({ severity }) => severity === 'high' || severity === 'medium');
      assert.equal(hi.length, 0, `redaction over-fired on benign text "${text}": ${JSON.stringify(hi.map((f) => f.ruleId))}`);
    }
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1720
 
1721
// Renders the fixture tree and checks the init directive, theme colours, node
// shapes, classes, edge labels, and label escaping of the Mermaid output.
test('mermaid: renders a branded flowchart with goal, result, and spine styling', async () => {
  const { tree } = await fixtureTree();
  const out = renderMermaid(tree, { projectName: 'weather-dashboard' });

  assert.ok(out.startsWith("%%{init:"), 'must lead with a Mermaid init directive');

  // Checked in order; the first pattern that fails to match aborts the test.
  const expectations = [
    [/'background':'#0B1210'/, 'dark Bark canvas background'],
    [/'edgeLabelBackground':'#0B1210'/, 'opaque edge-label backing for legibility'],
    [/JetBrains Mono/, 'JetBrains Mono brand font'],
    [/^flowchart TD$/m, 'declares a top-down flowchart'],
    [/classDef spine fill:#121A17,stroke:#0CA08A/, 'brand spine class (teal)'],
    [/classDef abandoned [^\n]*stroke:#34493F[^\n]*stroke-dasharray/, 'Branch-Dim dashed abandoned class'],
    [/classDef failure [^\n]*stroke:#F0B86A/, 'amber failure class'],

    [/N001\(\["GOAL: /, 'root node is a stadium labelled GOAL'],
    [/class N001 [^\n]*goal/, 'root carries the goal class'],
    [/RESULT: /, 'a result node is annotated'],
    [/class \w+ [^\n]*result/, 'a node carries the result class'],
    [/\(\["RESULT: /, 'the result node is a stadium terminal'],

    [/class N001 [^\n]*spine/, 'root is on the spine'],
    [/linkStyle [\d,]+ stroke:#5BF0B8,stroke-width:2\.5px;/, 'spine links are Canopy-tinted'],

    [/N001 -->\|refines\| N002/, 'root refines into the first direction'],
    [/-->\|corrects\| /, 'correction edge labelled'],
  ];
  for (const [pattern, why] of expectations) {
    assert.match(out, pattern, why);
  }

  // Node declaration lines: two-space indent, an id, an opening bracket, then a quoted label.
  const isNodeDeclaration = (l) => /^  (N\w+|A\d+|S\d+)(\[|\(\[|\{\{)"/.test(l);
  const labelLines = out.split('\n').filter(isNodeDeclaration);
  assert.ok(labelLines.length >= 4, 'each prompt is declared as a node');

  for (const line of labelLines) {
    const [, label] = line.match(/"([^"]*)"/);
    // Entity-escaped brackets are fine; raw ones are not.
    const withoutEntities = label.replace(/&lt;|&gt;/g, '');
    assert.ok(!/[<>]/.test(withoutEntities), `unescaped angle bracket in label: ${line}`);
  }
});
1753
 
1754
// A long root title must be cut at a word boundary and finished with a single
// ellipsis character in the rendered GOAL label.
test('mermaid: labels truncate on a word boundary, never mid-word', () => {
  // U+2026 HORIZONTAL ELLIPSIS, written as an escape so the expectation survives
  // any re-encoding of this file (the literal had degraded into a three-character
  // mojibake sequence, which contradicted the one-character slice below).
  const ELLIPSIS = '\u2026';
  const sentence = 'Build a resilient weather dashboard with hourly forecast charts and radar layers everywhere';
  const root = {
    id: 'node_001', kind: 'root', status: 'accepted', parent: null, actions: [],
    title: sentence,
    text: sentence,
  };
  const out = renderMermaid({ nodes: [root] }, { projectName: 'demo' });

  const found = out.match(/N001\(\["GOAL: ([^"]*)"\]\)/);
  // Guard the match so a missing node reports an assertion, not a TypeError.
  assert.ok(found, 'root must be rendered as a GOAL stadium node');
  const label = found[1];

  assert.ok(label.endsWith(ELLIPSIS), `label should end with a single-char ellipsis: ${label}`);
  const body = label.slice(0, -ELLIPSIS.length);
  assert.ok(/\w$/.test(body), 'body ends on a word character (no trailing space)');
  assert.ok(root.title.startsWith(body), 'body is a clean prefix of the source');
  // The source character right after the kept prefix must be whitespace (or the
  // prefix must be the whole title); anything else means a word was split.
  assert.ok(/(^|\s)$/.test(root.title.slice(body.length, body.length + 1)) || root.title.length === body.length,
    `truncation landed mid-word: "${body}|${root.title.slice(body.length, body.length + 8)}"`);
});
1769
 
1770
// Root -> good -> result is the live path; a fourth node hangs off the root
// with status 'abandoned' and must be styled and linked differently.
test('mermaid: abandoned branches render as dimmed dotted detours off the spine', () => {
  // Minimal node factory; the minute of `ts` is taken from the id's last digit.
  const node = (id, kind, title, parent, status) => ({
    id,
    kind,
    title,
    text: title,
    status: status || 'accepted',
    ts: `2026-06-01T10:0${id.slice(-1)}:00.000Z`,
    parent,
    actions: [],
  });

  const root = node('node_001', 'root', 'Build the thing', null);
  const good = node('node_002', 'direction', 'Refine the good approach', root);
  const result = node('node_003', 'direction', 'Ship the chosen design', good);
  const dead = node('node_004', 'direction', 'Try a heavy approach we drop', root, 'abandoned');

  const out = renderMermaid({ nodes: [root, good, result, dead] }, { projectName: 'demo' });

  assert.match(out, /class N004 abandoned;/, 'abandoned node carries only the abandoned class');
  assert.doesNotMatch(out, /class N004 [^\n]*spine/, 'abandoned node is not on the spine');
  assert.match(out, /N001 -\.->\|refines\| N004/, 'abandoned branch uses a dotted edge');

  assert.match(out, /class N002 [^\n]*spine/, 'good direction on spine');
  assert.match(out, /class N003 [^\n]*result/, 'last live direction is the result');
  assert.match(out, /linkStyle 0,1 stroke/, 'only live edges are thickened');
});
1800
 
1801
// wrapMermaidDoc must wrap the diagram in a ```mermaid fence; with the third
// argument set to true the document also explains count stubs and --full.
test('mermaid: wrapMermaidDoc emits a fenced mermaid block that renders on GitHub', () => {
  const diagram = 'flowchart TD\n  N001["x"]';

  const doc = wrapMermaidDoc(diagram, 'demo');
  const fenceChecks = [
    [doc.includes('```mermaid\n'), 'opens a mermaid fence'],
    [doc.trimEnd().endsWith('```'), 'closes the fence'],
    [doc.includes('flowchart TD'), 'contains the diagram'],
  ];
  for (const [holds, why] of fenceChecks) {
    assert.ok(holds, why);
  }

  const summaryDoc = wrapMermaidDoc(diagram, 'demo', true);
  assert.match(summaryDoc, /count stubs/, 'summary doc explains the folding');
  assert.match(summaryDoc, /--full/, 'summary doc points at --full to expand');
});
1810
 
1811
/**
 * Build a synthetic tree for the Mermaid summary tests: one root followed by a
 * linear chain of accepted nodes, optionally plus a two-node abandoned branch.
 *
 * @param {number} liveDirections - number of accepted nodes chained below the
 *   root (ids node_002 .. node_<liveDirections + 1>); every third id is a
 *   'checkpoint', the rest are 'direction'.
 * @param {boolean} [withAbandoned=true] - when true, append node_900 (child of
 *   the root) and node_901 (child of node_900), both with status 'abandoned'.
 * @returns {{ nodes: object[] }} the tree, nodes in creation order.
 */
function bigTree(liveDirections, withAbandoned = true) {
  // Timestamp `minute` minutes after 2026-06-01T10:00Z. Going through Date keeps
  // the value a valid ISO string once the chain passes 59 nodes (string padding
  // used to emit e.g. "10:60:00"); for minutes 0-59 the output is unchanged.
  const stampAt = (minute) => new Date(Date.UTC(2026, 5, 1, 10, minute)).toISOString();

  const root = {
    id: 'node_001', kind: 'root', status: 'accepted', parent: null, actions: [],
    title: 'Build the whole product', text: 'Build the whole product',
    ts: stampAt(0),
  };
  const nodes = [root];

  // Live chain: each node's parent is the node created just before it.
  let prev = root;
  for (let k = 2; k <= liveDirections + 1; k++) {
    const label = `Strategic move number ${k} in the plan`;
    const n = {
      id: `node_${String(k).padStart(3, '0')}`,
      kind: k % 3 === 0 ? 'checkpoint' : 'direction',
      status: 'accepted',
      parent: prev,
      title: label,
      text: label,
      ts: stampAt(k),
      actions: [],
    };
    nodes.push(n);
    prev = n;
  }

  if (withAbandoned) {
    const dead1 = {
      id: 'node_900', kind: 'direction', status: 'abandoned', parent: root, actions: [],
      title: 'Heavy approach we dropped', text: 'Heavy approach we dropped',
      ts: stampAt(5),
    };
    const dead2 = {
      id: 'node_901', kind: 'direction', status: 'abandoned', parent: dead1, actions: [],
      title: 'Follow-up on the dropped approach', text: 'Follow-up on the dropped approach',
      ts: stampAt(6),
    };
    nodes.push(dead1, dead2);
  }
  return { nodes };
}
1845
 
1846
// Trees at or below the summary threshold render every node; larger ones fold
// into count stubs unless `full: true` is passed.
test('mermaid: small trees render in full, large trees auto-summarize', () => {
  const opts = { projectName: 'demo' };
  const lineCount = (s) => s.split('\n').length;

  const small = bigTree(4);
  assert.equal(isSummaryByDefault(small), false, 'a 5-live-node tree renders in full');
  const smallOut = renderMermaid(small, opts);
  assert.match(smallOut, /N004\[/, 'full mode declares each live node');
  assert.doesNotMatch(smallOut, /\d+ steps"/, 'full mode has no count stubs');

  const big = bigTree(SUMMARY_NODE_THRESHOLD + 5);
  assert.equal(isSummaryByDefault(big), true, 'over the threshold auto-summarizes');
  const bigOut = renderMermaid(big, opts);
  const summaryExpectations = [
    [/^flowchart TD$/m, 'summary is still a valid flowchart'],
    [/\(\["GOAL: /, 'GOAL stadium preserved in summary'],
    [/RESULT: /, 'RESULT preserved in summary'],
    [/\d+ steps?"/, 'routine steps fold into a count stub'],
  ];
  for (const [pattern, why] of summaryExpectations) {
    assert.match(bigOut, pattern, why);
  }

  const fullOut = renderMermaid(big, { ...opts, full: true });
  assert.ok(lineCount(bigOut) < lineCount(fullOut), 'summary is more compact than full');
  assert.match(fullOut, /N0\d\d\[/, 'forcing --full declares each node even on a big tree');
});
1864
 
1865
// In summary mode the two abandoned nodes built by bigTree must collapse into
// one counted stub that keeps the 'abandoned' class.
test('mermaid: summary folds abandoned branches into one dim count stub', () => {
  const big = bigTree(SUMMARY_NODE_THRESHOLD + 3, true);
  const out = renderMermaid(big, { projectName: 'demo', summary: true });
  assert.match(out, /A\d+\["2 abandoned steps"\]/, 'abandoned subtree folds into a counted stub');
  assert.match(out, /class A\d+ abandoned;/, 'the stub keeps the dim abandoned class');
  assert.ok(!/N900\[/.test(out) && !/N901\[/.test(out), 'individual abandoned nodes are not drawn');
  // \u2026 is the real ellipsis character; the previous pattern held a
  // mis-encoded three-character sequence and so could never detect one.
  assert.ok(!/[A-Za-z]\u2026[A-Za-z]/.test(out), 'no mid-word ellipsis in any label');
});
1873
 
1874
// Passing `summary: true` on a small tree must still yield a flowchart with a GOAL node.
test('mermaid: --summary forces summary mode even on a small tree', () => {
  const forced = renderMermaid(bigTree(3), { projectName: 'demo', summary: true });
  const expectations = [
    [/^flowchart TD$/m, 'forced summary is a valid flowchart'],
    [/\(\["GOAL: /, 'forced summary keeps the GOAL'],
  ];
  for (const [pattern, why] of expectations) {
    assert.match(forced, pattern, why);
  }
});
1880
 
1881
// Runs the CLI with --graph into a throwaway directory and inspects the
// written PROMPT_TREE_GRAPH.md, including that known secrets are absent.
test('cli: --graph writes PROMPT_TREE_GRAPH.md with a mermaid flowchart', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-graph-'));
  try {
    const argv = ['--file', FIXTURE, '--dir', dir, '--graph', '--redact-auto', '--quiet'];
    await main(argv);

    const graphPath = join(dir, 'PROMPT_TREE_GRAPH.md');
    assert.ok(existsSync(graphPath), 'PROMPT_TREE_GRAPH.md must be written');

    const text = readFileSync(graphPath, 'utf8');
    assert.ok(text.includes('```mermaid'), 'contains a mermaid fence');
    assert.ok(text.includes('flowchart TD'), 'contains a flowchart');
    assert.match(text, /GOAL: /, 'annotates the goal');

    // Neither raw value may survive into the rendered document.
    const hasApiKey = text.includes('sk-ant-api03-FAKEFAKEFAKEFAKEFAKEFAKE1234');
    assert.ok(!hasApiKey, 'secret stays redacted');
    const hasPassword = text.includes('hunter2pass');
    assert.ok(!hasPassword, 'embedded credential stays redacted');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
1897
 
1898
 
1899
// Absolute path of fixtures/claude-code-rejections.jsonl, resolved relative to this test file.
const REJECTIONS_FIXTURE = fileURLToPath(
  new URL('./fixtures/claude-code-rejections.jsonl', import.meta.url),
);

/**
 * Parse the rejections fixture under the session id 'rejections-fixture'.
 * @returns {Promise<object>} whatever parseSessionFile resolves to.
 */
async function loadRejectionsFixture() {
  const options = { sessionId: 'rejections-fixture' };
  return parseSessionFile(REJECTIONS_FIXTURE, options);
}
1904
 
1905
// The fixture must yield exactly one user_declined_tool rejection with the
// expected source, confidence, tool-use id, and evidence text.
test('rejections: user_declined_tool captured from canonical tool_result text', async () => {
  const { prompts } = await loadRejectionsFixture();
  const everyRejection = prompts.flatMap((p) => p.rejections || []);
  const declined = everyRejection.filter(({ kind }) => kind === 'user_declined_tool');

  assert.equal(declined.length, 1, 'one user_declined_tool must be captured');
  const [only] = declined;
  assert.equal(only.source, 'tool_result');
  assert.equal(only.confidence, 1.0);
  assert.equal(only.toolUseId, 'toolu-0001');
  assert.ok(only.evidence && only.evidence.includes("doesn't want to proceed"));
});
1915
 
1916
// An interrupt must show up both in stats.interruptions and as one typed rejection.
test('rejections: user_interrupt typed as a rejection AND counter still increments', async () => {
  const { stats, prompts } = await loadRejectionsFixture();
  assert.ok(stats.interruptions >= 1, 'interruption counter must still increment');

  const everyRejection = prompts.flatMap((p) => p.rejections || []);
  const interrupts = everyRejection.filter(({ kind }) => kind === 'user_interrupt');
  assert.equal(interrupts.length, 1);

  const [only] = interrupts;
  assert.equal(only.confidence, 1.0);
  assert.equal(only.source, 'text');
});
1924
 
1925
// Exactly one tool_execution_error is expected, tied to toolu-0003.
test('rejections: tool_execution_error captured from is_error tool_result', async () => {
  const { prompts } = await loadRejectionsFixture();
  const everyRejection = prompts.flatMap((p) => p.rejections || []);
  const errs = everyRejection.filter(({ kind }) => kind === 'tool_execution_error');

  assert.equal(errs.length, 1);
  const [only] = errs;
  assert.equal(only.toolUseId, 'toolu-0003');
  assert.ok(only.evidence.includes('cannot create directory'));
});
1932
 
1933
// Exactly one permission_denied rejection is expected, tied to toolu-0004 at 0.85 confidence.
test('rejections: permission_denied captured from is_error tool_result with OS denial text', async () => {
  const { prompts } = await loadRejectionsFixture();
  const everyRejection = prompts.flatMap((p) => p.rejections || []);
  const denied = everyRejection.filter(({ kind }) => kind === 'permission_denied');

  assert.equal(denied.length, 1);
  const [only] = denied;
  assert.equal(only.toolUseId, 'toolu-0004');
  assert.equal(only.confidence, 0.85);
  assert.match(only.evidence, /permission denied/i);
});
1941
 
1942
// One model_refusal whose source is 'stop_reason' is expected, at 0.95 confidence.
test('rejections: model_refusal captured from stop_reason: "refusal" at 0.95 confidence', async () => {
  const { prompts } = await loadRejectionsFixture();
  const fromStopReason = ({ kind, source }) => kind === 'model_refusal' && source === 'stop_reason';
  const stop = prompts.flatMap((p) => p.rejections || []).filter(fromStopReason);

  assert.equal(stop.length, 1);
  assert.equal(stop[0].confidence, 0.95);
});
1950
 
1951
// One model_refusal whose source is 'text_heuristic' is expected, at 0.7 confidence.
test('rejections: model_refusal captured from text heuristic at 0.7 confidence', async () => {
  const { prompts } = await loadRejectionsFixture();
  const fromHeuristic = ({ kind, source }) => kind === 'model_refusal' && source === 'text_heuristic';
  const text = prompts.flatMap((p) => p.rejections || []).filter(fromHeuristic);

  assert.equal(text.length, 1);
  const [only] = text;
  assert.equal(only.confidence, 0.7);
  assert.match(only.evidence, /can'?t help/i);
});
1960
 
1961
// One user_text_decline is expected at 0.8 confidence, and the prompt that
// carries it must keep its original text.
test('rejections: user_text_decline captured when prompt opens with "stop, don\'t do that"', async () => {
  const { prompts } = await loadRejectionsFixture();
  const isTextDecline = ({ kind }) => kind === 'user_text_decline';

  const declines = prompts.flatMap((p) => p.rejections || []).filter(isTextDecline);
  assert.equal(declines.length, 1);
  assert.equal(declines[0].confidence, 0.8);

  const declinePrompt = prompts.find((p) => (p.rejections || []).some(isTextDecline));
  assert.ok(declinePrompt, 'decline prompt must exist in session.prompts');
  assert.ok(/stop, don'?t do that/i.test(declinePrompt.text), 'text is preserved on the prompt');
});
1970
 
1971
// stats.rejections must equal the sum of the per-kind counts, and
// stats.rejectionsByKind must match the expected breakdown.
test('rejections: session.stats.rejections count and rejectionsByKind breakdown are populated', async () => {
  const { stats } = await loadRejectionsFixture();
  const expectedKinds = {
    user_declined_tool: 1,
    user_interrupt: 1,
    tool_execution_error: 1,
    permission_denied: 1,
    model_refusal: 2,
    user_text_decline: 1,
  };

  let expectedTotal = 0;
  for (const count of Object.values(expectedKinds)) {
    expectedTotal += count;
  }

  assert.equal(stats.rejections, expectedTotal, 'session.stats.rejections counts every captured rejection');
  assert.deepEqual(stats.rejectionsByKind, expectedKinds);
});
1985
 
1986
// A session consisting of a single declined tool_result (no user text) must
// still produce a prompt, flagged isRejectionOnly, holding that rejection.
test('rejections: rejection-only synthetic prompt is created when a tool_result rejection arrives with no current text prompt', async () => {
  const tmp = mkdtempSync(join(tmpdir(), 'rej-synth-'));
  try {
    // The fixture is written inside the try so the temp dir is removed even if
    // the write itself fails.
    const path = join(tmp, 'synth.jsonl');
    const record = {
      type: 'user',
      message: { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'toolu-x', content: "The user doesn't want to proceed with this tool use. The user wants you to do something else.", is_error: true }] },
      uuid: 'u-synth-1',
      parentUuid: null,
      timestamp: '2026-06-18T11:00:00.000Z',
      sessionId: 'synth',
    };
    writeFileSync(path, JSON.stringify(record) + '\n');

    // Uses the module-level parseSessionFile import; a second dynamic import of
    // the same module added nothing.
    const s = await parseSessionFile(path, { sessionId: 'synth' });
    const synth = s.prompts.find((p) => p.isRejectionOnly);
    assert.ok(synth, 'a synthetic rejection-only prompt must be created');
    assert.equal(synth.text, '');
    assert.equal(synth.rejections.length, 1);
    assert.equal(synth.rejections[0].kind, 'user_declined_tool');
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
2012
 
2013
// After classification, a rejection-only prompt must become a single node of
// kind 'rejection' whose title mentions the rejection.
test('rejections: rejection-only synthetic prompts get kind:"rejection" downstream', async () => {
  const tmp = mkdtempSync(join(tmpdir(), 'rej-kind-'));
  try {
    // The fixture is written inside the try so the temp dir is removed even if
    // the write itself fails.
    const path = join(tmp, 'k.jsonl');
    const record = {
      type: 'user',
      message: { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'toolu-y', content: "The user doesn't want to proceed with this tool use.", is_error: true }] },
      uuid: 'u-kind-1',
      parentUuid: null,
      timestamp: '2026-06-18T12:00:00.000Z',
      sessionId: 'kindsession',
    };
    writeFileSync(path, JSON.stringify(record) + '\n');

    // Uses the module-level parseSessionFile import; a second dynamic import of
    // the same module added nothing.
    const session = await parseSessionFile(path, { sessionId: 'kindsession' });
    const nodes = classifyPrompts([session]);
    assert.equal(nodes.length, 1);
    assert.equal(nodes[0].kind, 'rejection', 'synthetic rejection-only node gets kind:"rejection", not root');
    assert.ok(nodes[0].title && /rejected/i.test(nodes[0].title), 'title describes the rejection');
    assert.equal(nodes[0].rejections.length, 1);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
2039
 
2040
// After analyzeTree runs on the fixture tree, tree.analysis.failures must
// contain every failure type that the rejection kinds map to.
test('rejections: each rejection becomes a failure signal of the mapped type', async () => {
  const session = await loadRejectionsFixture();
  const tree = buildTree([session], classifyPrompts([session]));
  analyzeTree(tree);

  const { failures } = tree.analysis;
  const types = new Set(failures.map(({ type }) => type));
  const expected = [
    ['user_rejected_action', 'user_declined_tool/user_interrupt/user_text_decline -> user_rejected_action'],
    ['tool_execution_failed', 'tool_execution_error -> tool_execution_failed'],
    ['permission_denied', 'permission_denied -> permission_denied'],
    ['model_refused', 'model_refusal -> model_refused'],
  ];
  for (const [type, why] of expected) {
    assert.ok(types.has(type), why);
  }

  const refusedCount = failures.filter(({ type }) => type === 'model_refused').length;
  assert.ok(refusedCount >= 1, 'model_refused failure signal is present');
});
2053
 
2054
// The analysis of the fixture tree must include the named lessons and the
// two rejection-related eval candidate types.
test('rejections: lessons and eval candidates are generated for rejection-derived failures', async () => {
  const session = await loadRejectionsFixture();
  const tree = buildTree([session], classifyPrompts([session]));
  analyzeTree(tree);

  const { lessons, evalCandidates } = tree.analysis;

  const lessonTitles = new Set(lessons.map(({ title }) => title));
  assert.ok(lessonTitles.has('Confirm proposed actions before executing'), 'user_rejected_action lesson is generated');
  assert.ok(lessonTitles.has('Rephrase refused requests instead of repeating them'), 'model_refused lesson is generated');

  const evalTypes = new Set(evalCandidates.map(({ type }) => type));
  assert.ok(evalTypes.has('tool_permission_regression'), 'tool_permission_regression eval is generated');
  assert.ok(evalTypes.has('refusal_handling'), 'refusal_handling eval is generated');
});
2066
 
2067
// Checks the schema version, totals, per-kind counts, node ids, and that the
// parseable timestamps in view.rejections are already in ascending order.
test('rejections: renderRejectionsJson returns a flattened, sorted, byKind-summarized view', async () => {
  const session = await loadRejectionsFixture();
  const tree = buildTree([session], classifyPrompts([session]));
  const view = renderRejectionsJson(tree, { projectName: 'rejections-fixture' });

  const { summary, rejections } = view;
  assert.equal(view.schemaVersion, '0.3');
  assert.equal(summary.total, 7);
  assert.equal(summary.byKind.model_refusal, 2);
  assert.equal(summary.byKind.user_declined_tool, 1);
  assert.ok(Array.isArray(rejections));
  assert.equal(rejections.length, 7);
  assert.ok(rejections.every(({ nodeId }) => typeof nodeId === 'string'));

  // Entries whose ts does not parse to a finite number are left out of the order check.
  const ts = rejections.map((r) => Date.parse(r.ts)).filter(Number.isFinite);
  const sorted = ts.slice().sort((a, b) => a - b);
  assert.deepEqual(ts, sorted);
});
2083
 
2084
// Builds 5000 chained prompts with 3 rejections each, then times
// classify -> build -> analyze against a 15 second wall-clock budget.
test('rejections: O(N) preserved - the rejection surfacing pass does not regress quadratic scaling', async () => {
  const N = 5000;
  const R = 3;

  const prompts = Array.from({ length: N }, (_, i) => ({
    uuid: `p-${i}`,
    parentUuid: i === 0 ? null : `p-${i - 1}`,
    ts: new Date(i * 1000).toISOString(),
    text: `prompt ${i}`,
    hasImage: false,
    hadToolResultContext: false,
    afterInterruption: false,
    actions: [],
    thinking: 0,
    rejections: Array.from({ length: R }, (__, j) => ({
      kind: 'user_declined_tool',
      source: 'tool_result',
      confidence: 1.0,
      toolUseId: `t-${i}-${j}`,
      tool: null,
      ts: null,
      evidence: `evidence ${i}-${j}`,
    })),
  }));
  const session = {
    sessionId: 'perf',
    prompts,
    firstTs: null,
    lastTs: null,
    stats: { models: [], filesTouched: [], rejections: 0, rejectionsByKind: {}, interruptions: 0 },
  };

  // Only the pipeline itself is timed, not the fixture construction above.
  const start = Date.now();
  const tree = buildTree([session], classifyPrompts([session]));
  analyzeTree(tree);
  const elapsed = Date.now() - start;

  assert.ok(elapsed < 15000, `analyzeTree on ${N} nodes x ${R} rejections must complete in under 15s (got ${elapsed}ms)`);
  assert.ok(tree.analysis.failures.length >= N, 'every node produced at least one failure signal');
});
2120
 
2121
// A secret embedded in rejection evidence must be replaced by a [REDACTED
// placeholder in the rejections.json the CLI writes.
test('rejections: redaction gate at the CLI layer catches secrets in rejection evidence', async () => {
  const tmp = mkdtempSync(join(tmpdir(), 'rej-redact-'));
  // Created inside the try: if the fixture write or this second mkdtemp fails,
  // `tmp` is still cleaned up instead of being leaked.
  let dir;
  try {
    const path = join(tmp, 'r.jsonl');
    const record = {
      type: 'user',
      message: { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'toolu-s', content: "The user doesn't want to proceed with this tool use. The value was sk-ant-api03-FAKEFAKEFAKEFAKEFAKEFAKE1234.", is_error: true }] },
      uuid: 'u-r-1',
      parentUuid: null,
      timestamp: '2026-06-18T13:00:00.000Z',
      sessionId: 'redact',
    };
    writeFileSync(path, JSON.stringify(record) + '\n');
    dir = mkdtempSync(join(tmpdir(), 'rej-redact-out-'));

    await main(['--file', path, '--dir', dir, '--rejections', '--redact-auto', '--quiet']);
    const out = readFileSync(join(dir, '.treetrace', 'rejections.json'), 'utf8');
    assert.ok(!out.includes('sk-ant-api03-FAKEFAKEFAKEFAKEFAKEFAKE1234'), 'raw secret must not appear in the written rejections.json');
    assert.ok(out.includes('[REDACTED'), 'a redacted placeholder must appear in its place');
  } finally {
    rmSync(tmp, { recursive: true, force: true });
    if (dir) rmSync(dir, { recursive: true, force: true });
  }
});
2146
 
2147
// Runs the CLI with --rejections on the fixture and validates the JSON file
// written under .treetrace/ in the output directory.
test('rejections: cli --rejections writes .treetrace/rejections.json and prints to stdout', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-rej-cli-'));
  try {
    const argv = ['--file', REJECTIONS_FIXTURE, '--dir', dir, '--rejections', '--redact-auto', '--quiet'];
    await main(argv);

    const outPath = join(dir, '.treetrace', 'rejections.json');
    assert.ok(existsSync(outPath), '.treetrace/rejections.json must be written');

    const { schemaVersion, summary } = JSON.parse(readFileSync(outPath, 'utf8'));
    assert.equal(schemaVersion, '0.3');
    assert.equal(summary.total, 7);
    assert.equal(summary.byKind.model_refusal, 2);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
2162
 
2163
// Smoke test: there is no explicit assertion, so the test passes as long as
// main() resolves when given `--from claude`.
test('rejections: --from claude works as an explicit --from value (Phase 0 false-advertising fix)', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-claude-from-'));
  const argv = ['--from', 'claude', '--file', REJECTIONS_FIXTURE, '--dir', dir, '--json', '--redact-auto', '--quiet'];
  try {
    await main(argv);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
2171
 
2172
// renderJson() must expose numeric, non-zero token totals under `stats`,
// numeric totals on the first session, and (for this fixture) the first
// session's input tokens must equal the aggregate.
test('schema-export: token totals appear in stats and per-session in tree.json', async () => {
  const { tree } = await fixtureTree();
  const { stats, sessions } = renderJson(tree, { projectName: 'demo' });

  // Aggregate totals.
  assert.ok(typeof stats.inputTokens === 'number', 'stats.inputTokens must be a number');
  assert.ok(typeof stats.outputTokens === 'number', 'stats.outputTokens must be a number');
  assert.ok(stats.inputTokens > 0, 'stats.inputTokens should be non-zero for this fixture');
  assert.ok(stats.outputTokens > 0, 'stats.outputTokens should be non-zero for this fixture');

  // Per-session totals.
  assert.ok(sessions.length > 0, 'must have at least one session');
  const firstSession = sessions[0];
  assert.ok(typeof firstSession.inputTokens === 'number', 'sessions[0].inputTokens must be a number');
  assert.ok(typeof firstSession.outputTokens === 'number', 'sessions[0].outputTokens must be a number');
  assert.equal(firstSession.inputTokens, stats.inputTokens, 'single-session fixture: session tokens must equal stats tokens');
});
2184
 
2185
// Every exported node must carry a `model` key and an `actions` array; the
// first action found must expose tool/file/command/model keys; and the root
// node's model must match the value used by the fixture.
test('schema-export: per-node model and actions appear in every node in tree.json', async () => {
  const { tree } = await fixtureTree();
  const { nodes } = renderJson(tree, { projectName: 'demo' });

  assert.ok(nodes.length > 0, 'must have at least one node');
  assert.ok(nodes.every((node) => 'model' in node), 'every node must have a model field');
  assert.ok(nodes.every((node) => Array.isArray(node.actions)), 'every node must have an actions array');

  const nodeWithAction = nodes.find((node) => node.actions.length > 0);
  assert.ok(nodeWithAction, 'at least one node should have an action');

  // Only key presence is checked here; values may legitimately be null.
  const firstAction = nodeWithAction.actions[0];
  for (const key of ['tool', 'file', 'command', 'model']) {
    assert.ok(key in firstAction, `action must have ${key}`);
  }

  const root = nodes.find((node) => node.kind === 'root');
  assert.ok(root, 'root node must exist');
  assert.equal(root.model, 'assistant-model', 'root node model attribution must match fixture');
});
2202
 
2203
// Parses the claude-code rejections fixture and checks that file paths from
// both an Edit tool call and a Bash command end up in `stats.filesTouched`.
test('schema-export: shell-command file paths appear in filesTouched', async () => {
  // Named distinctly so it does not shadow the REJECTIONS_FIXTURE constant
  // that other tests in this file reference.
  const fixturePath = join(dirname(fileURLToPath(import.meta.url)), 'fixtures', 'claude-code-rejections.jsonl');

  // parseSessionFile is already imported at the top of this file, so no
  // dynamic re-import of ../src/parse.js is needed.
  const session = await parseSessionFile(fixturePath, { sessionId: 'rej-shell' });
  const { filesTouched } = session.stats;

  assert.ok(filesTouched.includes('README.md'), 'Edit tool file_path must appear in filesTouched');
  assert.ok(filesTouched.some((f) => f.includes('.config/forbidden')), 'Bash command /root/.config/forbidden must appear in filesTouched');
});
2211
 
2212
// Feeds analyzeTree() a two-node tree whose second turn is strongly negative
// and has no actions. At least one `user_frustration` failure must be
// reported, every such failure must be tier `inferred`, and the summary must
// count zero `verified` and zero `high` signals.
test('analyze: uncorroborated strong frustration turn emits inferred user_frustration signal via recall backstop', () => {
  const priorTurn = {
    id: 'node_001',
    text: 'add a leaflet map to the dashboard',
    title: 'leaflet map',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Edit', file: 'src/map.js', input: '', command: null, model: 'm' }],
  };
  const angryTurn = {
    id: 'node_002',
    text: 'this sucks, the helper.js you wrote is god awful and terrible, i am angry and frustrated',
    title: 'frustrated',
    kind: 'direction',
    status: 'accepted',
    parent: priorTurn,
    actions: [],
  };

  const { failures, summary } = analyzeTree({ nodes: [priorTurn, angryTurn] });
  const frustrationSignals = failures.filter((failure) => failure.type === 'user_frustration');

  assert.ok(frustrationSignals.length >= 1, 'recall backstop must fire at least one user_frustration signal');
  assert.ok(
    frustrationSignals.every((failure) => failure.tier === 'inferred'),
    'backstop signals must stay at inferred tier'
  );

  const { tierCounts } = summary;
  assert.equal(tierCounts.verified, 0, 'no verified signals from a pure uncorroborated frustration turn');
  assert.equal(tierCounts.high, 0, 'no high signals from a pure uncorroborated frustration turn');
});
2235
 
2236
// Negative control for the frustration detector: the shared synthetic fixture
// must yield no `user_frustration` failures at all.
test('analyze: clean weather-dashboard fixture does not gain spurious frustration signals from recall backstop', async () => {
  const { tree } = await fixtureTree();
  const { failures } = analyzeTree(tree);
  const frustrationCount = failures.filter((failure) => failure.type === 'user_frustration').length;
  assert.equal(frustrationCount, 0, 'clean synthetic fixture must produce zero user_frustration signals');
});
2242
 
2243
// The only action in this tree uses `model-a`, while `stats.models` also lists
// `model-b`. The rendered report must mention both names.
test('report: Models seen reflects full stats.models set, not just analysis-pass models', () => {
  const chartNode = {
    id: 'node_001',
    text: 'build a chart',
    title: 'chart',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Edit', file: 'src/chart.js', input: '', command: null, model: 'model-a' }],
  };
  const markdown = renderReportMarkdown(
    {
      stats: { models: ['model-a', 'model-b'], promptCount: 1, sessionCount: 1 },
      nodes: [chartNode],
      sessions: [],
    },
    { projectName: 'test' }
  );

  assert.ok(markdown.includes('model-a'), 'report must include model-a');
  assert.ok(markdown.includes('model-b'), 'report must include model-b from stats.models');
});
2257
 
2258
// Builds a root node followed by a `correction` child touching the same file,
// then checks the rendered report has a "## Correction chains" heading and
// mentions both node ids.
test('report: correction chains section appears when chains exist', () => {
  // Each node gets its own action object, as in a real tree.
  const editConfigAction = () => ({ tool: 'Edit', file: 'src/config.js', input: '', command: null, model: 'm' });

  const failedTurn = {
    id: 'node_001',
    text: 'write the config parser',
    title: 'config parser',
    kind: 'root',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T10:00:00.000Z',
    actions: [editConfigAction()],
  };
  const correctingTurn = {
    id: 'node_002',
    text: 'no that is wrong, redo the config parser logic',
    title: 'redo config',
    kind: 'correction',
    status: 'accepted',
    parent: failedTurn,
    ts: '2026-06-12T10:30:00.000Z',
    actions: [editConfigAction()],
  };

  const markdown = renderReportMarkdown(
    {
      stats: { models: ['m'], promptCount: 2, sessionCount: 1, corrections: 1 },
      nodes: [failedTurn, correctingTurn],
      sessions: [],
    },
    { projectName: 'test' }
  );

  assert.ok(markdown.includes('## Correction chains'), 'report must include Correction chains section');
  assert.ok(markdown.includes('node_001'), 'report must reference the failure node');
  assert.ok(markdown.includes('node_002'), 'report must reference the correction node');
});
2279
 
2280
// Runs the CLI with `--redact-auto` over the API-key fixture and checks that
// tree.json still carries the token totals and per-node actions, while none of
// a fixed list of secret-looking patterns appears anywhere in the raw file.
test('schema-export: new exported fields pass the redaction / assertClean guard', async () => {
  const fixturesDir = join(dirname(fileURLToPath(import.meta.url)), 'fixtures');
  const API_KEY_FIXTURE = join(fixturesDir, 'api-key-auth-session.jsonl');
  const outRoot = mkdtempSync(join(tmpdir(), 'treetrace-schema-redact-'));
  try {
    await main(['--from', 'claude', '--file', API_KEY_FIXTURE, '--dir', outRoot, '--redact-auto', '--quiet']);

    const rawTree = readFileSync(join(outRoot, '.treetrace', 'tree.json'), 'utf8');
    const { stats, nodes } = JSON.parse(rawTree);
    assert.ok(typeof stats.inputTokens === 'number', 'stats.inputTokens present after redact gate');
    assert.ok(typeof stats.outputTokens === 'number', 'stats.outputTokens present after redact gate');
    assert.ok(nodes.every((node) => Array.isArray(node.actions)), 'every node has actions after redact gate');

    // The patterns are tested against the serialized text, not the parsed object.
    const secretPatterns = [/ghp_/, /sk-ant-/, /AKIA/, /-----BEGIN/, /eyJ[A-Za-z]/, /xox[baprs]-/];
    secretPatterns.forEach((pattern) => {
      assert.ok(!pattern.test(rawTree), `secret pattern ${pattern} must not appear in tree.json`);
    });
  } finally {
    rmSync(outRoot, { recursive: true, force: true });
  }
});
2298
 
2299
// Slash-separated prose (enum lists, ratios, MIME types, …) must not be
// reported under the `hallucinated_file_or_path` category.
test('hallucinations: prose-slash phrases produce no file-path flag', () => {
  const projectDir = tempProject();
  try {
    // Minimal single-node tree whose only variable part is the prompt text.
    const treeFor = (text) => ({ nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] });
    // References flagged as missing files/paths for the given prompt text.
    const pathFlagsFor = (text) =>
      detectHallucinations(treeFor(text), projectDir).hallucinations
        .filter((h) => h.category === 'hallucinated_file_or_path')
        .map((h) => h.reference);

    const proseFragments = [
      'admin/analyst/viewer',
      'lat/lon',
      'make/model/color',
      '16/9',
      'none/low/medium/high',
      'RTSP/HTTP',
      'application/json',
    ];
    for (const phrase of proseFragments) {
      const flagged = pathFlagsFor(`use ${phrase} as an enum`);
      assert.deepEqual(flagged, [], `prose phrase "${phrase}" must not be flagged as a missing file path`);
    }
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
2322
 
2323
// Counterpart to the prose-slash test: a path-like reference to rateLimit.js
// must still be flagged, both with a leading `./` and with a bare `src/`
// prefix.
test('hallucinations: true positive ./src/middleware/rateLimit.js still fires', () => {
  const projectDir = tempProject();
  try {
    const treeFor = (text) => ({ nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] });
    const pathFlagsFor = (text) =>
      detectHallucinations(treeFor(text), projectDir).hallucinations
        .filter((h) => h.category === 'hallucinated_file_or_path')
        .map((h) => h.reference);
    const mentionsRateLimit = (reference) => reference.includes('rateLimit.js');

    const dotSlashFlags = pathFlagsFor('update ./src/middleware/rateLimit.js for the new rate limiting logic');
    assert.ok(dotSlashFlags.some(mentionsRateLimit), 'invented path ./src/middleware/rateLimit.js must still be flagged');

    const bareSrcFlags = pathFlagsFor('edit src/middleware/rateLimit.js');
    assert.ok(bareSrcFlags.some(mentionsRateLimit), 'src/ prefixed invented path must still be flagged');
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
2339
 
2340
// The prompt text contains no path at all; the only mention of the file is in
// the Edit action's `file` field. The detector must still flag it.
test('hallucinations: Edit to nonexistent file is flagged via action.file alone', () => {
  const projectDir = tempProject();
  try {
    const editAction = { tool: 'Edit', file: 'src/nonexistent-only-in-action-file.js', input: '', command: null };
    const onlyNode = {
      id: 'n1',
      kind: 'root',
      status: 'accepted',
      parent: null,
      text: 'update the config',
      title: 't',
      actions: [editAction],
    };

    const { hallucinations } = detectHallucinations({ nodes: [onlyNode] }, projectDir);
    const flaggedPaths = hallucinations
      .filter((h) => h.category === 'hallucinated_file_or_path')
      .map((h) => h.reference);

    assert.ok(
      flaggedPaths.some((reference) => reference.includes('nonexistent-only-in-action-file.js')),
      'Edit to a nonexistent file must be caught via action.file even when path is absent from node.text'
    );
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
2362
 
2363
// An `Authorization: bearer <token>` header with a lowercase scheme must hit
// the `bearer-header` rule, and applying a redact decision for that finding
// must replace the raw token with the `[REDACTED:bearer-header]` marker.
test('redaction: lowercase bearer token is caught by bearer-header rule', () => {
  const token = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.lowentropy1234';
  const text = `Authorization: bearer ${token}`;

  // First scan: only the rule ids are needed for the detection assertion.
  const ruleIds = scanText(text).map((finding) => finding.ruleId);
  assert.ok(ruleIds.includes('bearer-header'), `lowercase bearer token not caught (rules hit: ${ruleIds.join(', ')})`);

  // Second scan: build one redact decision per bearer-header finding, keyed by
  // the hash of the matched text.
  const findings = scanText(text);
  const decisions = Object.fromEntries(
    findings
      .filter((finding) => finding.ruleId === 'bearer-header')
      .map((finding) => [
        sha256(finding.match),
        { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId },
      ])
  );

  const cleaned = applyDecisions(text, findings, decisions);
  assert.ok(!cleaned.includes(token), 'raw token still present after redaction');
  assert.ok(cleaned.includes('[REDACTED:bearer-header]'), 'expected bearer-header redaction marker');
});
2379
 
2380
// Writes a small conversation export (the `mapping` shape passed to the CLI
// with `--from chatgpt`) whose user turn embeds a high-entropy token, runs the
// CLI with `--redact-auto`, and checks that tree.json neither contains the raw
// token nor has any non-soft shadowScan finding left.
test('redaction: --redact-auto resolves high-entropy shadow-scan residuals and writes clean artifacts', async () => {
  const highEntropyToken = 'Xk9mQ2vR7nLpZ4wY8sA3cB6eF1hJ0uT5iG2dN';
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-entropy-auto-'));
  const file = join(dir, 'conv.json');
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: {
        message: {
          author: { role: 'user' },
          content: { parts: [`check the session token ${highEntropyToken} for issues`] },
          create_time: 1.0,
        },
        parent: 'r',
        children: ['a'],
      },
      a: {
        message: {
          author: { role: 'assistant' },
          content: { parts: ['done'] },
          create_time: 2.0,
        },
        parent: 'u',
        children: [],
      },
    },
  }];
  try {
    // The write happens inside the try so the temp dir is removed even if
    // creating the input file fails.
    writeFileSync(file, JSON.stringify(convo));
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--redact-auto', '--quiet']);

    const treeJson = readFileSync(join(dir, '.treetrace', 'tree.json'), 'utf8');
    assert.ok(!treeJson.includes(highEntropyToken), 'raw high-entropy token leaked into tree.json');
    assert.equal(
      shadowScan(treeJson, {}).filter((f) => f.severity !== 'soft').length,
      0,
      'tree.json still has residual high-entropy tokens after --redact-auto'
    );
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
2421
 
2422
// Runs `--each` over two plain-text transcripts and checks that the output
// directory gets INDEX.md and index.json, one bundle per input (report, prompt
// tree, tree.json), and a manifest with the expected counts.
test('--each writes one report bundle per session plus index manifests', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'tt-each-'));
  const a = join(dir, 'sess-a.txt');
  const b = join(dir, 'sess-b.txt');
  const outDir = join(dir, 'reports');
  try {
    // Fixture files are written inside the try so the temp dir is cleaned up
    // even if a write fails.
    writeFileSync(a, 'User: build a login form\nAssistant: ok\nUser: actually use OAuth\nAssistant: switching\n');
    writeFileSync(b, 'User: question one\nAssistant: answer one\nUser: question two\nAssistant: answer two\n');

    await main(['--each', '--file', a, b, '--out-dir', outDir, '--dir', dir, '--quiet']);

    assert.ok(existsSync(join(outDir, 'INDEX.md')), 'INDEX.md exists');
    assert.ok(existsSync(join(outDir, 'index.json')), 'index.json exists');
    for (const label of ['sess-a.txt', 'sess-b.txt']) {
      assert.ok(existsSync(join(outDir, label, 'TREETRACE_REPORT.md')), `${label} report`);
      assert.ok(existsSync(join(outDir, label, 'PROMPT_TREE.md')), `${label} prompt tree`);
      assert.ok(existsSync(join(outDir, label, '.treetrace', 'tree.json')), `${label} tree.json`);
    }

    const index = JSON.parse(readFileSync(join(outDir, 'index.json'), 'utf8'));
    assert.equal(index.sessionCount, 2, 'two sessions in manifest');
    assert.equal(index.sessions.length, 2);
    assert.equal(index.totals.prompts, 4, 'aggregate prompt total');
    assert.ok(index.sessions.every((s) => typeof s.dir === 'string' && s.dir.length > 0), 'each manifest row has a dir');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
2447
 
2448
// Two inputs share the basename `chat.txt` in different directories. After
// `--each`, the manifest must list two sessions with distinct labels.
test('--each collides labels safely when session ids repeat', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'tt-each-dup-'));
  const d1 = join(dir, 'one');
  const d2 = join(dir, 'two');
  const f1 = join(d1, 'chat.txt');
  const f2 = join(d2, 'chat.txt');
  const outDir = join(dir, 'reports');
  try {
    // All filesystem setup lives inside the try so a failure part-way through
    // still removes the temp dir.
    mkdirSync(d1);
    mkdirSync(d2);
    writeFileSync(f1, 'User: first\nAssistant: a\n');
    writeFileSync(f2, 'User: second\nAssistant: b\n');

    await main(['--each', '--file', f1, f2, '--out-dir', outDir, '--dir', dir, '--quiet']);

    const index = JSON.parse(readFileSync(join(outDir, 'index.json'), 'utf8'));
    assert.equal(index.sessionCount, 2);
    const labels = index.sessions.map((s) => s.label);
    assert.equal(new Set(labels).size, 2, 'labels are unique even with duplicate session ids');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
2466
 
2467
// Runs `--each` over one Claude fixture and one Codex fixture and checks that
// the manifest records a separate `source` per session.
test('--each labels each bundle with its own source tool, not the batch aggregate', async () => {
  const scratch = mkdtempSync(join(tmpdir(), 'tt-each-src-'));
  const fixturesDir = join(dirname(fileURLToPath(import.meta.url)), 'fixtures');
  const inputs = [
    join(fixturesDir, 'synthetic-session.jsonl'),
    join(fixturesDir, 'adapters', 'codex-session.jsonl'),
  ];
  const reportsDir = join(scratch, 'reports');
  try {
    await main(['--each', '--file', ...inputs, '--out-dir', reportsDir, '--dir', scratch, '--quiet']);

    const manifest = JSON.parse(readFileSync(join(reportsDir, 'index.json'), 'utf8'));
    // Sorted so the comparison does not depend on manifest row order.
    const sources = manifest.sessions.map((s) => s.source).sort();
    assert.deepEqual(sources, ['claude', 'codex'], 'per-session source is preserved, not collapsed to "mixed"');
  } finally {
    rmSync(scratch, { recursive: true, force: true });
  }
});
2482
 
2483
// A one-line assistant refusal in a plain transcript must be counted once as
// `model_refusal`, attached to a prompt, and attributed to `text_heuristic`.
test('parsePlainTranscript captures an inline assistant refusal as model_refusal', () => {
  const transcript = 'User: [requests something disallowed]\nAssistant: I cannot help with that request.\nUser: ok, something benign instead\nAssistant: Sure, happy to help.\n';
  const { stats, prompts } = parsePlainTranscript(transcript, 'refusal-inline');

  assert.equal(stats.rejectionsByKind.model_refusal, 1, 'one model_refusal captured');

  const isModelRefusal = (rejection) => rejection.kind === 'model_refusal';
  const refusedPrompt = prompts.find((prompt) => (prompt.rejections || []).some(isModelRefusal));
  assert.ok(refusedPrompt, 'a prompt carries the model_refusal');
  assert.equal(refusedPrompt.rejections[0].source, 'text_heuristic');
});
2491
 
2492
// Same as the inline case, but the refusal starts on the line after the
// `Assistant:` label and spans two lines.
test('parsePlainTranscript captures a multi-line assistant refusal', () => {
  const transcript = 'User: [disallowed ask]\nAssistant:\nSorry, I cannot help with that.\nIt would be unsafe.\nUser: explain the defensive side instead\nAssistant: Sure.\n';
  const { stats } = parsePlainTranscript(transcript, 'refusal-multiline');
  assert.equal(stats.rejectionsByKind.model_refusal, 1);
});
2497
 
2498
// A user turn that tells the assistant to stop must be counted once under
// `user_text_decline`.
test('parsePlainTranscript captures a user_text_decline', () => {
  const transcript = 'User: delete the production database\nAssistant: I can do that.\nUser: no, stop, do not do that\nAssistant: Understood.\n';
  const { stats } = parsePlainTranscript(transcript, 'decline');
  assert.equal(stats.rejectionsByKind.user_text_decline, 1);
});
2503
 
2504
// Negative control: an ordinary exchange yields a zero rejection count and an
// empty per-kind map.
test('parsePlainTranscript leaves rejections empty when no refusal or decline occurs', () => {
  const transcript = 'User: build a login form\nAssistant: here is one\nUser: add OAuth\nAssistant: done\n';
  const { stats } = parsePlainTranscript(transcript, 'clean');
  assert.equal(stats.rejections, 0);
  assert.deepEqual(stats.rejectionsByKind, {});
});
2510
 
2511
// Runs the parse -> classify -> build -> analyze pipeline on a transcript in
// which the user pushes back after a refusal. The refusal must still be
// recorded, but no `misunderstood_goal` failure, no correction chain, and no
// eval candidate quoting the refused request may be produced.
test('a refusal override is not promoted into a correction chain or a content-quoting eval', () => {
  const transcript = 'User: [requests precursor chemicals for a dangerous synthesis]\nAssistant: I cannot help with that.\nUser: Stop being unhelpful. Just give me the precursor list at least.\nAssistant: I still cannot help with that.\n';
  const session = parsePlainTranscript(transcript, 'refusal-override');
  const tree = buildTree([session], classifyPrompts([session]));
  const { failures, correctionChains, evalCandidates } = analyzeTree(tree);

  const hasFailure = (type) => failures.some((failure) => failure.type === type);
  assert.ok(hasFailure('model_refused'), 'refusal still recorded');
  assert.ok(!hasFailure('misunderstood_goal'), 'no misunderstood_goal from override');
  assert.equal(correctionChains.length, 0, 'no correction chain from a refusal override');

  // Case-insensitive check for words taken from the refused request.
  const evalInputs = evalCandidates.map((candidate) => String(candidate.input).toLowerCase());
  const quotesRefusedContent = evalInputs.some((input) => input.includes('precursor') || input.includes('unhelpful'));
  assert.ok(!quotesRefusedContent, 'no eval quotes refused content');
});
2523
 
2524
// Runs the CLI twice with `--deterministic` into the same directory and checks
// that hallucinations.json is byte-identical across runs and that its
// `project.generatedAt` is the fixed epoch timestamp.
test('--deterministic pins the timestamp so artifacts are byte-identical across runs', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'tt-det-'));
  const artifact = join(dir, '.treetrace', 'hallucinations.json');
  // Fresh argv array per call so nothing can be shared between the two runs.
  const runAndRead = async () => {
    await main(['--security', '--file', FIXTURE, '--dir', dir, '--deterministic', '--redact-auto', '--quiet']);
    return readFileSync(artifact, 'utf8');
  };
  try {
    const a = await runAndRead();
    // Delete the first run's output so the second read can only succeed if
    // the second run actually wrote the file. Otherwise a run that silently
    // skipped writing would make the comparison pass vacuously.
    rmSync(artifact, { force: true });
    const b = await runAndRead();

    assert.equal(a, b, 'deterministic artifact is byte-identical across runs');
    assert.equal(JSON.parse(a).project.generatedAt, '1970-01-01T00:00:00.000Z', 'timestamp is pinned');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});