| 1 | import { test } from 'node:test'; |
| 2 | import assert from 'node:assert/strict'; |
| 3 | import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; |
| 4 | import { tmpdir } from 'node:os'; |
| 5 | import { fileURLToPath } from 'node:url'; |
| 6 | import { dirname, join } from 'node:path'; |
| 7 | |
| 8 | import { parseSessionFile, parsePlainTranscript, classifySpecialUserText } from '../src/parse.js'; |
| 9 | import { classifyPrompts } from '../src/extract.js'; |
| 10 | import { buildTree } from '../src/tree.js'; |
| 11 | import { scanText, applyDecisions, shadowScan, maskFor, resolveFindings, isGitShaCandidate, patchResiduals } from '../src/redact.js'; |
| 12 | import { renderMarkdown, promptPack } from '../src/render-md.js'; |
| 13 | import { renderMermaid, isSummaryByDefault, SUMMARY_NODE_THRESHOLD } from '../src/render-mermaid.js'; |
| 14 | import { renderJson } from '../src/render-json.js'; |
| 15 | import { renderHandoff } from '../src/handoff.js'; |
| 16 | import { renderReportMarkdown, renderTerminalSummary } from '../src/report.js'; |
| 17 | import { |
| 18 | analyzeTree, |
| 19 | renderFailuresJson, |
| 20 | renderRejectionsJson, |
| 21 | renderLessonsMarkdown, |
| 22 | renderEvalsJsonl, |
| 23 | renderMemoryMarkdown, |
| 24 | isRiskyCommand, |
| 25 | mentionsTestSkip, |
| 26 | SECURITY_INTENT_PARTS, |
| 27 | RISKY_CMD_PARTS, |
| 28 | } from '../src/analyze.js'; |
| 29 | import { main, parseArgs, wrapMermaidDoc } from '../src/cli.js'; |
| 30 | import { mungePath } from '../src/discover.js'; |
| 31 | import { sha256, escapeMd } from '../src/util.js'; |
| 32 | import { detectHallucinations, renderHallucinationsJson } from '../src/hallucinate.js'; |
| 33 | import { renderSecurityReport, hasSecuritySignal } from '../src/security-report.js'; |
| 34 | import { spawn } from 'node:child_process'; |
| 35 | |
// Absolute path of the synthetic session fixture, resolved relative to this test file.
const FIXTURE = join(
  fileURLToPath(new URL('.', import.meta.url)),
  'fixtures',
  'synthetic-session.jsonl',
);
| 37 | |
/**
 * Parse the synthetic fixture session (with session id 'fix-001'), classify its prompts,
 * and build the prompt tree from them.
 *
 * @returns {Promise<{session: *, nodes: *, tree: *}>} the parsed session, the result of
 *   classifyPrompts, and the result of buildTree.
 */
async function fixtureTree() {
  const parsed = await parseSessionFile(FIXTURE, { sessionId: 'fix-001' });
  const classified = classifyPrompts([parsed]);
  const built = buildTree([parsed], classified);
  return { session: parsed, nodes: classified, tree: built };
}
| 43 | |
// Parser contract on the synthetic fixture: which prompts survive filtering, plus the
// title and stats derived from the session.
test('parser: extracts only human prompts, skips tool results/commands/sidechains', async () => {
  const { session } = await fixtureTree();
  const { prompts, stats } = session;

  assert.equal(prompts.length, 5);
  // Neither command wrappers nor subagent text may show up among the prompts.
  assert.ok(!prompts.some((prompt) => prompt.text.startsWith('<command-name>')));
  assert.ok(prompts.every((prompt) => !prompt.text.includes('subagent')));

  assert.equal(session.title, 'Build a weather dashboard');
  assert.equal(stats.toolUses, 2);
  assert.equal(stats.interruptions, 1);
  assert.deepEqual(stats.models, ['assistant-model']);
  assert.equal(stats.filesTouched.length, 1);
});
| 55 | |
// The fixture's prompts fold into four classified nodes, checked here in order.
test('extractor: classification kinds and nudge folding', async () => {
  const { nodes } = await fixtureTree();
  assert.equal(nodes.length, 4);

  const [first, second, third, fourth] = nodes;
  assert.equal(first.kind, 'root');
  assert.equal(first.nudges, 1);
  assert.equal(second.kind, 'direction');
  assert.equal(third.kind, 'correction');
  assert.equal(fourth.kind, 'scope-change');
  assert.equal(fourth.afterInterruption, true);
});
| 66 | |
// Tree-level expectations for the fixture: status of the 'leaflet' prompt and aggregate stats.
test('tree: fork detection marks rewound branch abandoned', async () => {
  const { tree } = await fixtureTree();
  const leaflet = tree.nodes.find((n) => n.text.includes('leaflet'));
  // Guard the lookup so a missing node fails as a readable assertion, not a TypeError.
  assert.ok(leaflet, 'fixture should contain a prompt mentioning leaflet');
  assert.equal(leaflet.status, 'accepted');
  assert.equal(tree.roots.length, 1);
  assert.equal(tree.stats.promptCount, 4);
  assert.equal(tree.stats.corrections, 1);
});
| 75 | |
// Scans the fixture's scope-change prompt, auto-redacts every finding, and checks that
// neither the key prefix nor the URL password survives.
test('redaction: catches anthropic key and basic-auth URL, masks them', async () => {
  const { tree } = await fixtureTree();
  const scope = tree.nodes.find((n) => n.kind === 'scope-change');
  // Guard the lookup so a missing node fails as a readable assertion, not a TypeError.
  assert.ok(scope, 'fixture should contain a scope-change node');

  const findings = scanText(scope.text);
  const rules = new Set(findings.map((f) => f.ruleId));
  assert.ok(rules.has('anthropic-key'), `anthropic-key not in ${[...rules]}`);
  assert.ok(rules.has('url-basic-auth'), `url-basic-auth not in ${[...rules]}`);

  const { decisions } = await resolveFindings(findings, {}, { interactive: false, autoRedact: true });
  const cleaned = applyDecisions(scope.text, findings, decisions);
  assert.ok(!cleaned.includes('sk-ant-'), 'key leaked');
  assert.ok(!cleaned.includes('hunter2pass'), 'password leaked');
  assert.ok(cleaned.includes('[REDACTED:'));
});
| 90 | |
// shadowScan must report a raw token, and report nothing once that token has either an
// explicit "keep" decision or has been masked out of the text.
test('redaction: shadow scan flags unresolved secrets, passes resolved/kept ones', () => {
  const dirty = 'token ghp_0123456789abcdefghijklmnopqrstuvwxyzAB end';
  assert.equal(shadowScan(dirty, {}).length, 1);

  const findings = scanText(dirty);
  const [first] = findings;
  const decisionKey = sha256(first.match);

  const keepDecisions = { [decisionKey]: { action: 'keep', ruleId: first.ruleId } };
  assert.equal(shadowScan(dirty, keepDecisions).length, 0);

  const redactDecisions = {
    [decisionKey]: { action: 'redact', replacement: maskFor(first), ruleId: first.ruleId },
  };
  const masked = applyDecisions(dirty, findings, redactDecisions);
  assert.equal(shadowScan(masked, {}).length, 0);
});
| 104 | |
// Table-driven check: each sample, embedded in surrounding prose, must trigger its rule.
test('redaction: rule coverage on known formats', () => {
  const samplesWithRule = [
    ['AKIAIOSFODNN7EXAMPLE', 'aws-access-key'],
    ['github_pat_11AAAAAAA0123456789abcdefghij', 'github-fine-grained'],
    ['xoxb-treetrace-example-slack-token-0', 'slack-token'],
    ['sk_live_abcdefghijklmnop123', 'stripe-live-key'],
    ['tskey-auth-kFGiAS7CNTRL-abcdef123456', 'tailscale-key'],
    ['-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaA==\n-----END OPENSSH PRIVATE KEY-----', 'private-key-block'],
    ['eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U', 'jwt'],
    ['password = "correct-horse-battery"', 'secret-assignment'],
    ['SECRET="correct horse battery staple"', 'secret-assignment'],
    ['https://user:p:a:ss@example.com/path', 'url-basic-auth'],
  ];

  samplesWithRule.forEach(([sample, expected]) => {
    const hits = scanText(`some text ${sample} more text`).map((finding) => finding.ruleId);
    assert.ok(hits.includes(expected), `${expected} missed in: ${sample} (got ${hits})`);
  });
});
| 123 | |
// Quoted secret values containing backslash escapes must still match 'secret-assignment'.
test('redaction: escaped characters inside quoted secret assignments are still caught', () => {
  const labelledSamples = [
    ['escaped newline', '{"api_key":"line1\\nline2line2"}'],
    ['escaped tab', '{"api_key":"col1\\tcol2value"}'],
    ['escaped quote', '{"api_key":"abc\\"defghij"}'],
    ['escaped backslash', '{"api_key":"abc\\\\defghij"}'],
    ['single-quoted escaped newline', "{'password':'line1\\nline2value'}"],
    ['backtick escaped newline', 'const secret = `line1\\nline2value`;'],
  ];

  labelledSamples.forEach(([label, sample]) => {
    const hits = scanText(sample).map((finding) => finding.ruleId);
    const caught = hits.includes('secret-assignment');
    assert.ok(
      caught,
      `${label}: escaped secret value should be caught (got ${JSON.stringify(hits)} for ${sample})`
    );
  });
});
| 141 | |
// Runs the CLI over a ChatGPT-style export whose user message carries an escaped-JSON
// secret, then checks that none of the artifacts written contains the raw value.
test('redaction: end-to-end escaped-JSON secret leaves no raw value in any artifact', async () => {
  const rawValue = 'line1\\nline2line2line2';
  const secretLine = `config is {"api_key":"${rawValue}"}`;
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: [secretLine] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] },
    },
  }];

  const dir = mkdtempSync(join(tmpdir(), 'treetrace-esc-'));
  // Everything that touches the temp dir lives inside the try so the directory is removed
  // even when writing the input file (not just running the CLI) fails.
  try {
    const file = join(dir, 'escconv.json');
    writeFileSync(file, JSON.stringify(convo));
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);
    // Only artifacts that were actually produced are read; their contents are concatenated.
    const artifacts = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ].filter((f) => existsSync(join(dir, f))).map((f) => readFileSync(join(dir, f), 'utf8')).join('\n');
    assert.ok(!artifacts.includes(rawValue), 'raw escaped-JSON secret value leaked into an artifact');
    assert.ok(artifacts.includes('[REDACTED:secret-assignment]'), 'expected a secret-assignment redaction marker');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 167 | |
// A 64-char hex value (lower and upper case) and its 32-char prefix must all be flagged as
// 'hex-token'; auto-redacting the lowercase one must leave nothing for the shadow scan.
test('redaction: bare hex tokens (32+ chars) are detected, lower and upper case', async () => {
  const lower = '6881f8290266f4cc939959917f893a2a88787eb24bbcb6b9c37594c72bf448c3';
  const variants = [lower, lower.toUpperCase(), lower.slice(0, 32)];

  variants.forEach((hex) => {
    const hits = scanText(`my key is session_hex=${hex} ok`).map((f) => f.ruleId);
    assert.ok(hits.includes('hex-token'), `hex-token missed for ${hex} (got ${hits})`);
  });

  const assignment = `session_hex=${lower}`;
  const findings = scanText(assignment);
  const { decisions } = await resolveFindings(findings, {}, { interactive: false, autoRedact: true });
  const cleaned = applyDecisions(assignment, findings, decisions);
  assert.ok(!cleaned.includes(lower), 'raw hex leaked after redaction');
  assert.equal(shadowScan(cleaned, {}).length, 0, 'shadow scan should be clean after hex redaction');
});
| 182 | |
// A mixed lowercase/digit token embedded in prose must trip the 'high-entropy-token' rule.
test('redaction: high-entropy lowercase-and-digit token (no uppercase) is caught in prose', () => {
  const token = 'abcdefg0123456789hijklmnop4567qrstuv';
  const findings = scanText(`the access token is ${token} now`);
  const hits = findings.map((finding) => finding.ruleId);
  assert.ok(hits.includes('high-entropy-token'), `high-entropy token missed (got ${hits})`);
});
| 188 | |
// Negative cases: none of these strings may produce a 'high-entropy-token' finding.
test('redaction: uuids and long lowercase identifiers are not flagged as high-entropy', () => {
  const benignSamples = [
    '8400e29b-1d4f-4a6c-9b2e-7f3a1c5d8e90',
    'src/components/dashboard/widgets/chartwidget',
    'MAX_RETRY_ATTEMPTS_BEFORE_GIVING_UP_2',
  ];

  benignSamples.forEach((benign) => {
    const hits = scanText(benign).filter((f) => f.ruleId === 'high-entropy-token');
    assert.equal(hits.length, 0, `false positive high-entropy flag on ${benign}`);
  });
});
| 199 | |
// isGitShaCandidate(match, text, index): 40- and 64-hex values count only when the
// surrounding text looks like git output; a 32-hex value never counts.
test('redaction: git object hashes are classified as candidates only in a git context', () => {
  const sha1 = '0123456789abcdef0123456789abcdef01234567';
  const sha256hex = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef';
  const hex32 = '0123456789abcdef0123456789abcdef';

  // Positive cases.
  assert.ok(isGitShaCandidate(sha1, `commit ${sha1}`, 7), 'commit <sha1> should be a candidate');
  assert.ok(isGitShaCandidate(sha256hex, `git tree ${sha256hex}`, 9), 'git tree <sha256> should be a candidate');
  assert.ok(isGitShaCandidate(sha1, `${sha1} fix the parser\n`, 0), 'oneline sha should be a candidate');

  // Negative cases.
  assert.ok(!isGitShaCandidate(sha1, `token=${sha1} end`, 6), 'token= context is not git');
  assert.ok(!isGitShaCandidate(sha256hex, `session_hex=${sha256hex}`, 12), 'session_hex= context is not git');
  assert.ok(!isGitShaCandidate(hex32, `commit ${hex32}`, 7), '32-hex is not a git object id');
});
| 210 | |
// With keepGitShas the git object hash gets a "keep" decision while the unrelated hex
// secret is still redacted; without the option both are redacted.
test('redaction: --keep-git-shas keeps git hashes but stays fail-closed for other hex', async () => {
  const sha1 = '0123456789abcdef0123456789abcdef01234567';
  const secret = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef';
  const text = `commit ${sha1}\nmy key is session_hex=${secret} ok`;

  const findings = scanText(text);
  const git = findings.find((f) => f.match === sha1);
  const sec = findings.find((f) => f.match === secret);
  assert.ok(git && git.gitShaCandidate, 'git sha must be flagged as a candidate');
  assert.ok(sec && !sec.gitShaCandidate, 'session_hex secret must NOT be a git candidate');

  const baseOptions = { interactive: false, autoRedact: true };
  const gitKey = sha256(sha1);
  const secretKey = sha256(secret);

  const { decisions } = await resolveFindings(findings, {}, { ...baseOptions, keepGitShas: true });
  assert.equal(decisions[gitKey].action, 'keep', 'git object hash should be kept');
  assert.equal(decisions[gitKey].ruleId, 'git-commit-sha', 'kept under git-commit-sha rule');
  assert.equal(decisions[secretKey].action, 'redact', 'non-git hex must still be redacted');

  const { decisions: defaultDecisions } = await resolveFindings(findings, {}, { ...baseOptions });
  assert.equal(defaultDecisions[gitKey].action, 'redact', 'default must redact git sha too (fail-closed)');

  const cleaned = applyDecisions(text, findings, decisions);
  assert.ok(cleaned.includes(sha1), 'kept git sha should survive in output');
  assert.ok(!cleaned.includes(secret), 'non-git secret must be redacted');
  assert.equal(shadowScan(cleaned, decisions).length, 0, 'shadow scan must be clean after keep + redact');
});
| 234 | |
// Runs the CLI over a ChatGPT-style export whose user message carries the same hex secret
// in lower and upper case, then checks that no written artifact contains either form.
test('redaction: end-to-end hex secret leaves no raw hex in any artifact', async () => {
  const lower = '6881f8290266f4cc939959917f893a2a88787eb24bbcb6b9c37594c72bf448c3';
  const upper = lower.toUpperCase();
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: [`my key is session_hex=${lower} and HEX=${upper} ok`] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['got it'] }, create_time: 2.0 }, parent: 'u', children: [] },
    },
  }];

  const dir = mkdtempSync(join(tmpdir(), 'treetrace-hex-'));
  // Everything that touches the temp dir lives inside the try so the directory is removed
  // even when writing the input file (not just running the CLI) fails.
  try {
    const file = join(dir, 'hexconv.json');
    writeFileSync(file, JSON.stringify(convo));
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);
    // Only artifacts that were actually produced are read; their contents are concatenated.
    const artifacts = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ].filter((f) => existsSync(join(dir, f))).map((f) => readFileSync(join(dir, f), 'utf8')).join('\n');
    assert.ok(!artifacts.includes(lower), 'lowercase hex secret leaked into an artifact');
    assert.ok(!artifacts.includes(upper), 'uppercase hex secret leaked into an artifact');
    assert.ok(artifacts.includes('[REDACTED:hex-token]'), 'expected a hex-token redaction marker');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 261 | |
// A single 12 MiB run of 'A' must be scanned without throwing, be flagged as
// 'oversized-token', and leave nothing for the shadow scan once redacted.
test('redaction: a single 12MB token completes without throwing and stays safe', () => {
  const giant = 'A'.repeat(12 * 1024 * 1024);
  const text = `prefix ${giant} suffix`;

  let findings;
  assert.doesNotThrow(() => {
    findings = scanText(text);
  }, 'oversized token must not overflow the regex stack');
  assert.ok(findings.some((f) => f.ruleId === 'oversized-token'), 'oversized token should be flagged');

  const normal = scanText('store ghp_0123456789abcdefghijklmnopqrstuvwxyzAB and more');
  assert.ok(normal.some((f) => f.ruleId === 'github-token'), 'normal-size secrets still caught alongside the guard');

  // The helper's `decisions` property holds the redacted text (the applyDecisions result).
  const { decisions: redactedText } = applyDecisionsRoundTrip(text, findings);
  assert.equal(shadowScan(redactedText, {}).length, 0, 'oversized token should be cleaned after redaction');
});
| 273 | |
/**
 * Build a "redact" decision (masked via maskFor) for every finding, keyed by the sha256 of
 * its match, and apply them all to `text`.
 *
 * @param {string} text - the text that was scanned.
 * @param {Array} findings - findings for `text`; each is read for `match` and `ruleId`.
 * @returns {{decisions: *}} the applyDecisions result, under the `decisions` key. Despite
 *   the name this is the processed text, not the decision map.
 */
function applyDecisionsRoundTrip(text, findings) {
  const redactAll = Object.fromEntries(
    findings.map((finding) => [
      sha256(finding.match),
      { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId },
    ]),
  );
  return { decisions: applyDecisions(text, findings, redactAll) };
}
| 279 | |
// A provider key broken across a newline must still be reported as 'openai-key', and
// masking that finding must leave nothing for the shadow scan.
test('redaction: split provider tokens are caught before shadow scan', () => {
  const dirty = 'token sk-proj-abcdefghijklmnop\nqrstu1234567890ABCDE end';
  const findings = scanText(dirty);
  const hit = findings.find((f) => f.ruleId === 'openai-key');
  // Serialize the findings: interpolating the array directly prints "[object Object]".
  assert.ok(hit, `openai-key missed in ${JSON.stringify(findings)}`);

  const masked = applyDecisions(dirty, findings, {
    [sha256(hit.match)]: {
      action: 'redact',
      replacement: '[REDACTED:openai-key]',
      ruleId: 'openai-key',
    },
  });
  assert.equal(shadowScan(masked, {}).length, 0);
  assert.ok(!masked.includes('sk-proj-'));
});
| 294 | |
// A key interrupted by a single space must still be found as 'anthropic-key' and be
// removable so that the shadow scan comes back empty.
test('redaction: whitespace-split secret below the length floor is caught', () => {
  const dirty = 'store key sk-ant-api03-AAAA BBBBCCCCDDDDEEEEFFFFGGGG into the vault';
  const findings = scanText(dirty);
  const hit = findings.find((finding) => finding.ruleId === 'anthropic-key');
  assert.ok(hit, `split anthropic-key missed: ${JSON.stringify(findings)}`);

  const redactHit = {
    [sha256(hit.match)]: { action: 'redact', replacement: '[REDACTED:anthropic-key]', ruleId: 'anthropic-key' },
  };
  const masked = applyDecisions(dirty, findings, redactHit);
  assert.ok(!/sk-ant-api03-AAAA/.test(masked), `secret not redacted: ${masked}`);
  assert.equal(shadowScan(masked, {}).length, 0);
});
| 306 | |
// Scanning a 60k-character URL-like string must finish within a 2 second budget.
test('redaction: scan stays fast on long benign input (ReDoS guard)', () => {
  const big = `http://${'a'.repeat(60000)}`;
  const startedAt = Date.now();
  scanText(big);
  const elapsedMs = Date.now() - startedAt;
  assert.ok(elapsedMs < 2000, 'scan should stay linear on long input');
});
| 313 | |
// Ordinary engineering prose may only produce findings whose severity is 'soft'.
test('redaction: benign text produces no high/medium findings', () => {
  const benign =
    'Refactor the parser in src/parse.js to handle commit 3f2a1b9 and bump to v2.1.0-beta.3. The README.md needs a section on CONTRIBUTING.';
  const findings = scanText(benign);
  const hard = findings.filter((finding) => finding.severity !== 'soft');
  assert.deepEqual(hard, []);
});
| 320 | |
// The expected value must be the entity-escaped form of the input. An expected string that
// is byte-identical to the input would assert that escapeMd escapes nothing.
// NOTE(review): assumes escapeMd maps < > & to &lt; &gt; &amp; — confirm against src/util.js.
test('escapeMd neutralizes HTML-sensitive characters', () => {
  assert.equal(escapeMd('a<script>b</script>&c>'), 'a&lt;script&gt;b&lt;/script&gt;&amp;c&gt;');
});
| 324 | |
// A project name carrying raw HTML must appear entity-escaped in the heading and must not
// appear verbatim. With a raw expected string the two assertions contradict each other.
// NOTE(review): assumes < and > render as &lt; and &gt; — confirm against the renderer.
test('rendering escapes injection in project name and content', async () => {
  const { tree } = await fixtureTree();
  const md = renderMarkdown(tree, { projectName: 'x</summary></details><script>alert(1)</script>' });
  assert.ok(md.includes('# Prompt Tree: x&lt;/summary&gt;&lt;/details&gt;&lt;script&gt;'), 'project name not escaped');
  assert.ok(!md.includes('Prompt Tree: x</summary>'), 'raw HTML in project name');
});
| 331 | |
// Smoke-checks each renderer against the analyzed fixture tree: expected headings, schema
// version, and node/edge counts that agree with the tree itself.
test('renderers: markdown, json, handoff are consistent and footer-credited', async () => {
  const { tree } = await fixtureTree();
  analyzeTree(tree);
  const opts = { projectName: 'demo' };

  // Markdown prompt tree.
  const md = renderMarkdown(tree, { ...opts });
  assert.ok(md.startsWith('# Prompt Tree: demo'));
  for (const fragment of ['## Goal', '## Reusable Prompt Pack', '[treetrace]']) {
    assert.ok(md.includes(fragment));
  }

  // JSON export.
  const json = renderJson(tree, { ...opts });
  const nodesWithParent = tree.nodes.filter((n) => n.parent);
  assert.equal(json.schemaVersion, '0.3');
  assert.equal(json.nodes.length, tree.nodes.length);
  assert.equal(json.edges.length, nodesWithParent.length);
  assert.ok(json.nodes.every((n) => n.id && n.kind && typeof n.text === 'string'));
  assert.ok(json.analysis.failureSignals >= 1);
  assert.ok(json.correctionChains.length >= 1);
  assert.ok(json.nodes.some((n) => Array.isArray(n.failureSignals)));

  // Prompt pack.
  const pack = promptPack(tree.nodes);
  assert.ok(pack.includes('1.'));

  // Handoff document.
  const handoff = renderHandoff(tree, { ...opts });
  for (const heading of ['## Original goal', '## Constraints', '## Lessons']) {
    assert.ok(handoff.includes(heading));
  }

  // Report.
  const report = renderReportMarkdown(tree, { ...opts, generatedAt: '2026-01-01T00:00:00.000Z' });
  assert.ok(report.startsWith('# TreeTrace Report - demo'));
  for (const fragment of ['## Output map', '## Artifacts', 'TREETRACE_REPORT.md']) {
    assert.ok(report.includes(fragment));
  }
});
| 364 | |
// When a version option is supplied, both markdown outputs must contain "v<version>".
test('rendering: markdown footer stamps the tool version when provided', async () => {
  const { tree } = await fixtureTree();
  const versioned = { projectName: 'demo', version: '0.4.0' };

  const md = renderMarkdown(tree, { ...versioned });
  assert.ok(md.includes('v0.4.0'), 'PROMPT_TREE.md footer should stamp the version');

  const report = renderReportMarkdown(tree, { ...versioned, generatedAt: '2026-01-01T00:00:00.000Z' });
  assert.ok(report.includes('v0.4.0'), 'TREETRACE_REPORT.md footer should stamp the version');
});
| 372 | |
// Each analysis renderer must produce non-empty, well-formed output for the fixture tree.
test('analysis renderers produce failures, lessons, evals, and memory', async () => {
  const { tree } = await fixtureTree();
  const named = { projectName: 'demo' };

  // failures.json payload.
  const failures = renderFailuresJson(tree, { ...named, generatedAt: '2026-01-01T00:00:00.000Z' });
  assert.equal(failures.schemaVersion, '0.3');
  assert.ok(failures.failures.length >= 1);
  assert.ok(failures.correctionChains.length >= 1);

  // lessons markdown.
  const lessons = renderLessonsMarkdown(tree, { ...named });
  assert.ok(lessons.includes('# Lessons'));
  assert.ok(/\[node_\w+/.test(lessons), 'lessons should inline node ids in brackets');

  // evals: one JSON document per line.
  const evalLines = renderEvalsJsonl(tree).trim().split('\n');
  const evals = evalLines.map((line) => JSON.parse(line));
  assert.ok(evals.length >= 1);
  assert.ok(evals.every((e) => e.source === 'treetrace' && e.sourceNodeIds.length >= 1));

  // agent memory markdown.
  const memory = renderMemoryMarkdown(tree, { ...named });
  assert.ok(!memory.includes('TreeTrace Agent Memory'), 'H1 title removed in diet');
  assert.ok(memory.includes('## Constraints'), 'compact constraints header');
  assert.ok(!memory.includes('Keep TreeTrace local-first'));
});
| 393 | |
// A two-line plain transcript with no correction must yield zero failure signals.
test('analysis: tiny transcript without corrections does not invent failures', () => {
  const session = parsePlainTranscript('User: build a tiny CLI\nAssistant: done', 'tiny');
  const tree = buildTree([session], classifyPrompts([session]));
  const { summary, failures } = analyzeTree(tree);
  assert.equal(summary.totalFailureSignals, 0);
  assert.deepEqual(failures, []);
});
| 402 | |
// An Edit action on an auth file, followed by a correction node, must yield a verified
// security signal that carries the acting model and the id of the correcting node.
test('analysis: a security-sensitive agent action produces a verified, model-attributed signal', () => {
  const root = {
    id: 'node_001',
    text: 'Add rate limiting to checkout',
    title: 'Add rate limiting to checkout',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Edit', file: 'src/auth/session.ts', command: null, model: 'claude-sonnet-4-6' }],
  };
  const correction = {
    id: 'node_002',
    text: 'check the existing auth flow first',
    title: 'check the existing auth flow first',
    kind: 'correction',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const analysis = analyzeTree({ nodes: [root, correction] });
  const sec = analysis.failures.find((failure) => failure.type === 'security_or_privacy_risk');
  assert.ok(sec, 'expected a verified security signal from the auth-file edit');
  assert.equal(sec.tier, 'verified');
  assert.equal(sec.model, 'claude-sonnet-4-6');
  assert.equal(sec.correctedByNodeId, 'node_002');
  assert.ok(sec.evidence.includes('session.ts'));
  assert.deepEqual(analysis.summary.models, ['claude-sonnet-4-6']);
  assert.ok(analysis.summary.tierCounts.verified >= 1);
});
| 423 | |
// A Bash action that sources an .env file and exports an API key must yield a verified
// security signal whose evidence mentions "credential".
test('analysis: a credential-handling Bash action produces a verified security signal', () => {
  const deployCommand =
    'set -a; . /srv/app/.env; export CLOUDFLARE_API_KEY="$DEPLOY_API_KEY"; wrangler pages deploy site';
  const root = {
    id: 'node_001',
    text: 'deploy the marketing site',
    title: 'deploy the marketing site',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{
      tool: 'Bash',
      file: null,
      command: deployCommand,
      input: deployCommand,
      model: 'claude-opus-4-8',
    }],
  };

  const analysis = analyzeTree({ nodes: [root] });
  const sec = analysis.failures.find((failure) => failure.type === 'security_or_privacy_risk');
  assert.ok(sec, 'expected a security signal from the credential-handling deploy');
  assert.equal(sec.tier, 'verified');
  assert.ok(/credential/.test(sec.evidence), 'evidence should name the credential kind');
  assert.ok(analysis.summary.tierCounts.verified >= 1);
});
| 442 | |
// A chrome flag that merely starts with --force- must not be reported as a security risk.
test('analysis: benign --force-* chrome flag does not mint a verified security signal', () => {
  const screenshotAction = {
    tool: 'Bash',
    file: null,
    command: 'chrome --headless --force-device-scale-factor=1 --screenshot=out.png',
    model: 'm',
  };
  const root = {
    id: 'node_001',
    text: 'capture a screenshot of the page',
    title: 'capture a screenshot',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [screenshotAction],
  };

  const { failures } = analyzeTree({ nodes: [root] });
  const sec = failures.filter((failure) => failure.type === 'security_or_privacy_risk');
  assert.equal(sec.length, 0, '--force-device-scale-factor must not fire as a security risk');
});
| 453 | |
// Editing files whose names merely contain "token" must not yield a verified signal.
test('analysis: a token-named UI file does not mint a verified credential signal', () => {
  const tokenNamedFiles = ['src/ui/semantic-tokens.ts', 'src/lexer/tokenizer.ts', 'theme/design-tokens.json'];

  tokenNamedFiles.forEach((file) => {
    const root = {
      id: 'node_001',
      text: 'edit the theme',
      title: 'edit the theme',
      kind: 'root',
      status: 'accepted',
      parent: null,
      actions: [{ tool: 'Edit', file, command: null, model: 'm' }],
    };
    const { failures } = analyzeTree({ nodes: [root] });
    const verified = failures.filter(
      (failure) => failure.type === 'security_or_privacy_risk' && failure.tier === 'verified',
    );
    assert.equal(verified.length, 0, `${file} must not produce a verified credential signal`);
  });
});
| 466 | |
// An edit whose input only mentions "rbac" may produce security signals, but none of them
// may be tier 'verified' or reach confidence 0.95.
test('analysis: a bare rbac keyword in a non-credential edit is down-tiered below verified', () => {
  const root = {
    id: 'node_001',
    text: 'edit the detector',
    title: 'edit the detector',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Edit', file: 'src/analyze.js', input: 'const ACCESS = /rbac/i;', command: null, model: 'm' }],
  };

  const { failures } = analyzeTree({ nodes: [root] });
  const sec = failures.filter((failure) => failure.type === 'security_or_privacy_risk');
  const allDownTiered = sec.every((failure) => failure.tier !== 'verified' && failure.confidence < 0.95);
  assert.ok(allDownTiered, 'bare rbac keyword must not be verified/0.95');
});
| 477 | |
// Counterpart to the down-tiering tests: a real auth-file edit and a real .env-sourcing
// command must both still be reported as 'verified'.
test('analysis: a real credential file and a real secret command still verify at 0.95', () => {
  const firstSecuritySignal = (node) =>
    analyzeTree({ nodes: [node] }).failures.find((failure) => failure.type === 'security_or_privacy_risk');

  const fileNode = {
    id: 'node_001',
    text: 'harden auth',
    title: 'harden auth',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Edit', file: 'src/auth/session.ts', command: null, model: 'm' }],
  };
  const fileSec = firstSecuritySignal(fileNode);
  assert.ok(fileSec && fileSec.tier === 'verified' && fileSec.confidence === 0.95, 'a genuine auth file must stay verified');

  const envCommand = '. /srv/app/.env; wrangler pages deploy';
  const cmdNode = {
    id: 'node_001',
    text: 'deploy',
    title: 'deploy',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Bash', file: null, command: envCommand, input: envCommand, model: 'm' }],
  };
  const cmdSec = firstSecuritySignal(cmdNode);
  assert.ok(cmdSec && cmdSec.tier === 'verified', 'a genuine credential command must stay verified');
});
| 493 | |
// A prompt that talks about updating a PAT, with no actions at all, must still yield an
// 'inferred' security signal, and the memory output must show it under "## Security".
test('analysis: a PAT-update prompt produces an inferred security signal even with no action', () => {
  const root = {
    id: 'node_001',
    text: 'build the cli',
    title: 'build the cli',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  const intent = {
    id: 'node_002',
    text: 'I updated the PAT in the master access ref doc',
    title: 'I updated the PAT',
    kind: 'direction',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const analysis = analyzeTree({ nodes: [root, intent] });
  const sec = analysis.failures.find(
    (failure) => failure.type === 'security_or_privacy_risk' && failure.firstSeenNodeId === 'node_002',
  );
  assert.ok(sec, 'expected an inferred security signal from the PAT-update prompt');
  assert.equal(sec.tier, 'inferred');

  const memory = renderMemoryMarkdown({ nodes: [root, intent] });
  assert.ok(memory.includes('## Security'), 'memory should list the security section');
  assert.ok(/stated intent/.test(memory), 'memory should tag the stated intent');
});
| 508 | |
// A long pasted spec that merely lists security categories in one sentence must not
// produce any security signal.
test('analysis: a long pasted spec listing security categories does not over-fire as intent', () => {
  const root = {
    id: 'node_001',
    text: 'build the cli',
    title: 'build the cli',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  // Filler, then the single sentence naming the categories, then more filler.
  const seed = [
    'Here is the full product spec to read and react to. '.repeat(20),
    'The detector flags when an agent changed auth logic, touched secrets, modified access control, or disabled tests. ',
    'More pitch copy about water, compute, investors, and the cloud. '.repeat(20),
  ].join('');
  const pitch = {
    id: 'node_002',
    text: seed,
    title: 'pasted spec',
    kind: 'checkpoint',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const { failures } = analyzeTree({ nodes: [root, pitch] });
  const sec = failures.filter((failure) => failure.type === 'security_or_privacy_risk');
  assert.equal(sec.length, 0, 'a long pasted spec should not mint a stated-intent security signal');
});
| 520 | |
// A directive prompt carrying three requirements must surface all three in the span of the
// memory output between "## Constraints" and "## Lessons", without the "none" placeholder.
test('analysis: the constraints section extracts directive requirements and never reports none when constraints exist', () => {
  const root = {
    id: 'node_001',
    text: 'build the cli',
    title: 'build the cli',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  const rule = {
    id: 'node_002',
    text: 'no em dashes and do not add inline code comments, and keep it Apache licensed',
    title: 'no em dashes',
    kind: 'direction',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const memory = renderMemoryMarkdown({ nodes: [root, rule] });
  const sectionStart = memory.indexOf('## Constraints');
  const sectionEnd = memory.indexOf('## Lessons');
  const block = memory.slice(sectionStart, sectionEnd);

  assert.ok(/no em dashes/i.test(block), 'em-dash constraint should be listed');
  assert.ok(/inline code comments/i.test(block), 'inline-comment constraint should be listed');
  assert.ok(/apache/i.test(block), 'license constraint should be listed');
  assert.ok(!/No explicit constraints were flagged/.test(block), 'must not claim none when constraints exist');
});
| 535 | |
// Approval-style chatter carries no directive, so the rendered memory is expected
// to contain no "## Constraints" heading at all.
test('analysis: a benign descriptive prompt with no directive yields no false constraints', () => {
  const rootNode = { id: 'node_001', text: 'build the cli', title: 'build the cli', kind: 'root', status: 'accepted', parent: null, actions: [] };
  const approval = {
    id: 'node_002',
    text: 'I like where we stand so far and I think this looks good to me',
    title: 'looks good',
    kind: 'direction',
    status: 'accepted',
    parent: rootNode,
    actions: [],
  };
  const rendered = renderMemoryMarkdown({ nodes: [rootNode, approval] });
  const hasConstraintsHeading = rendered.includes('## Constraints');
  assert.ok(!hasConstraintsHeading, 'benign descriptive text should not mint constraints');
});
| 545 | |
// A turn that reports a lost file and asks for recovery should be recorded as an
// abandoned path, and the "## Next" section should point at the earlier forward
// direction rather than repeating the apology.
test('analysis: a destructive-then-recovery turn yields a known bad path and is not the preferred next work', () => {
  const apology = 'Also messed up the deck file in the P:/ it is gone I am sorry can you bring it back';
  const rootNode = {
    id: 'node_001', text: 'build the marketing deck', title: 'build the marketing deck',
    kind: 'root', status: 'accepted', parent: null, actions: [],
  };
  const forward = {
    id: 'node_002',
    text: 'Also you can send an agent out to develop these sections',
    title: 'send an agent out to develop these sections',
    kind: 'direction',
    status: 'accepted',
    parent: rootNode,
    actions: [],
  };
  const recovery = {
    id: 'node_003',
    text: apology,
    title: apology,
    kind: 'direction',
    status: 'accepted',
    parent: forward,
    actions: [{ tool: 'Write', file: 'P:/deck/index.html' }],
  };
  const nodes = [rootNode, forward, recovery];

  const { failures } = analyzeTree({ nodes });
  const abandoned = failures.filter(({ type }) => type === 'abandoned_path');
  assert.ok(abandoned.length >= 1, 'destructive-then-recovery should produce a bad-path entry');

  const rendered = renderMemoryMarkdown({ nodes });
  const badPaths = rendered.slice(rendered.indexOf('## Bad paths'), rendered.indexOf('## Security'));
  const claimsNoBadPaths = /No abandoned paths were detected/.test(badPaths);
  assert.ok(!claimsNoBadPaths, 'must not claim no abandoned paths when a destructive event occurred');
  assert.ok(/recover|destructive/i.test(badPaths), 'bad-path entry should warn about the destructive event');

  const nextWork = rendered.slice(rendered.indexOf('## Next'));
  const parrotsApology = /messed up the deck/i.test(nextWork);
  assert.ok(!parrotsApology, 'preferred next work must not parrot the apology turn');
  assert.ok(/develop these sections/i.test(nextWork), 'preferred next work should point at the real forward direction');
});
| 570 | |
// A correction backed only by wording must still produce a failure, but never one
// in the 'verified' tier; the summary's verified count must agree.
test('analysis: a keyword-only correction stays in the inferred or confirmed tier, not verified', () => {
  const rootNode = {
    id: 'node_001', text: 'build a dashboard', title: 'build a dashboard',
    kind: 'root', status: 'accepted', parent: null, actions: [],
  };
  const pushback = {
    id: 'node_002', text: 'no, that is overbuilt, keep it minimal', title: 'no, that is overbuilt',
    kind: 'correction', status: 'accepted', parent: rootNode, actions: [],
  };
  const { failures, summary } = analyzeTree({ nodes: [rootNode, pushback] });
  assert.ok(failures.length >= 1);
  const anyVerified = failures.some(({ tier }) => tier === 'verified');
  assert.ok(!anyVerified);
  assert.equal(summary.tierCounts.verified, 0);
});
| 579 | |
// The benign request must produce no failures, and no node may be the first-seen
// origin of more than one distinct failure type.
test('analysis: a single benign prompt does not yield multiple failure types', () => {
  const rootNode = {
    id: 'node_001',
    text: 'build the marketing deck',
    title: 'build the marketing deck',
    kind: 'root',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T14:00:00.000Z',
    actions: [],
  };
  const request = {
    id: 'node_002',
    text: 'and slide an agent to make the decks mobile friendly too please',
    title: 'make the decks mobile friendly',
    kind: 'direction',
    status: 'accepted',
    parent: rootNode,
    ts: '2026-06-12T14:52:00.000Z',
    actions: [],
  };
  // Forty repetitions of a sentence packed with correction-sounding keywords.
  const keywordFiller = 'do not overbuild it, it is too much, try again later if it keeps failing. '.repeat(40);
  const paste = {
    id: 'node_003',
    text: `ok sounds good i agree. ${keywordFiller}`,
    title: 'long strategy paste',
    kind: 'checkpoint',
    status: 'accepted',
    parent: request,
    ts: '2026-06-12T12:52:00.000Z',
    actions: [],
  };

  const { failures } = analyzeTree({ nodes: [rootNode, request, paste] });
  const failureTypesFor = (nodeId) => failures
    .filter(({ firstSeenNodeId }) => firstSeenNodeId === nodeId)
    .map(({ type }) => type);

  assert.equal(failureTypesFor('node_002').length, 0, 'a benign request should not mint failures from wording alone');
  ['node_001', 'node_002', 'node_003'].forEach((id) => {
    const types = failureTypesFor(id);
    const distinct = new Set(types);
    assert.ok(distinct.size <= 1, `node ${id} emitted multiple failure types: ${types.join(', ')}`);
  });
});
| 604 | |
// The node listed last carries an older timestamp than the one before it. The
// terminal summary, handoff and agent memory must all name the node with the
// newest timestamp, not the last one in the array.
test('analysis: latest accepted direction is chronological, not insertion order', () => {
  const rootNode = {
    id: 'node_001',
    text: 'pick a research topic',
    title: 'pick a research topic',
    kind: 'root',
    status: 'accepted',
    parent: null,
    ts: '2026-01-01T00:00:00.000Z',
    actions: [],
  };
  const marchDirection = {
    id: 'node_002',
    text: 'lets dig into Amazon Nova and the Karunanidhi essay direction',
    title: 'Amazon Nova and Karunanidhi',
    kind: 'direction',
    status: 'accepted',
    parent: rootNode,
    ts: '2026-03-01T00:00:00.000Z',
    actions: [],
  };
  const februaryDirection = {
    id: 'node_003',
    text: 'lets explore the Seoul travel itinerary in depth for the trip',
    title: 'Seoul travel itinerary',
    kind: 'direction',
    status: 'accepted',
    parent: marchDirection,
    ts: '2026-02-01T00:00:00.000Z',
    actions: [],
  };
  const tree = {
    nodes: [rootNode, marchDirection, februaryDirection],
    stats: { promptCount: 3, sessionCount: 2 },
  };
  const newestPattern = /Amazon Nova/i;

  const summary = renderTerminalSummary(tree, { projectName: 'demo' });
  assert.ok(newestPattern.test(summary), 'terminal summary should name the chronologically newest direction');
  // Only the text after the label is checked; an absent label yields an empty string.
  const afterLabel = summary.split('Latest accepted direction:')[1] || '';
  assert.ok(!/Seoul/i.test(afterLabel), 'must not name the stale Seoul session as latest');

  const handoff = renderHandoff(tree, { projectName: 'demo' });
  const [, afterHeading] = handoff.split('## Where things stand');
  const [standing] = afterHeading.split('##');
  assert.ok(newestPattern.test(standing), 'handoff should name the chronologically newest accepted direction');

  const memory = renderMemoryMarkdown(tree, { projectName: 'demo' });
  const nextWork = memory.slice(memory.indexOf('## Next'));
  assert.ok(newestPattern.test(nextWork), 'agent memory should point at the chronologically newest direction');
});
| 634 | |
// The correction node is a child of the failure node but carries an earlier
// timestamp. Any link the analysis emits (failure -> corrector, or a correction
// chain) must never point from a failure to a node that is older than it.
test('analysis: a corrector is never linked with an earlier timestamp than its failure', () => {
  const deckEdit = () => ({ tool: 'Edit', file: 'site/deck/index.html', command: null, input: null, model: 'claude-opus-4-8' });
  const failure = {
    id: 'node_001',
    text: 'i do not see the deck, just the index file showing text',
    title: 'deck not rendering',
    kind: 'direction',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T14:06:20.000Z',
    actions: [deckEdit()],
  };
  const earlier = {
    id: 'node_002',
    text: 'no that is wrong, the deck still does not work, redo it instead',
    title: 'still broken',
    kind: 'correction',
    status: 'accepted',
    parent: failure,
    ts: '2026-06-12T12:52:00.000Z',
    actions: [deckEdit()],
  };
  const analysis = analyzeTree({ nodes: [failure, earlier] });
  const byId = { node_001: failure, node_002: earlier };
  // Epoch milliseconds of the node with the given id.
  const stampOf = (nodeId) => new Date(byId[nodeId].ts).getTime();

  analysis.failures
    .filter((entry) => entry.correctedByNodeId)
    .forEach((entry) => {
      const failedAt = stampOf(entry.firstSeenNodeId);
      const correctedAt = stampOf(entry.correctedByNodeId);
      assert.ok(correctedAt >= failedAt, `failure ${entry.id} corrected by an earlier-timestamped node`);
    });

  analysis.correctionChains.forEach((chain) => {
    const failedAt = stampOf(chain.failureNodeId);
    const correctedAt = stampOf(chain.correctionNodeId);
    assert.ok(correctedAt >= failedAt, `chain ${chain.id} links a corrector that precedes its failure`);
    if (!chain.resolvedNodeId) return;
    const resolvedAt = stampOf(chain.resolvedNodeId);
    assert.ok(resolvedAt >= failedAt, `chain ${chain.id} resolves before its failure`);
  });
});
| 666 | |
// End-to-end default run against the synthetic fixture: every artifact must be
// written, failures.json must carry the expected schema version, and none of the
// artifacts may contain the fixture's secrets.
test('cli: default run writes analysis artifacts with redaction', async () => {
  // Single source of truth for the artifact set, so the existence check and the
  // leak scan below can never cover different files.
  const artifacts = [
    'TREETRACE_REPORT.md',
    'PROMPT_TREE.md',
    '.treetrace/tree.json',
    '.treetrace/failures.json',
    '.treetrace/lessons.md',
    '.treetrace/evals.jsonl',
    '.treetrace/agent-memory.md',
  ];
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-'));
  try {
    await main(['--file', FIXTURE, '--dir', dir, '--redact-auto', '--quiet']);
    for (const file of artifacts) {
      assert.ok(existsSync(join(dir, file)), `${file} missing`);
    }

    const failures = JSON.parse(readFileSync(join(dir, '.treetrace/failures.json'), 'utf8'));
    assert.equal(failures.schemaVersion, '0.3');
    assert.ok(failures.failures.length >= 1);

    // Only the first JSONL record is inspected.
    const evalLine = readFileSync(join(dir, '.treetrace/evals.jsonl'), 'utf8').trim().split('\n')[0];
    assert.equal(JSON.parse(evalLine).source, 'treetrace');

    // Concatenate every artifact and scan the whole export for secret fragments.
    const exported = artifacts.map((file) => readFileSync(join(dir, file), 'utf8')).join('\n');
    assert.ok(!exported.includes('sk-ant-'), 'anthropic key leaked');
    assert.ok(!exported.includes('hunter2pass'), 'basic-auth password leaked');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 704 | |
// Passing --analysis together with --report must still produce the full artifact set.
test('cli: --analysis combined with --report writes both analysis files and the reports', async () => {
  const expected = [
    'TREETRACE_REPORT.md',
    'PROMPT_TREE.md',
    '.treetrace/tree.json',
    '.treetrace/failures.json',
    '.treetrace/lessons.md',
    '.treetrace/evals.jsonl',
    '.treetrace/agent-memory.md',
  ];
  const outDir = mkdtempSync(join(tmpdir(), 'treetrace-both-'));
  try {
    await main(['--file', FIXTURE, '--dir', outDir, '--analysis', '--report', '--redact-auto', '--quiet']);
    expected.forEach((file) => {
      const present = existsSync(join(outDir, file));
      assert.ok(present, `${file} missing when --analysis and --report combined`);
    });
  } finally {
    rmSync(outDir, { recursive: true, force: true });
  }
});
| 719 | |
// Importing with --from copilot must stamp tree.json with the copilot adapter's
// sourceType instead of the Claude Code one.
test('cli: a copilot import records a per-adapter sourceType, not claude-code-jsonl', async () => {
  const testsDir = dirname(fileURLToPath(import.meta.url));
  const copilotFixture = join(testsDir, 'fixtures', 'adapters', 'copilot-chatsession.json');
  const outDir = mkdtempSync(join(tmpdir(), 'treetrace-src-'));
  try {
    await main(['--from', 'copilot', '--file', copilotFixture, '--dir', outDir, '--redact-auto', '--quiet']);
    const treeJson = readFileSync(join(outDir, '.treetrace/tree.json'), 'utf8');
    const { project } = JSON.parse(treeJson);
    assert.equal(project.sourceType, 'copilot-chat', 'sourceType should reflect the copilot adapter');
    assert.notEqual(project.sourceType, 'claude-code-jsonl');
  } finally {
    rmSync(outDir, { recursive: true, force: true });
  }
});
| 732 | |
// Pointing --dir at a nested path that does not exist yet must make the CLI create
// it, including the .treetrace subdirectory.
test('cli: creates the output directory and .treetrace subdirectory when missing', async () => {
  const scratch = mkdtempSync(join(tmpdir(), 'treetrace-'));
  const target = join(scratch, 'does', 'not', 'exist', 'yet');
  try {
    assert.ok(!existsSync(target), 'target dir should not exist before the run');
    await main(['--file', FIXTURE, '--dir', target, '--redact-auto', '--quiet']);
    const promptTree = join(target, 'PROMPT_TREE.md');
    const treeJson = join(target, '.treetrace', 'tree.json');
    assert.ok(existsSync(promptTree), 'PROMPT_TREE.md missing');
    assert.ok(existsSync(treeJson), '.treetrace/tree.json missing');
  } finally {
    // Removing the scratch root also removes everything the run created below it.
    rmSync(scratch, { recursive: true, force: true });
  }
});
| 745 | |
// The words "security-risk" must not trip any non-soft secret rule, alone or in a sentence.
test('redaction: the literal phrase "security-risk" is not a false-positive secret', () => {
  const phrases = ['security-risk', 'skip the security-risk step'];
  phrases.forEach((phrase) => {
    const hard = scanText(phrase).filter(({ severity }) => severity !== 'soft');
    assert.deepEqual(hard, [], `"${phrase}" should not match any secret rule (got ${JSON.stringify(hard)})`);
  });
});
| 752 | |
// A ghp_-prefixed token embedded in a sentence must be reported under the github-token rule.
test('redaction: a real-format GitHub token is caught', () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const findings = scanText(`set the remote with ${token} now`);
  const hits = findings.map(({ ruleId }) => ruleId);
  assert.ok(hits.includes('github-token'), `github-token missed (got ${hits})`);
});
| 758 | |
// Writes a three-record session whose assistant turn runs a Bash command with a
// GitHub token embedded in a remote URL. It first confirms the parser captures the
// raw token, then runs the CLI with --redact-auto and checks no artifact leaks it.
test('redaction: a token inside a Bash action body is redacted end to end', async () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const lines = [
    { type: 'summary', summary: 'wire up the remote', leafUuid: 'b3' },
    {
      parentUuid: null, isSidechain: false, type: 'user', userType: 'external', uuid: 'b1',
      sessionId: 'leak-001', timestamp: '2026-06-01T10:00:00.000Z', cwd: '/tmp/demo', gitBranch: 'main', version: '2.1.0',
      message: { role: 'user', content: 'Point the git remote at my fork.' },
    },
    {
      parentUuid: 'b1', isSidechain: false, type: 'assistant', uuid: 'b2', sessionId: 'leak-001',
      timestamp: '2026-06-01T10:00:30.000Z',
      message: {
        role: 'assistant', model: 'assistant-model', usage: { input_tokens: 100, output_tokens: 50 },
        content: [
          { type: 'text', text: 'Setting the remote.' },
          { type: 'tool_use', id: 'g1', name: 'Bash', input: { command: `git push --force origin main && git remote set-url origin https://x:${token}@github.com/me/fork.git` } },
        ],
      },
    },
  ];
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-leak-'));
  const session = join(dir, 'session.jsonl');
  try {
    // The fixture is written inside the try so a failed write still removes the temp dir.
    writeFileSync(session, lines.map((l) => JSON.stringify(l)).join('\n') + '\n');

    // Precondition: before redaction the parsed action really carries the token.
    const parsed = await parseSessionFile(session, { sessionId: 'leak-001' });
    const action = parsed.prompts[0].actions.find((a) => a.tool === 'Bash');
    assert.ok(action, 'expected a captured Bash action');
    assert.ok(action.command.includes(token), 'fixture should carry the raw token before redaction');
    assert.ok(typeof action.input === 'string' && action.input.includes(token), 'input summary should carry the command');

    await main(['--file', session, '--dir', dir, '--redact-auto', '--quiet']);
    const exported = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ].map((f) => readFileSync(join(dir, f), 'utf8')).join('\n');
    assert.ok(!exported.includes(token), 'GitHub token leaked from an action body into output');
    // Even a truncated token (prefix plus one character) counts as a leak.
    assert.ok(!/ghp_[0-9A-Za-z]/.test(exported), 'a partial GitHub token prefix leaked from an action body into output');
    assert.ok(exported.includes('[REDACTED:'), 'expected a redaction marker where the action-body token was');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 802 | |
// The handoff brief must keep shell operators readable (a raw "&&" stays a raw
// "&&") while still neutralizing angle-bracket tags such as <script>.
test('handoff: command operators are not HTML-escaped in the brief', () => {
  // The escaped form of "&&". It is assembled from two pieces so that tooling which
  // decodes HTML entities in this file cannot collapse the needle back into a bare
  // ampersand, which would make this assertion contradict the one before it.
  const escapedOperator = ['&', 'amp;'].join('').repeat(2);

  const root = {
    id: 'node_001', text: 'run rm -rf build && mkdir build to reset the workspace',
    title: 'reset the workspace', kind: 'root', status: 'accepted', parent: null, actions: [],
  };
  const handoff = renderHandoff({ nodes: [root], stats: { promptCount: 1, sessionCount: 1 } }, { projectName: 'demo' });
  assert.ok(handoff.includes('rm -rf build && mkdir build'), 'command should keep raw && in the handoff brief');
  assert.ok(!handoff.includes(escapedOperator), `handoff must not HTML-escape && to ${escapedOperator}`);

  const inject = {
    id: 'node_001', text: 'do not run <script>alert(1)</script> ever',
    title: 'no scripts', kind: 'root', status: 'accepted', parent: null, actions: [],
  };
  const handoff2 = renderHandoff({ nodes: [inject], stats: { promptCount: 1, sessionCount: 1 } }, { projectName: 'demo' });
  assert.ok(!handoff2.includes('<script>'), 'angle-bracket tags should still be neutralized in the handoff brief');
});
| 818 | |
// A pasted transcript with User:/Assistant: prefixes yields one prompt per user
// turn; text with no such prefixes is rejected with a "turn markers" error.
test('plain transcript fallback parses User:/Assistant: markers', () => {
  const transcript = [
    'User: build me a snake game in python',
    'Assistant: sure, here is the code...',
    'User: make the snake blue',
    'Assistant: done',
  ].join('\n');
  const { prompts } = parsePlainTranscript(transcript, 'pasted');
  assert.equal(prompts.length, 2);
  assert.equal(prompts[1].text, 'make the snake blue');

  const parseUnmarked = () => parsePlainTranscript('no markers here at all');
  assert.throws(parseUnmarked, /turn markers/);
});
| 828 | |
// Table of sample user texts and the label classifySpecialUserText must give each.
test('special user text classification', () => {
  const cases = [
    ['<command-name>/foo</command-name>', 'command'],
    ['<system-reminder>x</system-reminder>', 'meta'],
    [
      'This session is being continued from a previous conversation that ran out of context.',
      'compact-continuation',
    ],
    ['build me an app', 'prompt'],
  ];
  for (const [input, label] of cases) {
    assert.equal(classifySpecialUserText(input), label);
  }
});
| 838 | |
// Sample paths and the munged names expected for them; in these samples the
// slashes, the dot and the underscore all come out as hyphens.
test('discover: path munging matches Claude Code storage layout', () => {
  const expectedByPath = new Map([
    ['/home/dev/weatherapp', '-home-dev-weatherapp'],
    ['/home/dev/weatherapp/api', '-home-dev-weatherapp-api'],
    ['/home/u.ser/my_app', '-home-u-ser-my-app'],
  ]);
  for (const [path, munged] of expectedByPath) {
    assert.equal(mungePath(path), munged);
  }
});
| 844 | |
/**
 * Creates a throwaway project directory under the OS temp dir.
 *
 * The directory contains a package.json (name "demo", one dependency on express)
 * and a single source file, src/real.js. The caller is responsible for removing it.
 *
 * @returns {string} Path of the newly created directory.
 */
function tempProject() {
  const projectRoot = mkdtempSync(join(tmpdir(), 'treetrace-feat-'));
  const sourceDir = join(projectRoot, 'src');
  const manifest = { name: 'demo', dependencies: { express: '^4.0.0' } };

  mkdirSync(sourceDir, { recursive: true });
  writeFileSync(join(projectRoot, 'package.json'), JSON.stringify(manifest));
  writeFileSync(join(sourceDir, 'real.js'), 'export const real = 1;\n');
  return projectRoot;
}
| 852 | |
// The prompt names one real and one invented file; the edit imports one declared
// dependency, one node builtin and one unknown package. Only the invented file and
// the unknown package may be reported, each with an eval candidate.
test('hallucinations: flags only the invented file and import, not the real ones', () => {
  const projectDir = tempProject();
  try {
    const editedSource = [
      "import express from 'express';",
      "import ghostlib from 'ghostlib-does-not-exist';",
      "import { readFileSync } from 'node:fs';",
    ].join('\n');
    const rootNode = {
      id: 'node_001',
      kind: 'root',
      status: 'accepted',
      parent: null,
      text: 'Open src/real.js and src/imaginary.js to wire the feature.',
      title: 'wire the feature',
      actions: [{ tool: 'Edit', file: 'src/real.js', input: editedSource, command: null, model: 'm' }],
    };
    const { hallucinations } = detectHallucinations({ nodes: [rootNode] }, projectDir);
    // References reported under a single category.
    const referencesIn = (category) => hallucinations
      .filter((entry) => entry.category === category)
      .map((entry) => entry.reference);
    const files = referencesIn('hallucinated_file_or_path');
    const imports = referencesIn('hallucinated_import_or_package');

    assert.ok(files.includes('src/imaginary.js'), `invented file should be flagged (got ${files})`);
    assert.ok(!files.includes('src/real.js'), 'the real file must not be flagged');
    assert.ok(!files.some((file) => /package\.json/.test(file)), 'the real package.json must not be flagged');

    assert.ok(imports.includes('ghostlib-does-not-exist'), `invented import should be flagged (got ${imports})`);
    assert.ok(!imports.includes('express'), 'a declared dependency must not be flagged');
    const builtinFlagged = imports.includes('fs') || imports.includes('node:fs');
    assert.ok(!builtinFlagged, 'a node builtin must not be flagged');

    hallucinations.forEach((entry) => {
      assert.ok(entry.evalCandidate && entry.evalCandidate.target, 'each hallucination should carry an eval candidate');
    });
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
| 886 | |
// A path that is absent on disk but is the target of a Write action in the same
// session must not be reported as a hallucinated file.
test('hallucinations: a file created during the session is not flagged', () => {
  const projectDir = tempProject();
  try {
    const writeAction = { tool: 'Write', file: 'src/brandnew.js', input: 'export const n = 1;', command: null, model: 'm' };
    const rootNode = {
      id: 'node_001',
      kind: 'root',
      status: 'accepted',
      parent: null,
      text: 'Create src/brandnew.js and then reference src/brandnew.js again.',
      title: 'create new file',
      actions: [writeAction],
    };
    const { hallucinations } = detectHallucinations({ nodes: [rootNode] }, projectDir);
    const flaggedFiles = hallucinations
      .filter(({ category }) => category === 'hallucinated_file_or_path')
      .map(({ reference }) => reference);
    assert.ok(!flaggedFiles.includes('src/brandnew.js'), 'a file the agent created this session must not be flagged');
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
| 903 | |
// Missing extensionless paths under dot-directories count as file references,
// while dotted code symbols such as JSON.parse do not.
test('hallucinations: extensionless files under dot-directories are flagged when missing', () => {
  const projectDir = tempProject();
  try {
    const rootNode = {
      id: 'node_001',
      kind: 'root',
      status: 'accepted',
      parent: null,
      text: 'Open .github/CODEOWNERS and .github/workflows/ci and .husky/pre-commit, and reference JSON.parse and test.skip.',
      title: 'review config',
      actions: [],
    };
    const { hallucinations } = detectHallucinations({ nodes: [rootNode] }, projectDir);
    const files = hallucinations
      .filter(({ category }) => category === 'hallucinated_file_or_path')
      .map(({ reference }) => reference);

    assert.ok(files.includes('.github/CODEOWNERS'), `dot-directory path should be flagged (got ${files})`);
    assert.ok(files.includes('.github/workflows/ci'), 'nested dot-directory path should be flagged');
    assert.ok(files.includes('.husky/pre-commit'), 'hyphenated dot-directory path should be flagged');
    const symbolFlagged = files.includes('JSON.parse') || files.includes('test.skip');
    assert.ok(!symbolFlagged, 'dotted code symbols must not be flagged');
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
| 923 | |
// "process.env" looks like a dotted filename but must not be reported as a missing file.
test('hallucinations: process.env is not flagged as a missing file', () => {
  const projectDir = tempProject();
  try {
    const rootNode = {
      id: 'node_001',
      kind: 'root',
      status: 'accepted',
      parent: null,
      text: 'Read the API key from process.env instead of hardcoding it.',
      title: 'use env var',
      actions: [],
    };
    const { hallucinations } = detectHallucinations({ nodes: [rootNode] }, projectDir);
    const files = hallucinations
      .filter(({ category }) => category === 'hallucinated_file_or_path')
      .map(({ reference }) => reference);
    const envFlagged = files.includes('process.env');
    assert.ok(!envFlagged, `process.env must not be flagged as a file (got ${files})`);
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
| 939 | |
// A require('./…') of a missing file belongs in the file category (with or without
// the leading "./") and must not appear as a bare "." package import.
test('hallucinations: a relative require is not flagged as an import, but the missing file is', () => {
  const projectDir = tempProject();
  try {
    const editAction = {
      tool: 'Edit',
      file: 'src/index.js',
      input: "const limiter = require('./middleware/rateLimit.js');",
      command: null,
      model: 'm',
    };
    const rootNode = {
      id: 'node_001', kind: 'root', status: 'accepted', parent: null,
      text: 'Wire it up.', title: 'wire',
      actions: [editAction],
    };
    const { hallucinations } = detectHallucinations({ nodes: [rootNode] }, projectDir);
    const referencesIn = (category) => hallucinations
      .filter((entry) => entry.category === category)
      .map((entry) => entry.reference);
    const imports = referencesIn('hallucinated_import_or_package');
    const files = referencesIn('hallucinated_file_or_path');

    assert.ok(!imports.includes('.'), 'a relative require must not be reduced to a "." import');
    const missingFileFlagged = files.includes('./middleware/rateLimit.js') || files.includes('middleware/rateLimit.js');
    assert.ok(missingFileFlagged, `the missing relative file should still be flagged (got ${files})`);
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
| 957 | |
// Part one: an auth-file edit, an rm -rf command and a "do not disable the tests"
// correction must all surface in the security report. Part two: a README-only edit
// must yield no signal and a report that says "None detected.".
test('security report: surfaces real signals and omits benign sessions', () => {
  const projectDir = tempProject();
  // A fresh options object per call, as each render receives its own literal.
  const reportOptions = () => ({ projectName: 'demo', generatedAt: '2026-01-01T00:00:00.000Z' });
  try {
    const riskyRoot = {
      id: 'node_001',
      kind: 'root',
      status: 'accepted',
      parent: null,
      text: 'harden the login flow',
      title: 'harden the login flow',
      actions: [
        { tool: 'Edit', file: 'src/auth/login.js', input: 'export function login() {}', command: null, model: 'claude-opus-4-8' },
        { tool: 'Bash', file: null, command: 'rm -rf build', input: 'rm -rf build', model: 'claude-opus-4-8' },
      ],
    };
    const pushback = {
      id: 'node_002',
      kind: 'correction',
      status: 'accepted',
      parent: riskyRoot,
      text: 'no, do not disable the tests in the auth suite, keep them running',
      title: 'do not disable tests',
      actions: [],
    };
    const riskyTree = { nodes: [riskyRoot, pushback] };
    assert.ok(hasSecuritySignal(riskyTree, projectDir), 'expected a security signal for the auth edit');
    const report = renderSecurityReport(riskyTree, projectDir, reportOptions());

    assert.ok(report.startsWith('# TreeTrace Security Report - demo'));
    const mustAppear = [
      [/auth: .*src\/auth\/login\.js/, 'auth surface and file should be listed'],
      [/rm -rf build/, 'risky command should be listed'],
      [/disable the tests|disable or skip tests/i, 'test-skip signal should appear'],
      [/do not disable the tests/i, 'the human correction should surface as an eval/memory candidate'],
    ];
    for (const [pattern, message] of mustAppear) {
      assert.ok(pattern.test(report), message);
    }

    // The benign edit targets a README, so create one in the project first.
    writeFileSync(join(projectDir, 'README.md'), '# demo\n');
    const benignRoot = {
      id: 'node_001',
      kind: 'root',
      status: 'accepted',
      parent: null,
      text: 'add a markdown table to the README',
      title: 'add a table',
      actions: [{ tool: 'Edit', file: 'README.md', input: '| a | b |', command: null, model: 'm' }],
    };
    const benignTree = { nodes: [benignRoot] };
    assert.ok(!hasSecuritySignal(benignTree, projectDir), 'benign session should have no security signal');
    const benignReport = renderSecurityReport(benignTree, projectDir, reportOptions());
    assert.ok(/None detected\./.test(benignReport), 'benign report should state nothing was found');
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
| 998 | |
// Feeds the CLI a ChatGPT-style export whose user turn contains a 64-character hex
// value and a GitHub token next to a reference to a non-existent file, then checks
// that hallucinations.json still reports the file but contains neither secret.
// NOTE(review): the title also names the security report, but only
// hallucinations.json is read back here. Confirm whether the report file should be
// scanned as well.
test('security report and hallucinations.json do not leak injected secrets via the CLI', async () => {
  const dir = tempProject();
  const hex = '6881f8290266f4cc939959917f893a2a88787eb24bbcb6b9c37594c72bf448c3';
  const ghToken = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: [
        `edit src/imaginary.js, my key is session_hex=${hex} and token ${ghToken}`,
      ] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] },
    },
  }];
  const file = join(dir, 'leaky.json');
  try {
    // The fixture is written inside the try so a failed write still removes the temp project.
    writeFileSync(file, JSON.stringify(convo));
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--security', '--redact-auto', '--quiet']);
    const hall = readFileSync(join(dir, '.treetrace/hallucinations.json'), 'utf8');
    assert.ok(!hall.includes(hex), 'hex secret leaked into hallucinations.json');
    assert.ok(!hall.includes(ghToken), 'github token leaked into hallucinations.json');
    assert.ok(/imaginary\.js/.test(hall), 'the invented file should still be detected');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1024 | |
// Runs the real bin script in a child process and checks its exit codes:
// 2 for an unknown option, 3 when the target directory has nothing to trace.
test('cli: structured exit codes for CI consumers', async () => {
  const bin = join(dirname(fileURLToPath(import.meta.url)), '..', 'bin', 'treetrace.js');
  /**
   * Spawns the CLI with the given arguments.
   * @param {string[]} args - Arguments passed after the bin script path.
   * @returns {Promise<{code: number|null, stderr: string}>} Exit code and captured stderr.
   */
  const run = (args) =>
    new Promise((resolve, reject) => {
      // process.execPath runs the same Node binary as the test itself instead of
      // whichever `node` happens to be first on PATH.
      const child = spawn(process.execPath, [bin, ...args], { stdio: ['ignore', 'ignore', 'pipe'] });
      let stderr = '';
      child.stderr.setEncoding('utf8');
      child.stderr.on('data', (chunk) => { stderr += chunk; });
      // Without this listener a failed spawn is an unhandled 'error' event rather
      // than a rejected promise that fails this test.
      child.once('error', reject);
      child.once('close', (code) => resolve({ code, stderr }));
    });
  const empty = mkdtempSync(join(tmpdir(), 'treetrace-exit-'));
  try {
    const usage = await run(['--bogus']);
    assert.equal(usage.code, 2, `bad option should exit 2 (got ${usage.code}): ${usage.stderr}`);
    const nodata = await run(['--dir', empty]);
    assert.equal(nodata.code, 3, `nothing-to-trace should exit 3 (got ${nodata.code}): ${nodata.stderr}`);
  } finally {
    rmSync(empty, { recursive: true, force: true });
  }
});
| 1044 | |
// Drives the `mcp` subcommand over stdio with four newline-delimited JSON-RPC
// requests and checks the shape of each response (matched by request id).
test('mcp: initialize, tools/list, and tools/call return well-formed JSON-RPC', async () => {
  const dir = tempProject();
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: ['build a cli and do not add dependencies'] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: ['u2'] },
      u2: { message: { author: { role: 'user' }, content: { parts: ['no, that is wrong, keep it minimal'] }, create_time: 3.0 }, parent: 'a', children: [] },
    },
  }];
  const file = join(dir, 'mcp.json');
  writeFileSync(file, JSON.stringify(convo));
  const bin = join(dirname(fileURLToPath(import.meta.url)), '..', 'bin', 'treetrace.js');
  try {
    const responses = await new Promise((resolveP, rejectP) => {
      // process.execPath: run the child on the same Node binary as this test file.
      const child = spawn(process.execPath, [bin, 'mcp', '--from', 'chatgpt', '--file', file, '--dir', dir], {
        stdio: ['pipe', 'pipe', 'ignore'],
      });
      let buf = '';
      child.stdout.on('data', (d) => { buf += d; });

      // The child is given a fixed window to answer, then stopped and its stdout parsed.
      const timer = setTimeout(() => {
        child.stdin.end();
        child.kill();
        try {
          resolveP(buf.split('\n').filter(Boolean).map((l) => JSON.parse(l)));
        } catch (err) {
          // A malformed stdout line must fail this test, not throw from a timer callback.
          rejectP(err);
        }
      }, 2000);
      child.on('error', (err) => {
        clearTimeout(timer);
        rejectP(err);
      });

      const send = (o) => child.stdin.write(JSON.stringify(o) + '\n');
      send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} });
      send({ jsonrpc: '2.0', id: 2, method: 'tools/list', params: {} });
      send({ jsonrpc: '2.0', id: 3, method: 'tools/call', params: { name: 'lessons', arguments: {} } });
      send({ jsonrpc: '2.0', id: 99, method: 'tools/call', params: { name: 'nope', arguments: {} } });
    });

    const init = responses.find((r) => r.id === 1);
    assert.ok(init && init.jsonrpc === '2.0', 'initialize must be JSON-RPC 2.0');
    assert.equal(init.result.serverInfo.name, 'treetrace');
    assert.ok(init.result.protocolVersion, 'initialize must advertise a protocol version');

    const list = responses.find((r) => r.id === 2);
    // Guard first so a missing response reads as an assertion failure, not a TypeError.
    assert.ok(list && list.result && Array.isArray(list.result.tools), 'tools/list must return a tools array');
    const names = list.result.tools.map((t) => t.name).sort();
    assert.deepEqual(names, ['eval_candidates', 'handoff', 'lessons', 'rejections_summary', 'security_summary', 'tree']);

    const call = responses.find((r) => r.id === 3);
    assert.ok(call && call.result && Array.isArray(call.result.content), 'tools/call must return content array');
    assert.equal(call.result.content[0].type, 'text');
    assert.ok(/# Lessons/.test(call.result.content[0].text), 'lessons tool should return the lessons markdown');

    const bad = responses.find((r) => r.id === 99);
    assert.ok(bad && bad.error && bad.error.code === -32602, 'unknown tool should return a JSON-RPC error');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1098 | |
| 1099 | import { recordedCwd } from '../src/discover.js'; |
| 1100 | |
// Each sample assigns a value to a secret-looking key in a different syntax;
// scanText must report a `secret-assignment` finding for every one of them.
test('redaction: JSON-style, quoted, backtick, and multiline secret assignments are caught', () => {
  const samples = [
    '{"api_key":"supersecretvalue"}',
    '{"client_secret":"correcthorsebattery"}',
    '{"access_token":"correct-horse-battery"}',
    "{'api_key':'correcthorsebattery'}",
    'const password = `correct horse battery staple`;',
    'api_key: `correct-horse-battery-staple`',
    'API_KEY="line1\nline2line2line2"',
  ];
  samples.forEach((sample) => {
    const hits = scanText(sample).map((finding) => finding.ruleId);
    const caught = hits.includes('secret-assignment');
    assert.ok(caught, `secret-assignment missed in: ${JSON.stringify(sample)} (got ${hits})`);
  });
});
| 1116 | |
// A repetitive value under a `password` key must still produce a secret-assignment finding.
test('redaction: generic secret-key assignment is caught even with a low-entropy value', () => {
  const ruleIds = scanText('password: "hunter2hunter2"').map((finding) => finding.ruleId);
  const caught = ruleIds.includes('secret-assignment');
  assert.ok(caught, 'low-entropy generic secret should still be a finding');
});
| 1122 | |
// Placeholder-looking values must yield no finding other than `soft`-severity ones.
test('redaction: placeholder secret assignments are not flagged', () => {
  const placeholders = ['token: null', 'password: ""', 'secret: "${SECRET}"', 'api_key: <your-key>', 'token=true'];
  placeholders.forEach((benign) => {
    const hard = scanText(benign).filter((finding) => finding.severity !== 'soft');
    assert.deepEqual(hard, [], `${benign} should not flag (got ${JSON.stringify(hard)})`);
  });
});
| 1129 | |
// End-to-end: runs the CLI entry point with --redact-auto over a conversation that
// contains two secret assignments, then checks neither raw value is in any written artifact.
test('redaction: a JSON-style secret leaves no raw value in any artifact end to end', async () => {
  const secret = 'supersecretvalue';
  const back = 'correct-horse-battery-staple';
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-json-secret-'));
  const file = join(dir, 'conv.json');

  const userText = `config is {"api_key":"${secret}"} and password = \`${back}\``;
  const mapping = {
    r: { message: null, parent: null, children: ['u'] },
    u: { message: { author: { role: 'user' }, content: { parts: [userText] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
    a: { message: { author: { role: 'assistant' }, content: { parts: ['done'] }, create_time: 2.0 }, parent: 'u', children: [] },
  };
  writeFileSync(file, JSON.stringify([{ mapping }]));

  try {
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);

    // Only artifacts that were actually written are read; missing ones are skipped.
    const candidates = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ];
    const contents = [];
    for (const relative of candidates) {
      const full = join(dir, relative);
      if (existsSync(full)) contents.push(readFileSync(full, 'utf8'));
    }
    const artifacts = contents.join('\n');

    assert.ok(!artifacts.includes(secret), 'JSON-style secret value leaked into an artifact');
    assert.ok(!artifacts.includes(back), 'backtick secret value leaked into an artifact');
    assert.ok(artifacts.includes('[REDACTED:secret-assignment]'), 'expected a secret-assignment redaction marker');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1156 | |
// Resolves the same findings three times against a stored "keep" decision:
// both non-interactive modes must override the keep, interactive mode must honor it.
test('redaction: a prior keep decision is ignored under --redact-auto and non-TTY auto mode', async () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const text = `Use token ${token} for setup`;
  const findings = scanText(text);
  const prior = { [sha256(token)]: { action: 'keep', ruleId: 'github-token' } };
  const redactWith = (decisions) => applyDecisions(text, findings, decisions);

  // --redact-auto
  const auto = await resolveFindings(findings, prior, { interactive: false, autoRedact: true });
  assert.equal(auto.overriddenKeeps, 1, 'auto mode should override a prior keep');
  const outAuto = redactWith(auto.decisions);
  assert.ok(!outAuto.includes(token), 'raw token leaked under --redact-auto despite re-redaction');
  const residual = shadowScan(outAuto, auto.decisions);
  assert.equal(residual.length, 0, 'shadow scan should be clean after override');

  // non-interactive without the explicit flag
  const nonTty = await resolveFindings(findings, prior, { interactive: false, autoRedact: false });
  assert.equal(nonTty.overriddenKeeps, 1, 'non-TTY auto mode should override a prior keep');
  const outNonTty = redactWith(nonTty.decisions);
  assert.ok(!outNonTty.includes(token), 'raw token leaked in non-TTY auto mode');

  // interactive
  const interactive = await resolveFindings(findings, prior, { interactive: true, autoRedact: false });
  assert.equal(interactive.overriddenKeeps, 0, 'interactive mode should honor a deliberate keep');
  const outInteractive = redactWith(interactive.decisions);
  assert.ok(outInteractive.includes(token), 'interactive keep should be honored');
});
| 1177 | |
// Seeds .treetrace/redactions.json with a "keep" for a token, runs the CLI with
// --redact-auto, and checks the token is absent from the artifacts and the stored
// decision has been rewritten to "redact".
test('cli: a preseeded keep cannot leak a secret under --redact-auto', async () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-keep-'));
  const file = join(dir, 'conv.json');
  const decisionsPath = join(dir, '.treetrace', 'redactions.json');
  const tokenHash = sha256(token);

  const mapping = {
    r: { message: null, parent: null, children: ['u'] },
    u: { message: { author: { role: 'user' }, content: { parts: [`Use token ${token} for setup`] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
    a: { message: { author: { role: 'assistant' }, content: { parts: ['done'] }, create_time: 2.0 }, parent: 'u', children: [] },
  };
  writeFileSync(file, JSON.stringify([{ mapping }]));
  mkdirSync(join(dir, '.treetrace'), { recursive: true });
  writeFileSync(decisionsPath, JSON.stringify({ [tokenHash]: { action: 'keep', ruleId: 'github-token' } }));

  try {
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);

    const candidates = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/agent-memory.md',
    ];
    const contents = [];
    for (const relative of candidates) {
      const full = join(dir, relative);
      if (existsSync(full)) contents.push(readFileSync(full, 'utf8'));
    }
    const artifacts = contents.join('\n');
    assert.ok(!artifacts.includes(token), 'preseeded keep leaked a raw token under --redact-auto');

    const stored = JSON.parse(readFileSync(decisionsPath, 'utf8'));
    assert.equal(stored[tokenHash].action, 'redact', 'overridden keep should persist as redact');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1205 | |
// Seeds a stored "keep" decision for a token, starts the `mcp` subcommand over stdio,
// calls the `handoff` tool, and checks the raw token is absent from the response.
test('mcp: a preseeded keep cannot leak a token in handoff', async () => {
  const token = 'ghp_0123456789abcdefghijklmnopqrstuvwxyzAB';
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-mcp-keep-'));
  const file = join(dir, 'conv.json');
  const convo = [{
    mapping: {
      r: { message: null, parent: null, children: ['u'] },
      u: { message: { author: { role: 'user' }, content: { parts: [`Use token ${token} for setup, do not add dependencies`] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
      a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: ['u2'] },
      u2: { message: { author: { role: 'user' }, content: { parts: ['no, keep it minimal'] }, create_time: 3.0 }, parent: 'a', children: [] },
    },
  }];
  writeFileSync(file, JSON.stringify(convo));
  mkdirSync(join(dir, '.treetrace'), { recursive: true });
  writeFileSync(join(dir, '.treetrace', 'redactions.json'), JSON.stringify({ [sha256(token)]: { action: 'keep', ruleId: 'github-token' } }));
  const bin = join(dirname(fileURLToPath(import.meta.url)), '..', 'bin', 'treetrace.js');
  try {
    const responses = await new Promise((resolveP, rejectP) => {
      // process.execPath: run the child on the same Node binary as this test file.
      const child = spawn(process.execPath, [bin, 'mcp', '--from', 'chatgpt', '--file', file, '--dir', dir], { stdio: ['pipe', 'pipe', 'ignore'] });
      let buf = '';
      child.stdout.on('data', (d) => { buf += d; });

      // The child is given a fixed window to answer, then stopped and its stdout parsed.
      const timer = setTimeout(() => {
        child.stdin.end();
        child.kill();
        try {
          resolveP(buf.split('\n').filter(Boolean).map((l) => JSON.parse(l)));
        } catch (err) {
          // A malformed stdout line must fail this test, not throw from a timer callback.
          rejectP(err);
        }
      }, 2500);
      child.on('error', (err) => {
        clearTimeout(timer);
        rejectP(err);
      });

      const send = (o) => child.stdin.write(JSON.stringify(o) + '\n');
      send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} });
      send({ jsonrpc: '2.0', id: 2, method: 'tools/call', params: { name: 'handoff', arguments: {} } });
    });
    const call = responses.find((r) => r.id === 2);
    assert.ok(call && call.result, 'handoff tool should return a result');
    assert.ok(!JSON.stringify(call).includes(token), 'MCP handoff leaked a token despite a preseeded keep');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1244 | |
// Sends four messages to the `mcp` subcommand: a tool call with an unexpected
// argument, a valid tool call, a request with an explicit `id: null`, and a batch
// array. Each must get the response the assertions below describe.
test('mcp: extra tool arguments return -32602', async () => {
  const dir = tempProject();
  const file = join(dir, 'conv.json');
  writeFileSync(file, JSON.stringify([{ mapping: {
    r: { message: null, parent: null, children: ['u'] },
    u: { message: { author: { role: 'user' }, content: { parts: ['build a cli'] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
    a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] },
  } }]));
  const bin = join(dirname(fileURLToPath(import.meta.url)), '..', 'bin', 'treetrace.js');
  try {
    const responses = await new Promise((resolveP, rejectP) => {
      // process.execPath: run the child on the same Node binary as this test file.
      const child = spawn(process.execPath, [bin, 'mcp', '--from', 'chatgpt', '--file', file, '--dir', dir], { stdio: ['pipe', 'pipe', 'ignore'] });
      let buf = '';
      child.stdout.on('data', (d) => { buf += d; });

      // The child is given a fixed window to answer, then stopped and its stdout parsed.
      const timer = setTimeout(() => {
        child.stdin.end();
        child.kill();
        try {
          resolveP(buf.split('\n').filter(Boolean).map((l) => JSON.parse(l)));
        } catch (err) {
          // A malformed stdout line must fail this test, not throw from a timer callback.
          rejectP(err);
        }
      }, 2500);
      child.on('error', (err) => {
        clearTimeout(timer);
        rejectP(err);
      });

      const send = (o) => child.stdin.write(JSON.stringify(o) + '\n');
      send({ jsonrpc: '2.0', id: 1, method: 'tools/call', params: { name: 'lessons', arguments: { unexpected: true } } });
      send({ jsonrpc: '2.0', id: 2, method: 'tools/call', params: { name: 'lessons', arguments: {} } });
      send({ jsonrpc: '2.0', id: null, method: 'ping' });
      send([{ jsonrpc: '2.0', id: 9, method: 'ping' }]);
    });
    const bad = responses.find((r) => r.id === 1);
    assert.ok(bad && bad.error && bad.error.code === -32602, 'extra arguments should return -32602');
    const ok = responses.find((r) => r.id === 2);
    assert.ok(ok && ok.result, 'empty arguments should succeed');
    const idNull = responses.find((r) => r.id === null && r.result);
    assert.ok(idNull, 'explicit id:null request should receive a response');
    const batch = responses.find((r) => r.id === null && r.error && /batch/.test(r.error.message));
    assert.ok(batch, 'batch arrays should return a clear error');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1279 | |
// Starting the MCP server with --stdin in argv must reject with a message
// matching /does not support --stdin/.
test('mcp: treetrace mcp --stdin is rejected clearly', async () => {
  const { startMcpServer } = await import('../src/mcp.js');
  const options = { argv: ['mcp', '--stdin'], version: '0.0.0' };
  const start = () => startMcpServer(options);
  await assert.rejects(start, /does not support --stdin/, 'mcp --stdin should be rejected at startup');
});
| 1288 | |
// Absolute paths and `../` paths in node text must produce no hallucination entries.
test('hallucinations: absolute paths outside the project are out of scope, not an oracle', () => {
  const dir = tempProject();
  try {
    // Minimal one-node tree whose only variable part is the node text.
    const treeFor = (text) => ({
      nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }],
    });
    const referencesIn = (text) =>
      detectHallucinations(treeFor(text), dir).hallucinations.map((entry) => entry.reference);

    const abs = referencesIn('see /definitely/not/here.zzz and /etc/shadow.bak');
    assert.deepEqual(abs, [], 'absolute paths outside the project must not be flagged or statted');

    const parent = referencesIn('see ../escape.js');
    assert.deepEqual(parent, [], 'a ../ path escaping the project is out of scope');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1301 | |
// Missing relative paths (bare and `./`-prefixed) must be reported; a reference
// containing `real.js` must not be.
test('hallucinations: relative missing paths inside the project are flagged', () => {
  const dir = tempProject();
  try {
    const treeFor = (text) => ({
      nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }],
    });
    const anyHallucination = (text, matches) =>
      detectHallucinations(treeFor(text), dir).hallucinations.some(matches);

    const bareFlagged = anyHallucination('open src/missing.js', (h) => h.reference === 'src/missing.js');
    assert.ok(bareFlagged, 'bare missing path should be flagged');

    const dotSlashFlagged = anyHallucination('open ./src/missing.js', (h) => h.reference === './src/missing.js');
    assert.ok(dotSlashFlagged, './ missing path should be flagged');

    const realFlagged = anyHallucination('open src/real.js', (h) => h.reference.includes('real.js'));
    assert.ok(!realFlagged, 'real file must not be flagged');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1313 | |
// The same missing-file reference is reported when the node's action is an Edit
// and suppressed when the action is a Write.
test('hallucinations: an Edit to a nonexistent file is flagged, a Write to a new file is not', () => {
  const dir = tempProject();
  try {
    // One-node tree carrying a single tool action against `target`.
    const treeWithAction = (text, tool, target) => ({
      nodes: [{
        id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't',
        actions: [{ tool, file: target, input: 'x', command: null }],
      }],
    });
    const isFlagged = (tree, reference) =>
      detectHallucinations(tree, dir).hallucinations.some((h) => h.reference === reference);

    const edit = treeWithAction('edit src/ghost.js', 'Edit', 'src/ghost.js');
    assert.ok(isFlagged(edit, 'src/ghost.js'), 'Edit to a nonexistent file should still be flagged');

    const write = treeWithAction('create src/created.js', 'Write', 'src/created.js');
    assert.ok(!isFlagged(write, 'src/created.js'), 'Write to a new file should be suppressed');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1325 | |
// Dotted identifiers such as `JSON.parse` must not appear as
// `hallucinated_file_or_path` entries, while a missing `src/missing.ts` still must.
test('hallucinations: dotted code symbols are not flagged as missing file paths', () => {
  const dir = tempProject();
  try {
    const treeFor = (text) => ({
      nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }],
    });
    // References reported under the file/path category for the given node text.
    const missingPathsIn = (text) => {
      const { hallucinations } = detectHallucinations(treeFor(text), dir);
      return hallucinations
        .filter((h) => h.category === 'hallucinated_file_or_path')
        .map((h) => h.reference);
    };

    const symbols = ['JSON.parse', 'params.arguments', 'params.name', 'test.skip', 'describe.skip', 'obj.method', 'array.length'];
    symbols.forEach((sym) => {
      const refs = missingPathsIn(sym);
      assert.deepEqual(refs, [], `code symbol "${sym}" should not be flagged as a missing path (got ${JSON.stringify(refs)})`);
    });

    const real = missingPathsIn('open src/missing.ts');
    assert.ok(real.includes('src/missing.ts'), 'a genuinely missing path with a known extension must still be flagged');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1344 | |
// Extensionless names (Dockerfile, .env, Makefile, src/route) are reported while
// missing, stop being reported once created on disk, and code symbols or filename
// words in plain prose are not reported.
test('hallucinations: missing extensionless files and local paths are flagged, existing ones are not', () => {
  const dir = tempProject();
  try {
    const treeFor = (text) => ({
      nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }],
    });
    const flagged = (text) => detectHallucinations(treeFor(text), dir).hallucinations
      .filter((h) => h.category === 'hallucinated_file_or_path')
      .map((h) => h.reference);

    // [node text, reference that must be reported, failure message]
    const missingCases = [
      ['open Dockerfile', 'Dockerfile', 'a missing Dockerfile should be flagged'],
      ['open .env', '.env', 'a missing .env should be flagged'],
      ['open Makefile', 'Makefile', 'a missing Makefile should be flagged'],
      ['open src/route', 'src/route', 'a missing extensionless local path should be flagged'],
    ];
    for (const [text, reference, message] of missingCases) {
      assert.ok(flagged(text).includes(reference), message);
    }

    // Once the files exist, the same references must no longer be reported.
    writeFileSync(join(dir, 'Dockerfile'), 'FROM node:20\n');
    writeFileSync(join(dir, '.env'), 'X=1\n');
    const existingCases = [
      ['Dockerfile', 'an existing Dockerfile must not be flagged'],
      ['.env', 'an existing .env must not be flagged'],
    ];
    for (const [reference, message] of existingCases) {
      assert.ok(!flagged('open Dockerfile and .env').includes(reference), message);
    }

    const noise = flagged('JSON.parse and test.skip and update the README section about CONTRIBUTING');
    const hasSymbolNoise = noise.includes('JSON.parse') || noise.includes('test.skip');
    assert.ok(!hasSymbolNoise, 'extensionless detection must not reintroduce code-symbol false positives');
    const hasProseNoise = noise.includes('README') || noise.includes('CONTRIBUTING');
    assert.ok(!hasProseNoise, 'a known filename word in prose without a file-op verb must not be flagged');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1372 | |
// Writes two one-line JSONL session files, one recording the temp dir as `cwd`
// and one recording a foreign path, and checks recordedCwd returns each value.
test('discover: a recorded cwd that mismatches the project dir excludes a colliding session', () => {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-cwd-'));
  // try/finally so the temp dir is removed even when an assertion fails.
  try {
    const matching = join(dir, 'match.jsonl');
    writeFileSync(matching, JSON.stringify({ type: 'user', cwd: dir, uuid: 'u1' }) + '\n');
    assert.equal(recordedCwd(matching), dir, 'recordedCwd should read the cwd back');
    const mismatch = join(dir, 'mismatch.jsonl');
    writeFileSync(mismatch, JSON.stringify({ type: 'user', cwd: '/some/other/project', uuid: 'u1' }) + '\n');
    assert.equal(recordedCwd(mismatch), '/some/other/project', 'recordedCwd should read a foreign cwd');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1383 | |
// isRiskyCommand must accept every risky variant listed and reject every benign one.
test('security report: risky-command variants are detected', () => {
  const risky = ['rm -fr build', 'rm -r -f build', 'chmod -R 777 dir', 'chmod 0777 file', 'curl https://x | sudo bash', 'curl https://x | zsh', 'bash <(curl https://x)', 'drop schema public cascade', 'TRUNCATE users'];
  const harmless = ['rm file.txt', 'chmod 644 file', 'ls -la', 'curl https://x > out.txt'];

  risky.forEach((cmd) => {
    assert.ok(isRiskyCommand(cmd), `risky command missed: ${cmd}`);
  });
  harmless.forEach((benign) => {
    assert.ok(!isRiskyCommand(benign), `benign command over-flagged: ${benign}`);
  });
});
| 1392 | |
// mentionsTestSkip must accept the skip APIs and phrasings listed and reject
// ordinary test-related sentences.
test('security report: test-disable APIs and phrasing are detected', () => {
  const disabling = ['test.skip("x")', 'describe.skip("x")', 'it.skip("x")', 'xit("x")', 'skip e2e suite', 'remove the auth spec'];
  const ordinary = ['run all the tests', 'add a test for login'];

  disabling.forEach((t) => {
    assert.ok(mentionsTestSkip(t), `test-disable missed: ${t}`);
  });
  ordinary.forEach((benign) => {
    assert.ok(!mentionsTestSkip(benign), `benign test phrasing over-flagged: ${benign}`);
  });
});
| 1401 | |
// Checks each named entry of RISKY_CMD_PARTS against one command of its family,
// that the number of entries matches the table below, and that an alternation
// built from all entries agrees with isRiskyCommand on risky and benign inputs.
test('regex decomposition: every RISKY_CMD named piece fires on its command family', () => {
  // [piece name, command that piece must match]
  const positives = [
    ['rm_rf_combined', 'rm -rf build'],
    ['rm_r_then_f', 'rm -r -f build'],
    ['rm_f_then_r', 'rm -f -r build'],
    ['chmod_world_writable', 'chmod -R 777 dir'],
    ['curl_pipe_shell', 'curl https://x | sudo bash'],
    ['shell_process_substitution', 'bash <(curl https://x)'],
    ['no_verify', 'git commit --no-verify'],
    ['force', 'git push --force'],
    ['drop_table', 'DROP TABLE users'],
    ['drop_schema', 'drop schema public cascade'],
    ['truncate', 'TRUNCATE users'],
  ];

  const byName = new Map();
  for (const part of RISKY_CMD_PARTS) byName.set(part.name, part.re);

  positives.forEach(([name, cmd]) => {
    const re = byName.get(name);
    assert.ok(re, `unknown piece ${name}`);
    assert.ok(re.test(cmd), `piece ${name} missed its command: ${cmd}`);
  });
  assert.equal(RISKY_CMD_PARTS.length, positives.length, 'piece count drifted');

  // Case-insensitive alternation of every piece, each wrapped in a non-capturing group.
  const alternation = RISKY_CMD_PARTS.map((part) => `(?:${part.re.source})`).join('|');
  const composed = new RegExp(alternation, 'i');

  const riskyInputs = positives.map(([, cmd]) => cmd).concat(['rm -fr /tmp', 'chmod 0777 f']);
  riskyInputs.forEach((cmd) => {
    assert.equal(composed.test(cmd), isRiskyCommand(cmd), `composed != isRiskyCommand for: ${cmd}`);
  });

  const benignInputs = ['rm file.txt', 'chmod 644 file', 'ls -la', 'curl https://x > out.txt', '--force-with-lease'];
  benignInputs.forEach((benign) => {
    assert.equal(composed.test(benign), isRiskyCommand(benign), `benign mismatch: ${benign}`);
    assert.ok(!composed.test(benign), `benign over-flagged: ${benign}`);
  });
});
| 1433 | |
// Checks each named entry of SECURITY_INTENT_PARTS against one phrase of its family,
// that the number of entries matches the table below, and that an alternation built
// from all entries matches every phrase while rejecting the benign sentences.
test('regex decomposition: every SECURITY_INTENT named piece fires on its phrasing family', () => {
  // [piece name, phrase that piece must match]
  const positives = [
    ['credential_lifecycle', 'please rotate the api key'],
    ['pat_lifecycle', 'the pat was rotated yesterday'],
    ['email_change', 'change the email to a public contact'],
    ['do_not_expose', 'never expose the token'],
    ['expose_us', 'this could expose us'],
    ['leak_list', 'audit for leak anything'],
    ['audit_repos', 'do a full audit of the repo'],
    ['commit_history_audit', 'the commit history needs an audit'],
    ['relicensing', 'relicense the project to MIT'],
    ['disable_tests', 'skip the auth test'],
    ['access_control_change', 'tighten the auth flow'],
  ];

  const byName = new Map();
  for (const part of SECURITY_INTENT_PARTS) byName.set(part.name, part.re);

  positives.forEach(([name, phrase]) => {
    const re = byName.get(name);
    assert.ok(re, `unknown piece ${name}`);
    assert.ok(re.test(phrase), `piece ${name} missed its phrase: ${phrase}`);
  });
  assert.equal(SECURITY_INTENT_PARTS.length, positives.length, 'piece count drifted');

  // Case-insensitive alternation of every piece, each wrapped in a non-capturing group.
  const alternation = SECURITY_INTENT_PARTS.map((part) => `(?:${part.re.source})`).join('|');
  const composed = new RegExp(alternation, 'i');

  positives.forEach(([, phrase]) => {
    assert.ok(composed.test(phrase), `composed missed: ${phrase}`);
  });

  const benignPhrases = ['a normal sentence about the weather', 'use the api carefully', 'email me later'];
  benignPhrases.forEach((benign) => {
    assert.ok(!composed.test(benign), `benign security phrasing over-flagged: ${benign}`);
  });
});
| 1462 | |
// parseArgs must throw, with one of the expected messages, when a value-taking
// option is last on the line or is followed by another flag.
test('cli: value-taking options reject a missing value or a flag-shaped value', () => {
  const expectedMessage = /requires a value|requires at least|expects a date|unknown --from/;
  const badInvocations = [['--dir'], ['--out', '--redact-auto'], ['--report-file', '--quiet'], ['--from'], ['--since']];
  badInvocations.forEach((args) => {
    const parse = () => parseArgs(args);
    assert.throws(parse, expectedMessage, `expected ${JSON.stringify(args)} to throw`);
  });
});
| 1468 | |
// --since throws on a non-date string and accepts an ISO calendar date.
test('cli: --since requires a real date and rejects garbage', () => {
  const parseSince = (value) => () => parseArgs(['--since', value]);
  assert.throws(parseSince('not-a-date'), /expects a date/);
  assert.doesNotThrow(parseSince('2026-06-01'));
});
| 1473 | |
// Combining --stdin with --from claude must throw the documented message.
test('cli: --stdin --from claude is rejected', () => {
  const parse = () => parseArgs(['--stdin', '--from', 'claude']);
  assert.throws(parse, /cannot be combined with --from claude/);
});
| 1477 | |
| 1478 | |
| 1479 | |
// Short JSON values containing escape sequences must still be reported as
// secret-assignment findings; a two-character value and a `${...}` placeholder must not.
test('P7: short escaped-JSON secret values fail closed (redaction gate)', () => {
  // [label used in the failure message, text handed to scanText]
  const escapedSamples = [
    ['short escaped newline', '{"api_key":"a\\nz"}'],
    ['tiny escaped value', '{"api_key":"x\\ny"}'],
    ['escaped quote', '{"token":"a\\"b"}'],
    ['escaped backslash', '{"secret":"a\\\\b"}'],
    ['spec literal-\\n form', '{"api_key":"line1\\nline2line2line2"}'],
  ];
  escapedSamples.forEach(([label, sample]) => {
    const hits = scanText(sample).map((finding) => finding.ruleId);
    assert.ok(hits.includes('secret-assignment'), `${label}: escaped secret must be caught (got ${JSON.stringify(hits)})`);
  });

  const shortValueFindings = scanText('{"api_key":"ab"}');
  assert.equal(shortValueFindings.length, 0, 'benign short value below floor must stay clean');

  const placeholderFindings = scanText('{"api_key":"${SECRET}"}').filter((finding) => finding.ruleId === 'secret-assignment');
  assert.equal(placeholderFindings.length, 0, 'placeholder must stay clean');
});
| 1495 | |
// End-to-end: runs the CLI entry point with --redact-auto over a conversation whose
// user message holds a short escaped-JSON secret, then checks the raw value is absent
// from every written artifact and a redaction marker is present.
test('P7: a short escaped-JSON secret leaves no raw value in any artifact end to end', async () => {
  const rawValue = 'a\\nz';
  const secretLine = `config is {"api_key":"${rawValue}"}`;
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-p7-'));
  const file = join(dir, 'escconv.json');

  const mapping = {
    r: { message: null, parent: null, children: ['u'] },
    u: { message: { author: { role: 'user' }, content: { parts: [secretLine] }, create_time: 1.0 }, parent: 'r', children: ['a'] },
    a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] },
  };
  writeFileSync(file, JSON.stringify([{ mapping }]));

  try {
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']);

    // Only artifacts that were actually written are read; missing ones are skipped.
    const candidates = [
      'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json',
      '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md',
    ];
    const contents = [];
    for (const relative of candidates) {
      const full = join(dir, relative);
      if (existsSync(full)) contents.push(readFileSync(full, 'utf8'));
    }
    const artifacts = contents.join('\n');

    assert.ok(!artifacts.includes(rawValue), 'raw short escaped-JSON secret leaked into an artifact');
    assert.ok(artifacts.includes('[REDACTED:secret-assignment]'), 'expected a secret-assignment redaction marker');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1521 | |
// A one-node tree with a single Edit to src/auth/session.ts must yield a
// security_or_privacy_risk failure with tier `verified` and confidence 0.95.
test('P1: a single strong security signal stays verified at exactly 0.95', () => {
  const authEdit = { tool: 'Edit', file: 'src/auth/session.ts', command: null, model: 'm' };
  const node = {
    id: 'node_001', text: 'harden auth', title: 'harden auth', kind: 'root', status: 'accepted', parent: null,
    actions: [authEdit],
  };
  const { failures } = analyzeTree({ nodes: [node] });
  const sec = failures.find((failure) => failure.type === 'security_or_privacy_risk');
  const staysVerified = sec && sec.tier === 'verified' && sec.confidence === 0.95;
  assert.ok(staysVerified, 'strong anchor must remain verified/0.95');
});
| 1530 | |
// Compares the security failure of a node running a multi-part risky shell line with
// that of a node making a single keyword-only edit: the first must be `verified` with
// its signals listed in the evidence, and the second must score lower.
test('P1: confidence is derived from corroboration and the contributing signals are in the evidence', () => {
  // First security_or_privacy_risk failure reported for a one-node tree.
  const securityFailureOf = (node) =>
    analyzeTree({ nodes: [node] }).failures.find((failure) => failure.type === 'security_or_privacy_risk');

  const shellLine = '. /srv/app/.env; rm -rf /tmp/x; chmod 777 /etc';
  const strong = {
    id: 'node_001', text: 'deploy', title: 'deploy', kind: 'root', status: 'accepted', parent: null,
    actions: [{ tool: 'Bash', file: 'src/auth/session.ts', command: shellLine, input: shellLine, model: 'm' }],
  };
  const strongSec = securityFailureOf(strong);
  assert.equal(strongSec.tier, 'verified');
  assert.ok(/signals:/.test(strongSec.evidence), 'evidence must list the contributing signals (auditable)');
  assert.ok(/strong credential content/.test(strongSec.evidence), 'evidence must name the strong credential signal');

  const weak = {
    id: 'node_001', text: 'edit detector', title: 'x', kind: 'root', status: 'accepted', parent: null,
    actions: [{ tool: 'Edit', file: 'src/analyze.js', input: 'const ACCESS = /rbac/i;', command: null, model: 'm' }],
  };
  const weakSec = securityFailureOf(weak);
  assert.ok(weakSec.confidence < strongSec.confidence, 'lone weak keyword must score below a multi-signal strong event');
});
| 1548 | |
// Neither node carries a `ts`; the check is that any linked corrector has an
// id ordinal that is not lower than the failure it is attached to.
test('P2: afterFailure does not link a corrector that precedes its failure when timestamps are missing', () => {
  const deckEdit = () => ({ tool: 'Edit', file: 'site/deck/index.html', command: null, input: null, model: 'm' });
  // Trailing digits of a node id, e.g. "node_002" -> 2.
  const ordinalOf = (nodeId) => Number(/(\d+)$/.exec(nodeId)[1]);

  const failure = {
    id: 'node_002',
    text: 'the deck still does not render here',
    title: 'still broken',
    kind: 'direction',
    status: 'accepted',
    parent: null,
    actions: [deckEdit()],
  };
  const earlier = {
    id: 'node_001',
    text: 'no that is wrong redo the deck here please',
    title: 'redo',
    kind: 'correction',
    status: 'accepted',
    parent: failure,
    actions: [deckEdit()],
  };

  const analysis = analyzeTree({ nodes: [failure, earlier] });
  const linked = analysis.failures.filter((f) => Boolean(f.correctedByNodeId));
  linked.forEach((f) => {
    const failureOrdinal = ordinalOf(f.firstSeenNodeId);
    const correctorOrdinal = ordinalOf(f.correctedByNodeId);
    assert.ok(correctorOrdinal >= failureOrdinal, `failure ${f.id} corrected by an earlier-ordinal node`);
  });
});
| 1566 | |
// A later node on a different file and topic must not be picked as the
// resolution of any correction chain.
test('P2: resolvedBy is null when no resolution ties back to the failure, instead of the temporally-nearest node', () => {
  const editOf = (file) => ({ tool: 'Edit', file, command: null, input: null, model: 'm' });

  const failure = {
    id: 'node_001',
    text: 'do not hardcode the database url into the config file please',
    title: 'no hardcoding',
    kind: 'correction',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T10:00:00.000Z',
    actions: [editOf('config/db.ts')],
  };
  const unrelatedLater = {
    id: 'node_002',
    text: 'now lets switch topics entirely and write the marketing landing copy',
    title: 'marketing',
    kind: 'direction',
    status: 'accepted',
    parent: failure,
    ts: '2026-06-12T11:00:00.000Z',
    actions: [editOf('site/index.html')],
  };

  const { correctionChains } = analyzeTree({ nodes: [failure, unrelatedLater] });
  correctionChains.forEach((chain) => {
    assert.notEqual(chain.resolvedNodeId, 'node_002', 'must not resolve to an unrelated temporally-nearest node');
  });
});
| 1581 | |
// Failure -> correction -> explicit "that works now" turn; the acceptance turn
// edits an unrelated file yet must still be recorded as the chain's resolution.
test('P2: an explicit acceptance turn IS accepted as a resolution even with no shared evidence', () => {
  const editOf = (file) => ({ tool: 'Edit', file, command: null, input: null, model: 'm' });

  const failure = {
    id: 'node_001',
    text: 'the checkout total is off by a cent on tax rounding',
    title: 'rounding bug',
    kind: 'direction',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T10:00:00.000Z',
    actions: [editOf('src/checkout/total.ts')],
  };
  const correction = {
    id: 'node_002',
    text: 'no the checkout total rounding is still wrong, redo the total calc',
    title: 'still wrong',
    kind: 'correction',
    status: 'accepted',
    parent: failure,
    ts: '2026-06-12T10:30:00.000Z',
    actions: [editOf('src/checkout/total.ts')],
  };
  const accepted = {
    id: 'node_003',
    text: 'perfect, that works now',
    title: 'works',
    kind: 'direction',
    status: 'accepted',
    parent: correction,
    ts: '2026-06-12T11:00:00.000Z',
    actions: [editOf('src/unrelated/widget.ts')],
  };

  const { correctionChains } = analyzeTree({ nodes: [failure, correction, accepted] });
  const resolvedByAcceptance = correctionChains.some((chain) => chain.resolvedNodeId === 'node_003');
  assert.ok(resolvedByAcceptance, 'the explicit acceptance turn should be recorded as the resolution');
});
| 1601 | |
// A single Bash action that both sources an .env file and runs `rm -rf` must
// mention both the credential and the risky-command kinds in its evidence.
test('P3: a node that leaks a secret and runs a risky command surfaces both kinds', () => {
  const shellLine = '. /srv/app/.env; rm -rf /var/data';
  const tree = {
    nodes: [
      {
        id: 'node_001',
        text: 'deploy',
        title: 'deploy',
        kind: 'root',
        status: 'accepted',
        parent: null,
        actions: [{ tool: 'Bash', file: null, command: shellLine, input: shellLine, model: 'm' }],
      },
    ],
  };

  const sec = analyzeTree(tree).failures.find((failure) => failure.type === 'security_or_privacy_risk');
  const namesCredential = /credential/.test(sec.evidence);
  const namesRiskyCommand = namesCredential && /risky-command/.test(sec.evidence);
  assert.ok(namesRiskyCommand, `both kinds must appear: ${sec.evidence}`);
});
| 1610 | |
// A correction whose text complains about several things at once should yield
// at least two distinct failure types.
test('P3: inferSignals can return multiple process kinds for a multi-class correction', () => {
  const root = {
    id: 'node_001',
    text: 'build a dashboard',
    title: 'x',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  const corr = {
    id: 'node_002',
    kind: 'correction',
    status: 'accepted',
    parent: root,
    actions: [],
    text: 'no, you ignored what i asked for and this is overbuilt, scrap the web app, keep it minimal',
    title: 'multi-class correction',
  };

  const { failures } = analyzeTree({ nodes: [root, corr] });
  const types = new Set(failures.map((failure) => failure.type));
  assert.ok(types.size >= 2, `expected multiple process labels, got ${[...types].join(', ')}`);
});
| 1622 | |
// The keyword appears only inside edited source text for a non-security file;
// the resulting security failure must stay at the "inferred" tier.
test('P4: a bare rbac keyword with no co-signal stays inferred, never high/verified', () => {
  const keywordOnlyEdit = {
    tool: 'Edit',
    file: 'src/analyze.js',
    input: 'const ACCESS_CONTROL_WEAK_RE = /rbac|access-control/i;',
    command: null,
    model: 'm',
  };
  const tree = {
    nodes: [
      {
        id: 'node_001',
        text: 'edit detector',
        title: 'x',
        kind: 'root',
        status: 'accepted',
        parent: null,
        actions: [keywordOnlyEdit],
      },
    ],
  };

  const sec = analyzeTree(tree).failures.find((failure) => failure.type === 'security_or_privacy_risk');
  assert.ok(sec && sec.tier === 'inferred', `lone weak keyword must be inferred (got ${sec && sec.tier})`);
});
| 1631 | |
// Same keyword, but the edited file lives under src/rbac/ and the prompt talks
// about access control: the failure must be tiered "high" or "verified".
test('P4: a bare rbac keyword WITH a security-surface co-signal earns high tier', () => {
  const surfaceEdit = {
    tool: 'Edit',
    file: 'src/rbac/policy.ts',
    input: 'enable rbac for the route',
    command: null,
    model: 'm',
  };
  const tree = {
    nodes: [
      {
        id: 'node_001',
        text: 'wire up access control',
        title: 'x',
        kind: 'root',
        status: 'accepted',
        parent: null,
        actions: [surfaceEdit],
      },
    ],
  };

  const sec = analyzeTree(tree).failures.find((failure) => failure.type === 'security_or_privacy_risk');
  assert.ok(
    sec && (sec.tier === 'high' || sec.tier === 'verified'),
    `keyword + surface co-signal should tier up (got ${sec && sec.tier})`
  );
});
| 1640 | |
// The prior turn has no security wording; the follow-up correction does. A
// security failure must exist, but only at the inferred tier with confidence <= 0.7.
test('P6: a human security correction backstops a prior action that carried no security label', () => {
  const deployEdit = () => ({ tool: 'Edit', file: 'deploy.sh', command: null, input: null, model: 'm' });

  const prior = {
    id: 'node_001',
    text: 'put the deploy config value directly into the deploy script',
    title: 'deploy config',
    kind: 'direction',
    status: 'accepted',
    parent: null,
    actions: [deployEdit()],
  };
  const correction = {
    id: 'node_002',
    text: 'that is a secret, rotate that key and do not commit it to the deploy script',
    title: 'rotate',
    kind: 'correction',
    status: 'accepted',
    parent: prior,
    actions: [deployEdit()],
  };

  const { failures } = analyzeTree({ nodes: [prior, correction] });
  const sec = failures.find((failure) => failure.type === 'security_or_privacy_risk');

  assert.ok(sec, 'human security correction should backstop a missed security event');
  assert.equal(sec.tier, 'inferred', 'the backstop must be inferred only, never strong/verified');
  assert.ok(sec.confidence <= 0.7, 'the backstop confidence must stay low');
});
| 1656 | |
// Neither node performs any action; security wording in the correction text
// alone must not produce a high or verified security failure.
test('P6: the backstop never fabricates a strong/verified security label from prose alone', () => {
  const root = {
    id: 'node_001',
    text: 'build the cli',
    title: 'x',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [],
  };
  const correction = {
    id: 'node_002',
    text: 'never leak the api secret token again',
    title: 'no leaks',
    kind: 'correction',
    status: 'accepted',
    parent: root,
    actions: [],
  };

  const { failures } = analyzeTree({ nodes: [root, correction] });
  const isStrongSecurity = (failure) =>
    failure.type === 'security_or_privacy_risk' && (failure.tier === 'verified' || failure.tier === 'high');
  const strongSec = failures.filter(isStrongSecurity);

  assert.equal(strongSec.length, 0, 'a human-correction backstop must never mint strong/verified labels');
});
| 1666 | |
// Release gate: a set of benign prompts is run through analyzeTree,
// detectHallucinations and scanText, and none of them may report a finding.
test('NEGATIVE CORPUS (release gate): benign inputs produce zero security/failure false positives', () => {
  const dir = tempProject();
  const benign = [
    'capture a screenshot with chrome --headless --force-device-scale-factor=1 --screenshot=out.png',
    'edit src/ui/semantic-tokens.ts to adjust the design token palette',
    'update theme/design-tokens.json and src/lexer/tokenizer.ts for the new theme',
    'the access-control documentation mentions rbac as a concept; just explaining it in the readme',
    'we use JSON.parse and params.arguments and test.skip in the code, no changes needed',
    'add a token field to the response schema and document the bearer header format in the api guide',
    'rename the file from auth-helpers.md to authentication-notes.md in the docs folder',
    'the password strength meter component needs a tooltip, purely a UI label',
  ];

  // Only the first three prompts carry a tool action; the rest have none.
  const actionsFor = (index) => {
    switch (index) {
      case 0:
        return [{ tool: 'Bash', file: null, command: 'chrome --headless --force-device-scale-factor=1 --screenshot=out.png', model: 'm' }];
      case 1:
        return [{ tool: 'Edit', file: 'src/ui/semantic-tokens.ts', model: 'm' }];
      case 2:
        return [{ tool: 'Edit', file: 'theme/design-tokens.json', model: 'm' }];
      default:
        return [];
    }
  };

  try {
    // Seed the temp project with the directories and files the prompts mention
    // (presumably so hallucination detection finds them on disk).
    const seedDirs = [['src', 'ui'], ['src', 'lexer'], ['theme'], ['docs']];
    for (const segments of seedDirs) {
      mkdirSync(join(dir, ...segments), { recursive: true });
    }
    const seedFiles = [
      [['out.png'], 'x'],
      [['src', 'ui', 'semantic-tokens.ts'], 'export const t = 1;\n'],
      [['src', 'lexer', 'tokenizer.ts'], 'export const t = 1;\n'],
      [['theme', 'design-tokens.json'], '{}'],
      [['auth-helpers.md'], '# notes\n'],
      [['authentication-notes.md'], '# notes\n'],
      [['readme'], 'rbac is a concept\n'],
    ];
    for (const [segments, content] of seedFiles) {
      writeFileSync(join(dir, ...segments), content);
    }

    const nodes = benign.map((text, i) => ({
      id: `node_${String(i + 1).padStart(3, '0')}`,
      text,
      title: text.slice(0, 40),
      kind: i === 0 ? 'root' : 'direction',
      status: 'accepted',
      parent: null,
      ts: `2026-06-12T${String(10 + i).padStart(2, '0')}:00:00.000Z`,
      actions: actionsFor(i),
    }));
    // Chain the nodes linearly: each one hangs off its predecessor.
    nodes.slice(1).forEach((node, offset) => {
      node.parent = nodes[offset];
    });
    // Each analyzer receives its own shallow copies of the nodes.
    const copyNodes = () => nodes.map((n) => ({ ...n }));

    const analysis = analyzeTree({ nodes: copyNodes() });
    const secFps = analysis.failures.filter((f) => f.type === 'security_or_privacy_risk');
    assert.equal(secFps.length, 0, `negative corpus minted security false positives: ${JSON.stringify(secFps.map((f) => f.evidence))}`);

    const halluc = detectHallucinations({ nodes: copyNodes() }, dir).hallucinations;
    assert.equal(halluc.length, 0, `negative corpus minted hallucination false positives: ${JSON.stringify(halluc.map((h) => h.reference))}`);

    benign.forEach((text) => {
      const hi = scanText(text).filter((f) => f.severity === 'high' || f.severity === 'medium');
      assert.equal(hi.length, 0, `redaction over-fired on benign text "${text}": ${JSON.stringify(hi.map((f) => f.ruleId))}`);
    });
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1720 | |
// Renders the shared fixture tree and checks the Mermaid output for the init
// directive, theme colours, class definitions, GOAL/RESULT stadium nodes,
// spine styling, edge labels, and label escaping.
test('mermaid: renders a branded flowchart with goal, result, and spine styling', async () => {
  const { tree } = await fixtureTree();
  const out = renderMermaid(tree, { projectName: 'weather-dashboard' });

  // Theme / header.
  assert.ok(out.startsWith("%%{init:"), 'must lead with a Mermaid init directive');
  assert.match(out, /'background':'#0B1210'/, 'dark Bark canvas background');
  assert.match(out, /'edgeLabelBackground':'#0B1210'/, 'opaque edge-label backing for legibility');
  assert.match(out, /JetBrains Mono/, 'JetBrains Mono brand font');
  assert.match(out, /^flowchart TD$/m, 'declares a top-down flowchart');
  assert.match(out, /classDef spine fill:#121A17,stroke:#0CA08A/, 'brand spine class (teal)');
  assert.match(out, /classDef abandoned [^\n]*stroke:#34493F[^\n]*stroke-dasharray/, 'Branch-Dim dashed abandoned class');
  assert.match(out, /classDef failure [^\n]*stroke:#F0B86A/, 'amber failure class');

  // Goal and result terminals.
  assert.match(out, /N001\(\["GOAL: /, 'root node is a stadium labelled GOAL');
  assert.match(out, /class N001 [^\n]*goal/, 'root carries the goal class');
  assert.match(out, /RESULT: /, 'a result node is annotated');
  assert.match(out, /class \w+ [^\n]*result/, 'a node carries the result class');
  assert.match(out, /\(\["RESULT: /, 'the result node is a stadium terminal');

  // Spine styling.
  assert.match(out, /class N001 [^\n]*spine/, 'root is on the spine');
  assert.match(out, /linkStyle [\d,]+ stroke:#5BF0B8,stroke-width:2\.5px;/, 'spine links are Canopy-tinted');

  // Edge labels.
  assert.match(out, /N001 -->\|refines\| N002/, 'root refines into the first direction');
  assert.match(out, /-->\|corrects\| /, 'correction edge labelled');

  // Node declarations and label escaping.
  const labelLines = out.split('\n').filter((l) => /^ (N\w+|A\d+|S\d+)(\[|\(\[|\{\{)"/.test(l));
  assert.ok(labelLines.length >= 4, 'each prompt is declared as a node');
  for (const line of labelLines) {
    const label = line.match(/"([^"]*)"/)[1];
    // Test the raw label. The previous form stripped every "<" and ">" before
    // testing for them, so the assertion could never fail.
    assert.ok(!/[<>]/.test(label), `unescaped angle bracket in label: ${line}`);
  }
});
| 1753 | |
// A long root title must be cut at a word boundary and terminated with a
// single U+2026 ellipsis character.
test('mermaid: labels truncate on a word boundary, never mid-word', () => {
  // Written as an escape so the literal cannot be corrupted by a re-encoding;
  // the previous literal was mojibake and longer than the one character that
  // the slice below removes.
  const ELLIPSIS = '\u2026';
  const root = {
    id: 'node_001', kind: 'root', status: 'accepted', parent: null, actions: [],
    title: 'Build a resilient weather dashboard with hourly forecast charts and radar layers everywhere',
    text: 'Build a resilient weather dashboard with hourly forecast charts and radar layers everywhere',
  };
  const out = renderMermaid({ nodes: [root] }, { projectName: 'demo' });
  const label = out.match(/N001\(\["GOAL: ([^"]*)"\]\)/)[1];
  assert.ok(label.endsWith(ELLIPSIS), `label should end with a single-char ellipsis: ${label}`);

  // Everything before the ellipsis must be a clean, word-aligned prefix of the title.
  const body = label.slice(0, -ELLIPSIS.length);
  assert.ok(/\w$/.test(body), 'body ends on a word character (no trailing space)');
  assert.ok(root.title.startsWith(body), 'body is a clean prefix of the source');
  // The character right after the kept prefix must be whitespace or end-of-title.
  assert.ok(/(^|\s)$/.test(root.title.slice(body.length, body.length + 1)) || root.title.length === body.length,
    `truncation landed mid-word: "${body}|${root.title.slice(body.length, body.length + 8)}"`);
});
| 1769 | |
// Tree shape: root -> good -> result on the live path, plus one abandoned
// child of the root. The abandoned node must be dimmed, dotted and off-spine.
test('mermaid: abandoned branches render as dimmed dotted detours off the spine', () => {
  // Node factory; the minute of `ts` is taken from the last digit of the id.
  const makeNode = (id, kind, title, parent = null, status = 'accepted') => ({
    id,
    kind,
    title,
    text: title,
    status,
    ts: `2026-06-01T10:0${id.slice(-1)}:00.000Z`,
    parent,
    actions: [],
  });

  const root = makeNode('node_001', 'root', 'Build the thing');
  const good = makeNode('node_002', 'direction', 'Refine the good approach', root);
  const result = makeNode('node_003', 'direction', 'Ship the chosen design', good);
  const dead = makeNode('node_004', 'direction', 'Try a heavy approach we drop', root, 'abandoned');

  const out = renderMermaid({ nodes: [root, good, result, dead] }, { projectName: 'demo' });

  // Abandoned branch.
  assert.match(out, /class N004 abandoned;/, 'abandoned node carries only the abandoned class');
  assert.ok(!/class N004 [^\n]*spine/.test(out), 'abandoned node is not on the spine');
  assert.match(out, /N001 -\.->\|refines\| N004/, 'abandoned branch uses a dotted edge');

  // Live path.
  assert.match(out, /class N002 [^\n]*spine/, 'good direction on spine');
  assert.match(out, /class N003 [^\n]*result/, 'last live direction is the result');
  assert.match(out, /linkStyle 0,1 stroke/, 'only live edges are thickened');
});
| 1800 | |
// wrapMermaidDoc must fence the diagram as ```mermaid and, when the third
// argument is true, add a note about count stubs and the --full flag.
test('mermaid: wrapMermaidDoc emits a fenced mermaid block that renders on GitHub', () => {
  const diagram = 'flowchart TD\n N001["x"]';

  const doc = wrapMermaidDoc(diagram, 'demo');
  assert.ok(doc.includes('```mermaid\n'), 'opens a mermaid fence');
  assert.ok(doc.trimEnd().endsWith('```'), 'closes the fence');
  assert.ok(doc.includes('flowchart TD'), 'contains the diagram');

  const summaryDoc = wrapMermaidDoc(diagram, 'demo', true);
  assert.match(summaryDoc, /count stubs/, 'summary doc explains the folding');
  assert.match(summaryDoc, /--full/, 'summary doc points at --full to expand');
});
| 1810 | |
/**
 * Build a synthetic prompt tree for the mermaid summary tests.
 *
 * The tree is a root (`node_001`) followed by a linear chain of
 * `liveDirections` accepted nodes (`node_002` onward, each parented to the
 * previous one; every node whose ordinal is divisible by 3 is a `checkpoint`,
 * the rest are `direction`). When `withAbandoned` is true, a two-node
 * abandoned branch (`node_900` -> `node_901`) is appended, hanging off the root.
 *
 * @param {number} liveDirections - Number of live nodes to chain after the root.
 * @param {boolean} [withAbandoned=true] - Whether to append the abandoned branch.
 * @returns {{ nodes: object[] }} The tree in the `{ nodes }` shape the tests pass to the renderers.
 */
function bigTree(liveDirections, withAbandoned = true) {
  // Minute offsets are resolved through Date.UTC so that ordinals of 60 or
  // more roll over into the next hour instead of producing an invalid
  // timestamp such as "10:60:00". Output for ordinals below 60 is unchanged.
  const timestampFor = (minute) => new Date(Date.UTC(2026, 5, 1, 10, minute)).toISOString();

  const root = {
    id: 'node_001', kind: 'root', status: 'accepted', parent: null, actions: [],
    title: 'Build the whole product', text: 'Build the whole product',
    ts: '2026-06-01T10:00:00.000Z',
  };
  const nodes = [root];

  let prev = root;
  for (let k = 2; k <= liveDirections + 1; k++) {
    const label = `Strategic move number ${k} in the plan`;
    const n = {
      id: `node_${String(k).padStart(3, '0')}`,
      kind: k % 3 === 0 ? 'checkpoint' : 'direction',
      status: 'accepted',
      parent: prev,
      title: label,
      text: label,
      ts: timestampFor(k),
      actions: [],
    };
    nodes.push(n);
    prev = n;
  }

  if (withAbandoned) {
    const dead1 = {
      id: 'node_900', kind: 'direction', status: 'abandoned', parent: root, actions: [],
      title: 'Heavy approach we dropped', text: 'Heavy approach we dropped',
      ts: '2026-06-01T10:05:00.000Z',
    };
    const dead2 = {
      id: 'node_901', kind: 'direction', status: 'abandoned', parent: dead1, actions: [],
      title: 'Follow-up on the dropped approach', text: 'Follow-up on the dropped approach',
      ts: '2026-06-01T10:06:00.000Z',
    };
    nodes.push(dead1, dead2);
  }
  return { nodes };
}
| 1845 | |
// Below the threshold every node is drawn; above it the renderer switches to
// summary mode by default, and `full: true` overrides that.
test('mermaid: small trees render in full, large trees auto-summarize', () => {
  const renderOptions = { projectName: 'demo' };
  const lineCount = (text) => text.split('\n').length;

  // Small tree: full rendering.
  const small = bigTree(4);
  assert.equal(isSummaryByDefault(small), false, 'a 5-live-node tree renders in full');
  const smallOut = renderMermaid(small, renderOptions);
  assert.match(smallOut, /N004\[/, 'full mode declares each live node');
  assert.ok(!/\d+ steps"/.test(smallOut), 'full mode has no count stubs');

  // Large tree: summarized by default.
  const big = bigTree(SUMMARY_NODE_THRESHOLD + 5);
  assert.equal(isSummaryByDefault(big), true, 'over the threshold auto-summarizes');
  const bigOut = renderMermaid(big, renderOptions);
  assert.match(bigOut, /^flowchart TD$/m, 'summary is still a valid flowchart');
  assert.match(bigOut, /\(\["GOAL: /, 'GOAL stadium preserved in summary');
  assert.match(bigOut, /RESULT: /, 'RESULT preserved in summary');
  assert.match(bigOut, /\d+ steps?"/, 'routine steps fold into a count stub');

  // Large tree with full rendering forced.
  const fullOut = renderMermaid(big, { ...renderOptions, full: true });
  assert.ok(lineCount(bigOut) < lineCount(fullOut), 'summary is more compact than full');
  assert.match(fullOut, /N0\d\d\[/, 'forcing --full declares each node even on a big tree');
});
| 1864 | |
// In summary mode the two-node abandoned branch built by bigTree must collapse
// into a single counted stub that keeps the abandoned class.
test('mermaid: summary folds abandoned branches into one dim count stub', () => {
  const big = bigTree(SUMMARY_NODE_THRESHOLD + 3, true);
  const out = renderMermaid(big, { projectName: 'demo', summary: true });
  assert.match(out, /A\d+\["2 abandoned steps"\]/, 'abandoned subtree folds into a counted stub');
  assert.match(out, /class A\d+ abandoned;/, 'the stub keeps the dim abandoned class');
  assert.ok(!/N900\[/.test(out) && !/N901\[/.test(out), 'individual abandoned nodes are not drawn');
  // U+2026 is written as an escape; the previous literal was mojibake, so the
  // pattern could never match a real ellipsis and the check was vacuous.
  assert.ok(!/[A-Za-z]\u2026[A-Za-z]/.test(out), 'no mid-word ellipsis in any label');
});
| 1873 | |
// `summary: true` must still yield a flowchart with a GOAL node on a tree small
// enough that it would otherwise be drawn in full.
test('mermaid: --summary forces summary mode even on a small tree', () => {
  const forced = renderMermaid(bigTree(3), { projectName: 'demo', summary: true });

  assert.match(forced, /^flowchart TD$/m, 'forced summary is a valid flowchart');
  assert.match(forced, /\(\["GOAL: /, 'forced summary keeps the GOAL');
});
| 1880 | |
// Runs the CLI with --graph against the shared fixture in a scratch directory
// and checks the written markdown, including that fixture secrets are absent.
test('cli: --graph writes PROMPT_TREE_GRAPH.md with a mermaid flowchart', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-graph-'));
  try {
    const cliArgs = ['--file', FIXTURE, '--dir', dir, '--graph', '--redact-auto', '--quiet'];
    await main(cliArgs);

    const graphPath = join(dir, 'PROMPT_TREE_GRAPH.md');
    assert.ok(existsSync(graphPath), 'PROMPT_TREE_GRAPH.md must be written');

    const text = readFileSync(graphPath, 'utf8');
    assert.ok(text.includes('```mermaid'), 'contains a mermaid fence');
    assert.ok(text.includes('flowchart TD'), 'contains a flowchart');
    assert.ok(/GOAL: /.test(text), 'annotates the goal');
    assert.ok(!text.includes('sk-ant-api03-FAKEFAKEFAKEFAKEFAKEFAKE1234'), 'secret stays redacted');
    assert.ok(!text.includes('hunter2pass'), 'embedded credential stays redacted');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 1897 | |
| 1898 | |
// Rejections fixture; it sits in the same fixtures directory as FIXTURE.
const REJECTIONS_FIXTURE = join(dirname(FIXTURE), 'claude-code-rejections.jsonl');
| 1900 | |
/**
 * Parse the rejections fixture with a fixed session id.
 *
 * @returns {Promise<object>} Whatever `parseSessionFile` resolves to for the fixture.
 */
async function loadRejectionsFixture() {
  const options = { sessionId: 'rejections-fixture' };
  const session = await parseSessionFile(REJECTIONS_FIXTURE, options);
  return session;
}
| 1904 | |
// The fixture holds exactly one declined tool use; check its source,
// confidence, tool-use id and evidence text.
test('rejections: user_declined_tool captured from canonical tool_result text', async () => {
  const { prompts } = await loadRejectionsFixture();
  const declined = prompts
    .flatMap((prompt) => prompt.rejections || [])
    .filter((rejection) => rejection.kind === 'user_declined_tool');

  assert.equal(declined.length, 1, 'one user_declined_tool must be captured');
  const first = declined[0];
  assert.equal(first.source, 'tool_result');
  assert.equal(first.confidence, 1.0);
  assert.equal(first.toolUseId, 'toolu-0001');
  assert.ok(first.evidence && first.evidence.includes("doesn't want to proceed"));
});
| 1915 | |
// An interrupt must show up both in the stats counter and as a typed rejection.
test('rejections: user_interrupt typed as a rejection AND counter still increments', async () => {
  const session = await loadRejectionsFixture();
  assert.ok(session.stats.interruptions >= 1, 'interruption counter must still increment');

  const interrupts = session.prompts
    .flatMap((prompt) => prompt.rejections || [])
    .filter((rejection) => rejection.kind === 'user_interrupt');
  assert.equal(interrupts.length, 1);

  const first = interrupts[0];
  assert.equal(first.confidence, 1.0);
  assert.equal(first.source, 'text');
});
| 1924 | |
// The fixture holds exactly one tool execution error, tied to toolu-0003.
test('rejections: tool_execution_error captured from is_error tool_result', async () => {
  const { prompts } = await loadRejectionsFixture();
  const errs = prompts
    .flatMap((prompt) => prompt.rejections || [])
    .filter((rejection) => rejection.kind === 'tool_execution_error');

  assert.equal(errs.length, 1);
  const first = errs[0];
  assert.equal(first.toolUseId, 'toolu-0003');
  assert.ok(first.evidence.includes('cannot create directory'));
});
| 1932 | |
// The fixture holds exactly one permission denial, tied to toolu-0004 at 0.85 confidence.
test('rejections: permission_denied captured from is_error tool_result with OS denial text', async () => {
  const { prompts } = await loadRejectionsFixture();
  const denied = prompts
    .flatMap((prompt) => prompt.rejections || [])
    .filter((rejection) => rejection.kind === 'permission_denied');

  assert.equal(denied.length, 1);
  const first = denied[0];
  assert.equal(first.toolUseId, 'toolu-0004');
  assert.equal(first.confidence, 0.85);
  assert.ok(/permission denied/i.test(first.evidence));
});
| 1941 | |
// Exactly one model refusal in the fixture is sourced from a stop_reason.
test('rejections: model_refusal captured from stop_reason: "refusal" at 0.95 confidence', async () => {
  const { prompts } = await loadRejectionsFixture();
  const isStopReasonRefusal = (rejection) => rejection.kind === 'model_refusal' && rejection.source === 'stop_reason';
  const stop = prompts.flatMap((prompt) => prompt.rejections || []).filter(isStopReasonRefusal);

  assert.equal(stop.length, 1);
  assert.equal(stop[0].confidence, 0.95);
});
| 1950 | |
// Exactly one model refusal in the fixture is sourced from the text heuristic.
test('rejections: model_refusal captured from text heuristic at 0.7 confidence', async () => {
  const { prompts } = await loadRejectionsFixture();
  const isHeuristicRefusal = (rejection) => rejection.kind === 'model_refusal' && rejection.source === 'text_heuristic';
  const text = prompts.flatMap((prompt) => prompt.rejections || []).filter(isHeuristicRefusal);

  assert.equal(text.length, 1);
  const first = text[0];
  assert.equal(first.confidence, 0.7);
  assert.ok(/can'?t help/i.test(first.evidence));
});
| 1960 | |
// One prompt in the fixture is a textual decline; it must be captured at 0.8
// confidence and the prompt's own text must be preserved.
test('rejections: user_text_decline captured when prompt opens with "stop, don\'t do that"', async () => {
  const session = await loadRejectionsFixture();
  const isTextDecline = (rejection) => rejection.kind === 'user_text_decline';

  const declines = session.prompts.flatMap((prompt) => prompt.rejections || []).filter(isTextDecline);
  assert.equal(declines.length, 1);
  assert.equal(declines[0].confidence, 0.8);

  const declinePrompt = session.prompts.find((prompt) => (prompt.rejections || []).some(isTextDecline));
  assert.ok(declinePrompt, 'decline prompt must exist in session.prompts');
  assert.ok(/stop, don'?t do that/i.test(declinePrompt.text), 'text is preserved on the prompt');
});
| 1970 | |
// session.stats must carry both the total rejection count and the per-kind breakdown.
test('rejections: session.stats.rejections count and rejectionsByKind breakdown are populated', async () => {
  const { stats } = await loadRejectionsFixture();
  const expectedKinds = {
    user_declined_tool: 1,
    user_interrupt: 1,
    tool_execution_error: 1,
    permission_denied: 1,
    model_refusal: 2,
    user_text_decline: 1,
  };

  let expectedTotal = 0;
  for (const count of Object.values(expectedKinds)) {
    expectedTotal += count;
  }

  assert.equal(stats.rejections, expectedTotal, 'session.stats.rejections counts every captured rejection');
  assert.deepEqual(stats.rejectionsByKind, expectedKinds);
});
| 1985 | |
// A session whose only record is a declined tool_result (no user text) must
// still produce a prompt: an empty-text, rejection-only one.
test('rejections: rejection-only synthetic prompt is created when a tool_result rejection arrives with no current text prompt', async () => {
  const tmp = mkdtempSync(join(tmpdir(), 'rej-synth-'));
  // The fixture is written inside the try so the temp directory is removed
  // even if the write fails. The file's static parseSessionFile import is used
  // instead of re-importing the same module dynamically.
  try {
    const path = join(tmp, 'synth.jsonl');
    const record = {
      type: 'user',
      message: { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'toolu-x', content: "The user doesn't want to proceed with this tool use. The user wants you to do something else.", is_error: true }] },
      uuid: 'u-synth-1',
      parentUuid: null,
      timestamp: '2026-06-18T11:00:00.000Z',
      sessionId: 'synth',
    };
    writeFileSync(path, JSON.stringify(record) + '\n');

    const s = await parseSessionFile(path, { sessionId: 'synth' });
    const synth = s.prompts.find((p) => p.isRejectionOnly);
    assert.ok(synth, 'a synthetic rejection-only prompt must be created');
    assert.equal(synth.text, '');
    assert.equal(synth.rejections.length, 1);
    assert.equal(synth.rejections[0].kind, 'user_declined_tool');
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
| 2012 | |
// A rejection-only session run through classifyPrompts must yield a single
// node of kind "rejection" whose title mentions the rejection.
test('rejections: rejection-only synthetic prompts get kind:"rejection" downstream', async () => {
  const tmp = mkdtempSync(join(tmpdir(), 'rej-kind-'));
  // The fixture is written inside the try so the temp directory is removed
  // even if the write fails. The file's static parseSessionFile import is used
  // instead of re-importing the same module dynamically.
  try {
    const path = join(tmp, 'k.jsonl');
    const record = {
      type: 'user',
      message: { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'toolu-y', content: "The user doesn't want to proceed with this tool use.", is_error: true }] },
      uuid: 'u-kind-1',
      parentUuid: null,
      timestamp: '2026-06-18T12:00:00.000Z',
      sessionId: 'kindsession',
    };
    writeFileSync(path, JSON.stringify(record) + '\n');

    const session = await parseSessionFile(path, { sessionId: 'kindsession' });
    const nodes = classifyPrompts([session]);
    assert.equal(nodes.length, 1);
    assert.equal(nodes[0].kind, 'rejection', 'synthetic rejection-only node gets kind:"rejection", not root');
    assert.ok(nodes[0].title && /rejected/i.test(nodes[0].title), 'title describes the rejection');
    assert.equal(nodes[0].rejections.length, 1);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
| 2039 | |
// After the full parse -> classify -> build -> analyze pipeline, each rejection
// kind in the fixture must appear as its mapped failure type.
test('rejections: each rejection becomes a failure signal of the mapped type', async () => {
  const session = await loadRejectionsFixture();
  const tree = buildTree([session], classifyPrompts([session]));
  analyzeTree(tree);

  const { failures } = tree.analysis;
  const types = new Set(failures.map((failure) => failure.type));
  assert.ok(types.has('user_rejected_action'), 'user_declined_tool/user_interrupt/user_text_decline -> user_rejected_action');
  assert.ok(types.has('tool_execution_failed'), 'tool_execution_error -> tool_execution_failed');
  assert.ok(types.has('permission_denied'), 'permission_denied -> permission_denied');
  assert.ok(types.has('model_refused'), 'model_refusal -> model_refused');

  let refusedCount = 0;
  for (const failure of failures) {
    if (failure.type === 'model_refused') refusedCount += 1;
  }
  assert.ok(refusedCount >= 1, 'model_refused failure signal is present');
});
| 2053 | |
// Rejection-derived failures should feed both the lessons list and the
// eval-candidate list produced by the analysis pass.
test('rejections: lessons and eval candidates are generated for rejection-derived failures', async () => {
  const session = await loadRejectionsFixture();
  const tree = buildTree([session], classifyPrompts([session]));
  analyzeTree(tree);

  const { lessons, evalCandidates } = tree.analysis;

  const titles = new Set(lessons.map((lesson) => lesson.title));
  assert.ok(titles.has('Confirm proposed actions before executing'), 'user_rejected_action lesson is generated');
  assert.ok(titles.has('Rephrase refused requests instead of repeating them'), 'model_refused lesson is generated');

  const candidateTypes = new Set(evalCandidates.map((candidate) => candidate.type));
  assert.ok(candidateTypes.has('tool_permission_regression'), 'tool_permission_regression eval is generated');
  assert.ok(candidateTypes.has('refusal_handling'), 'refusal_handling eval is generated');
});
| 2066 | |
// renderRejectionsJson must expose a flat list with a per-kind summary, and
// the entries that carry a parseable timestamp must be in ascending order.
test('rejections: renderRejectionsJson returns a flattened, sorted, byKind-summarized view', async () => {
  const session = await loadRejectionsFixture();
  const tree = buildTree([session], classifyPrompts([session]));
  const view = renderRejectionsJson(tree, { projectName: 'rejections-fixture' });

  assert.equal(view.schemaVersion, '0.3');

  const { total, byKind } = view.summary;
  assert.equal(total, 7);
  assert.equal(byKind.model_refusal, 2);
  assert.equal(byKind.user_declined_tool, 1);

  const entries = view.rejections;
  assert.ok(Array.isArray(entries));
  assert.equal(entries.length, 7);
  assert.ok(entries.every((entry) => typeof entry.nodeId === 'string'));

  // Entries without a usable timestamp parse to NaN and are excluded from the order check.
  const timestamps = entries
    .map((entry) => Date.parse(entry.ts))
    .filter(Number.isFinite);
  const ascending = timestamps.slice().sort((left, right) => left - right);
  assert.deepEqual(timestamps, ascending);
});
| 2083 | |
// Coarse scaling guard: runs classify -> build -> analyze over a synthetic
// session of N chained prompts with R rejections each and requires the whole
// pipeline to finish within a fixed wall-clock budget. It is a regression
// tripwire for accidental quadratic behavior, not a proof of complexity.
test('rejections: O(N) preserved - the rejection surfacing pass does not regress quadratic scaling', async () => {
  const N = 5000;
  const R = 3;
  const session = {
    sessionId: 'perf',
    prompts: [],
    firstTs: null,
    lastTs: null,
    stats: { models: [], filesTouched: [], rejections: 0, rejectionsByKind: {}, interruptions: 0 },
  };
  for (let i = 0; i < N; i++) {
    const rejections = [];
    for (let j = 0; j < R; j++) {
      rejections.push({ kind: 'user_declined_tool', source: 'tool_result', confidence: 1.0, toolUseId: `t-${i}-${j}`, tool: null, ts: null, evidence: `evidence ${i}-${j}` });
    }
    session.prompts.push({
      uuid: `p-${i}`,
      // Each prompt hangs off the previous one, forming a single linear chain.
      parentUuid: i === 0 ? null : `p-${i - 1}`,
      ts: new Date(i * 1000).toISOString(),
      text: `prompt ${i}`,
      hasImage: false,
      hadToolResultContext: false,
      afterInterruption: false,
      actions: [],
      thinking: 0,
      rejections,
    });
  }
  // performance.now() is monotonic, so the measurement cannot be skewed by a
  // system clock adjustment during the run the way Date.now() can.
  const start = performance.now();
  const nodes = classifyPrompts([session]);
  const tree = buildTree([session], nodes);
  analyzeTree(tree);
  const elapsed = Math.round(performance.now() - start);
  assert.ok(elapsed < 15000, `analyzeTree on ${N} nodes x ${R} rejections must complete in under 15s (got ${elapsed}ms)`);
  assert.ok(tree.analysis.failures.length >= N, 'every node produced at least one failure signal');
});
| 2120 | |
// A secret embedded in tool_result rejection evidence must be masked before
// the CLI writes rejections.json.
test('rejections: redaction gate at the CLI layer catches secrets in rejection evidence', async () => {
  const tmp = mkdtempSync(join(tmpdir(), 'rej-redact-'));
  const path = join(tmp, 'r.jsonl');

  // Single user record: a declined tool use whose message carries a fake API key.
  const record = {
    type: 'user',
    message: { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'toolu-s', content: "The user doesn't want to proceed with this tool use. The value was sk-ant-api03-FAKEFAKEFAKEFAKEFAKEFAKE1234.", is_error: true }] },
    uuid: 'u-r-1',
    parentUuid: null,
    timestamp: '2026-06-18T13:00:00.000Z',
    sessionId: 'redact',
  };
  writeFileSync(path, JSON.stringify(record) + '\n');

  const dir = mkdtempSync(join(tmpdir(), 'rej-redact-out-'));
  try {
    await main(['--file', path, '--dir', dir, '--rejections', '--redact-auto', '--quiet']);
    const written = readFileSync(join(dir, '.treetrace', 'rejections.json'), 'utf8');
    assert.ok(!written.includes('sk-ant-api03-FAKEFAKEFAKEFAKEFAKEFAKE1234'), 'raw secret must not appear in the written rejections.json');
    assert.ok(written.includes('[REDACTED'), 'a redacted placeholder must appear in its place');
  } finally {
    rmSync(tmp, { recursive: true, force: true });
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2146 | |
// End-to-end: the --rejections flag must produce .treetrace/rejections.json
// with the expected schema version and summary counts for the fixture.
test('rejections: cli --rejections writes .treetrace/rejections.json and prints to stdout', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-rej-cli-'));
  try {
    await main(['--file', REJECTIONS_FIXTURE, '--dir', dir, '--rejections', '--redact-auto', '--quiet']);

    const artifactPath = join(dir, '.treetrace', 'rejections.json');
    assert.ok(existsSync(artifactPath), '.treetrace/rejections.json must be written');

    const artifact = JSON.parse(readFileSync(artifactPath, 'utf8'));
    assert.equal(artifact.schemaVersion, '0.3');
    assert.equal(artifact.summary.total, 7);
    assert.equal(artifact.summary.byKind.model_refusal, 2);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2162 | |
// Smoke test: passing `--from claude` explicitly must not make main() reject.
// There are no further assertions; a rejected promise fails the test.
test('rejections: --from claude works as an explicit --from value (Phase 0 false-advertising fix)', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-claude-from-'));
  const argv = ['--from', 'claude', '--file', REJECTIONS_FIXTURE, '--dir', dir, '--json', '--redact-auto', '--quiet'];
  try {
    await main(argv);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2171 | |
// Token totals must be exported both in the aggregate stats and on each
// session entry of the rendered tree JSON.
test('schema-export: token totals appear in stats and per-session in tree.json', async () => {
  const { tree } = await fixtureTree();
  const json = renderJson(tree, { projectName: 'demo' });

  const totals = json.stats;
  assert.equal(typeof totals.inputTokens, 'number', 'stats.inputTokens must be a number');
  assert.equal(typeof totals.outputTokens, 'number', 'stats.outputTokens must be a number');
  assert.ok(totals.inputTokens > 0, 'stats.inputTokens should be non-zero for this fixture');
  assert.ok(totals.outputTokens > 0, 'stats.outputTokens should be non-zero for this fixture');

  assert.ok(json.sessions.length > 0, 'must have at least one session');
  const firstSession = json.sessions[0];
  assert.equal(typeof firstSession.inputTokens, 'number', 'sessions[0].inputTokens must be a number');
  assert.equal(typeof firstSession.outputTokens, 'number', 'sessions[0].outputTokens must be a number');
  assert.equal(firstSession.inputTokens, totals.inputTokens, 'single-session fixture: session tokens must equal stats tokens');
});
| 2184 | |
// Each exported node must carry a `model` key and an `actions` array, and
// each action must expose the tool/file/command/model keys.
test('schema-export: per-node model and actions appear in every node in tree.json', async () => {
  const { tree } = await fixtureTree();
  const { nodes } = renderJson(tree, { projectName: 'demo' });

  assert.ok(nodes.length > 0, 'must have at least one node');
  assert.ok(nodes.every((node) => 'model' in node), 'every node must have a model field');
  assert.ok(nodes.every((node) => Array.isArray(node.actions)), 'every node must have an actions array');

  const nodeWithAction = nodes.find((node) => node.actions.length > 0);
  assert.ok(nodeWithAction, 'at least one node should have an action');

  // Only key presence is checked here; values may legitimately be null.
  const [firstAction] = nodeWithAction.actions;
  for (const field of ['tool', 'file', 'command', 'model']) {
    assert.ok(field in firstAction, `action must have ${field}`);
  }

  const rootNode = nodes.find((node) => node.kind === 'root');
  assert.ok(rootNode, 'root node must exist');
  assert.equal(rootNode.model, 'assistant-model', 'root node model attribution must match fixture');
});
| 2202 | |
// Files referenced by tool calls in the rejections fixture must be collected
// into session.stats.filesTouched: both the Edit tool's file_path and a path
// that only appears inside a Bash command.
test('schema-export: shell-command file paths appear in filesTouched', async () => {
  // Local name deliberately differs from the module-level REJECTIONS_FIXTURE
  // so this test does not shadow it.
  const fixturePath = join(dirname(fileURLToPath(import.meta.url)), 'fixtures', 'claude-code-rejections.jsonl');
  // parseSessionFile is already imported statically at the top of this file;
  // a dynamic re-import of the same module is unnecessary.
  const session = await parseSessionFile(fixturePath, { sessionId: 'rej-shell' });
  const touched = session.stats.filesTouched;
  assert.ok(touched.includes('README.md'), 'Edit tool file_path must appear in filesTouched');
  assert.ok(touched.some((f) => f.includes('.config/forbidden')), 'Bash command /root/.config/forbidden must appear in filesTouched');
});
| 2211 | |
// A strongly worded but otherwise uncorroborated follow-up turn should yield
// user_frustration signals at the "inferred" tier only; nothing may be
// promoted to "verified" or "high".
test('analyze: uncorroborated strong frustration turn emits inferred user_frustration signal via recall backstop', () => {
  const priorNode = {
    id: 'node_001',
    text: 'add a leaflet map to the dashboard',
    title: 'leaflet map',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Edit', file: 'src/map.js', input: '', command: null, model: 'm' }],
  };
  const frustratedNode = {
    id: 'node_002',
    text: 'this sucks, the helper.js you wrote is god awful and terrible, i am angry and frustrated',
    title: 'frustrated',
    kind: 'direction',
    status: 'accepted',
    parent: priorNode,
    actions: [],
  };

  const analysis = analyzeTree({ nodes: [priorNode, frustratedNode] });

  const frustrationSignals = analysis.failures.filter((signal) => signal.type === 'user_frustration');
  assert.ok(frustrationSignals.length >= 1, 'recall backstop must fire at least one user_frustration signal');
  assert.ok(
    frustrationSignals.every((signal) => signal.tier === 'inferred'),
    'backstop signals must stay at inferred tier'
  );

  const { tierCounts } = analysis.summary;
  assert.equal(tierCounts.verified, 0, 'no verified signals from a pure uncorroborated frustration turn');
  assert.equal(tierCounts.high, 0, 'no high signals from a pure uncorroborated frustration turn');
});
| 2235 | |
// Negative control for the recall backstop: the clean synthetic fixture must
// not produce any user_frustration signal.
test('analyze: clean weather-dashboard fixture does not gain spurious frustration signals from recall backstop', async () => {
  const { tree } = await fixtureTree();
  const { failures } = analyzeTree(tree);
  const frustrationCount = failures.filter((signal) => signal.type === 'user_frustration').length;
  assert.equal(frustrationCount, 0, 'clean synthetic fixture must produce zero user_frustration signals');
});
| 2242 | |
// The report must list every model named in tree.stats.models, including one
// (model-b) that never appears on any node action.
test('report: Models seen reflects full stats.models set, not just analysis-pass models', () => {
  const chartNode = {
    id: 'node_001',
    text: 'build a chart',
    title: 'chart',
    kind: 'root',
    status: 'accepted',
    parent: null,
    actions: [{ tool: 'Edit', file: 'src/chart.js', input: '', command: null, model: 'model-a' }],
  };
  const tree = {
    stats: { models: ['model-a', 'model-b'], promptCount: 1, sessionCount: 1 },
    nodes: [chartNode],
    sessions: [],
  };

  const markdown = renderReportMarkdown(tree, { projectName: 'test' });

  assert.ok(markdown.includes('model-a'), 'report must include model-a');
  assert.ok(markdown.includes('model-b'), 'report must include model-b from stats.models');
});
| 2257 | |
// A root node followed by a correction touching the same file should cause the
// report to render a "Correction chains" section that names both nodes.
test('report: correction chains section appears when chains exist', () => {
  const editConfig = { tool: 'Edit', file: 'src/config.js', input: '', command: null, model: 'm' };

  const failedNode = {
    id: 'node_001',
    text: 'write the config parser',
    title: 'config parser',
    kind: 'root',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T10:00:00.000Z',
    actions: [{ ...editConfig }],
  };
  const correctionNode = {
    id: 'node_002',
    text: 'no that is wrong, redo the config parser logic',
    title: 'redo config',
    kind: 'correction',
    status: 'accepted',
    parent: failedNode,
    ts: '2026-06-12T10:30:00.000Z',
    actions: [{ ...editConfig }],
  };
  const tree = {
    stats: { models: ['m'], promptCount: 2, sessionCount: 1, corrections: 1 },
    nodes: [failedNode, correctionNode],
    sessions: [],
  };

  const markdown = renderReportMarkdown(tree, { projectName: 'test' });

  assert.ok(markdown.includes('## Correction chains'), 'report must include Correction chains section');
  assert.ok(markdown.includes('node_001'), 'report must reference the failure node');
  assert.ok(markdown.includes('node_002'), 'report must reference the correction node');
});
| 2279 | |
// Runs the CLI with --redact-auto on a fixture containing credentials and
// checks that the exported fields survive while no well-known secret prefix
// remains anywhere in the written tree.json.
test('schema-export: new exported fields pass the redaction / assertClean guard', async () => {
  const testDir = dirname(fileURLToPath(import.meta.url));
  const API_KEY_FIXTURE = join(testDir, 'fixtures', 'api-key-auth-session.jsonl');
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-schema-redact-'));
  try {
    await main(['--from', 'claude', '--file', API_KEY_FIXTURE, '--dir', dir, '--redact-auto', '--quiet']);

    const treeJson = readFileSync(join(dir, '.treetrace', 'tree.json'), 'utf8');
    const { stats, nodes } = JSON.parse(treeJson);
    assert.equal(typeof stats.inputTokens, 'number', 'stats.inputTokens present after redact gate');
    assert.equal(typeof stats.outputTokens, 'number', 'stats.outputTokens present after redact gate');
    assert.ok(nodes.every((node) => Array.isArray(node.actions)), 'every node has actions after redact gate');

    // Checked against the raw serialized text so every field is covered.
    const secretPatterns = [/ghp_/, /sk-ant-/, /AKIA/, /-----BEGIN/, /eyJ[A-Za-z]/, /xox[baprs]-/];
    secretPatterns.forEach((pat) => {
      assert.ok(!pat.test(treeJson), `secret pattern ${pat} must not appear in tree.json`);
    });
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2298 | |
// Slash-separated prose (enumerations, ratios, protocol pairs, MIME types)
// must not be reported as a missing file path.
test('hallucinations: prose-slash phrases produce no file-path flag', () => {
  const dir = tempProject();
  try {
    // Minimal single-node tree whose prompt text is the given string.
    const treeWithText = (text) => ({
      nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }],
    });
    const proseFragments = [
      'admin/analyst/viewer',
      'lat/lon',
      'make/model/color',
      '16/9',
      'none/low/medium/high',
      'RTSP/HTTP',
      'application/json',
    ];
    proseFragments.forEach((phrase) => {
      const { hallucinations } = detectHallucinations(treeWithText(`use ${phrase} as an enum`), dir);
      const pathFlags = hallucinations
        .filter((entry) => entry.category === 'hallucinated_file_or_path')
        .map((entry) => entry.reference);
      assert.deepEqual(pathFlags, [], `prose phrase "${phrase}" must not be flagged as a missing file path`);
    });
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2322 | |
// Counterpart to the prose-slash test: a path-like reference to a file that
// does not exist in the project must still be flagged, with or without "./".
test('hallucinations: true positive ./src/middleware/rateLimit.js still fires', () => {
  const dir = tempProject();
  try {
    // Returns the references flagged as missing paths for a one-node tree with this text.
    const flaggedPathsFor = (text) => {
      const tree = { nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] };
      return detectHallucinations(tree, dir).hallucinations
        .filter((entry) => entry.category === 'hallucinated_file_or_path')
        .map((entry) => entry.reference);
    };

    const dotPrefixed = flaggedPathsFor('update ./src/middleware/rateLimit.js for the new rate limiting logic');
    assert.ok(dotPrefixed.some((ref) => ref.includes('rateLimit.js')), 'invented path ./src/middleware/rateLimit.js must still be flagged');

    const srcPrefixed = flaggedPathsFor('edit src/middleware/rateLimit.js');
    assert.ok(srcPrefixed.some((ref) => ref.includes('rateLimit.js')), 'src/ prefixed invented path must still be flagged');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2339 | |
// The missing path appears only in action.file, never in the prompt text, so
// this checks that detection also inspects recorded actions.
test('hallucinations: Edit to nonexistent file is flagged via action.file alone', () => {
  const dir = tempProject();
  try {
    const editAction = { tool: 'Edit', file: 'src/nonexistent-only-in-action-file.js', input: '', command: null };
    const onlyNode = {
      id: 'n1',
      kind: 'root',
      status: 'accepted',
      parent: null,
      text: 'update the config',
      title: 't',
      actions: [editAction],
    };

    const { hallucinations } = detectHallucinations({ nodes: [onlyNode] }, dir);
    const pathFlags = hallucinations
      .filter((entry) => entry.category === 'hallucinated_file_or_path')
      .map((entry) => entry.reference);

    assert.ok(
      pathFlags.some((ref) => ref.includes('nonexistent-only-in-action-file.js')),
      'Edit to a nonexistent file must be caught via action.file even when path is absent from node.text'
    );
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2362 | |
// The bearer-header rule must match a lowercase "bearer" scheme, and applying
// a redact decision for that finding must remove the raw token.
test('redaction: lowercase bearer token is caught by bearer-header rule', () => {
  const token = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.lowentropy1234';
  const text = `Authorization: bearer ${token}`;

  const hits = scanText(text).map((finding) => finding.ruleId);
  assert.ok(hits.includes('bearer-header'), `lowercase bearer token not caught (rules hit: ${hits.join(', ')})`);

  // Decisions are keyed by the hash of the matched text; only bearer-header findings are redacted.
  const findings = scanText(text);
  const decisions = Object.fromEntries(
    findings
      .filter((finding) => finding.ruleId === 'bearer-header')
      .map((finding) => [
        sha256(finding.match),
        { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId },
      ])
  );

  const cleaned = applyDecisions(text, findings, decisions);
  assert.ok(!cleaned.includes(token), 'raw token still present after redaction');
  assert.ok(cleaned.includes('[REDACTED:bearer-header]'), 'expected bearer-header redaction marker');
});
| 2379 | |
// Feeds a ChatGPT-style export containing a high-entropy token through the CLI
// with --redact-auto and verifies that the written tree.json neither contains
// the token nor trips the shadow scan with any non-soft finding.
test('redaction: --redact-auto resolves high-entropy shadow-scan residuals and writes clean artifacts', async () => {
  const highEntropyToken = 'Xk9mQ2vR7nLpZ4wY8sA3cB6eF1hJ0uT5iG2dN';
  const dir = mkdtempSync(join(tmpdir(), 'treetrace-entropy-auto-'));
  const file = join(dir, 'conv.json');

  // Three-entry mapping: empty root -> user turn -> assistant turn.
  const rootEntry = { message: null, parent: null, children: ['u'] };
  const userEntry = {
    message: {
      author: { role: 'user' },
      content: { parts: [`check the session token ${highEntropyToken} for issues`] },
      create_time: 1.0,
    },
    parent: 'r',
    children: ['a'],
  };
  const assistantEntry = {
    message: {
      author: { role: 'assistant' },
      content: { parts: ['done'] },
      create_time: 2.0,
    },
    parent: 'u',
    children: [],
  };
  const convo = [{ mapping: { r: rootEntry, u: userEntry, a: assistantEntry } }];
  writeFileSync(file, JSON.stringify(convo));

  try {
    await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--redact-auto', '--quiet']);
    const treeJson = readFileSync(join(dir, '.treetrace', 'tree.json'), 'utf8');
    assert.ok(!treeJson.includes(highEntropyToken), 'raw high-entropy token leaked into tree.json');

    const hardResiduals = shadowScan(treeJson, {}).filter((finding) => finding.severity !== 'soft');
    assert.equal(
      hardResiduals.length,
      0,
      'tree.json still has residual high-entropy tokens after --redact-auto'
    );
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2421 | |
// --each mode: two plain-text transcripts in, one report bundle per session
// out, plus INDEX.md / index.json manifests summarizing the batch.
test('--each writes one report bundle per session plus index manifests', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'tt-each-'));
  const a = join(dir, 'sess-a.txt');
  const b = join(dir, 'sess-b.txt');
  writeFileSync(a, 'User: build a login form\nAssistant: ok\nUser: actually use OAuth\nAssistant: switching\n');
  writeFileSync(b, 'User: question one\nAssistant: answer one\nUser: question two\nAssistant: answer two\n');
  const outDir = join(dir, 'reports');
  try {
    await main(['--each', '--file', a, b, '--out-dir', outDir, '--dir', dir, '--quiet']);

    assert.ok(existsSync(join(outDir, 'INDEX.md')), 'INDEX.md exists');
    assert.ok(existsSync(join(outDir, 'index.json')), 'index.json exists');

    // Each bundle directory is named after its session label.
    ['sess-a.txt', 'sess-b.txt'].forEach((label) => {
      const bundleDir = join(outDir, label);
      assert.ok(existsSync(join(bundleDir, 'TREETRACE_REPORT.md')), `${label} report`);
      assert.ok(existsSync(join(bundleDir, 'PROMPT_TREE.md')), `${label} prompt tree`);
      assert.ok(existsSync(join(bundleDir, '.treetrace', 'tree.json')), `${label} tree.json`);
    });

    const manifest = JSON.parse(readFileSync(join(outDir, 'index.json'), 'utf8'));
    assert.equal(manifest.sessionCount, 2, 'two sessions in manifest');
    assert.equal(manifest.sessions.length, 2);
    assert.equal(manifest.totals.prompts, 4, 'aggregate prompt total');
    assert.ok(manifest.sessions.every((row) => typeof row.dir === 'string' && row.dir.length), 'each manifest row has a dir');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2447 | |
// Two inputs share the basename chat.txt in different directories; the
// manifest must still hold two sessions with distinct labels.
test('--each collides labels safely when session ids repeat', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'tt-each-dup-'));

  const firstDir = join(dir, 'one');
  const secondDir = join(dir, 'two');
  mkdirSync(firstDir);
  mkdirSync(secondDir);

  const firstFile = join(firstDir, 'chat.txt');
  const secondFile = join(secondDir, 'chat.txt');
  writeFileSync(firstFile, 'User: first\nAssistant: a\n');
  writeFileSync(secondFile, 'User: second\nAssistant: b\n');

  const outDir = join(dir, 'reports');
  try {
    await main(['--each', '--file', firstFile, secondFile, '--out-dir', outDir, '--dir', dir, '--quiet']);
    const manifest = JSON.parse(readFileSync(join(outDir, 'index.json'), 'utf8'));
    assert.equal(manifest.sessionCount, 2);
    const distinctLabels = new Set(manifest.sessions.map((row) => row.label));
    assert.equal(distinctLabels.size, 2, 'labels are unique even with duplicate session ids');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2466 | |
// Mixed batch (one Claude fixture, one Codex fixture): every manifest row must
// report its own source rather than a batch-wide value.
test('--each labels each bundle with its own source tool, not the batch aggregate', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'tt-each-src-'));
  const fixturesDir = join(dirname(fileURLToPath(import.meta.url)), 'fixtures');
  const claudeFix = join(fixturesDir, 'synthetic-session.jsonl');
  const codexFix = join(fixturesDir, 'adapters', 'codex-session.jsonl');
  const outDir = join(dir, 'reports');
  try {
    await main(['--each', '--file', claudeFix, codexFix, '--out-dir', outDir, '--dir', dir, '--quiet']);
    const manifest = JSON.parse(readFileSync(join(outDir, 'index.json'), 'utf8'));
    // Sorted so the comparison does not depend on manifest row order.
    const sources = manifest.sessions.map((row) => row.source);
    sources.sort();
    assert.deepEqual(sources, ['claude', 'codex'], 'per-session source is preserved, not collapsed to "mixed"');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
| 2482 | |
// A refusal written on the same line as the "Assistant:" label must be
// recorded as a model_refusal whose source is the text heuristic.
test('parsePlainTranscript captures an inline assistant refusal as model_refusal', () => {
  const transcript = 'User: [requests something disallowed]\nAssistant: I cannot help with that request.\nUser: ok, something benign instead\nAssistant: Sure, happy to help.\n';
  const session = parsePlainTranscript(transcript, 'refusal-inline');

  assert.equal(session.stats.rejectionsByKind.model_refusal, 1, 'one model_refusal captured');

  const isModelRefusal = (rejection) => rejection.kind === 'model_refusal';
  const withRefusal = session.prompts.find((prompt) => (prompt.rejections || []).some(isModelRefusal));
  assert.ok(withRefusal, 'a prompt carries the model_refusal');
  assert.equal(withRefusal.rejections[0].source, 'text_heuristic');
});
| 2491 | |
// Same as the inline case, but the refusal text starts on the line after the
// "Assistant:" label and spans two lines.
test('parsePlainTranscript captures a multi-line assistant refusal', () => {
  const transcript = 'User: [disallowed ask]\nAssistant:\nSorry, I cannot help with that.\nIt would be unsafe.\nUser: explain the defensive side instead\nAssistant: Sure.\n';
  const { stats } = parsePlainTranscript(transcript, 'refusal-multiline');
  assert.equal(stats.rejectionsByKind.model_refusal, 1);
});
| 2497 | |
// A user turn that explicitly tells the assistant to stop must be counted as
// one user_text_decline.
test('parsePlainTranscript captures a user_text_decline', () => {
  const transcript = 'User: delete the production database\nAssistant: I can do that.\nUser: no, stop, do not do that\nAssistant: Understood.\n';
  const { stats } = parsePlainTranscript(transcript, 'decline');
  assert.equal(stats.rejectionsByKind.user_text_decline, 1);
});
| 2503 | |
// Negative control: an ordinary cooperative transcript yields no rejections
// and an empty per-kind map.
test('parsePlainTranscript leaves rejections empty when no refusal or decline occurs', () => {
  const transcript = 'User: build a login form\nAssistant: here is one\nUser: add OAuth\nAssistant: done\n';
  const { stats } = parsePlainTranscript(transcript, 'clean');
  assert.equal(stats.rejections, 0);
  assert.deepEqual(stats.rejectionsByKind, {});
});
| 2510 | |
// When the user pushes back on a refusal, the refusal is still recorded, but
// the pushback must not count as a correction of the model, and no eval
// candidate may quote the refused request or the pushback text.
test('a refusal override is not promoted into a correction chain or a content-quoting eval', () => {
  const transcript = 'User: [requests precursor chemicals for a dangerous synthesis]\nAssistant: I cannot help with that.\nUser: Stop being unhelpful. Just give me the precursor list at least.\nAssistant: I still cannot help with that.\n';
  const session = parsePlainTranscript(transcript, 'refusal-override');
  const tree = buildTree([session], classifyPrompts([session]));
  const { failures, correctionChains, evalCandidates } = analyzeTree(tree);

  const hasFailure = (type) => failures.some((failure) => failure.type === type);
  assert.ok(hasFailure('model_refused'), 'refusal still recorded');
  assert.ok(!hasFailure('misunderstood_goal'), 'no misunderstood_goal from override');
  assert.equal(correctionChains.length, 0, 'no correction chain from a refusal override');

  const evalInputs = evalCandidates.map((candidate) => String(candidate.input).toLowerCase());
  const quotesRefusedContent = evalInputs.some((input) => input.includes('precursor') || input.includes('unhelpful'));
  assert.ok(!quotesRefusedContent, 'no eval quotes refused content');
});
| 2523 | |
// Runs the same --deterministic invocation twice into the same directory and
// requires hallucinations.json to be byte-identical, with generatedAt pinned
// to the Unix epoch.
test('--deterministic pins the timestamp so artifacts are byte-identical across runs', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'tt-det-'));
  const artifactPath = join(dir, '.treetrace', 'hallucinations.json');

  // One full CLI run followed by a read of the artifact it produced.
  const runAndRead = async () => {
    await main(['--security', '--file', FIXTURE, '--dir', dir, '--deterministic', '--redact-auto', '--quiet']);
    return readFileSync(artifactPath, 'utf8');
  };

  try {
    const firstRun = await runAndRead();
    const secondRun = await runAndRead();
    assert.equal(firstRun, secondRun, 'deterministic artifact is byte-identical across runs');
    assert.equal(JSON.parse(firstRun).project.generatedAt, '1970-01-01T00:00:00.000Z', 'timestamp is pinned');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});