| @@ -4,6 +4,15 @@ Notable changes to TreeTrace. The format follows Keep a Changelog, and the proje | ||
| ## Unreleased | ||
| + | ### Security | |
| + | ||
| + | - Redaction now catches generic secret assignments whose quoted value contains backslash escapes: an escaped newline (as in the serialized JSON form `{"api_key":"line1\nline2"}`, where `\n` is a literal backslash followed by `n`), an escaped tab, an escaped quote, or an escaped backslash. Serialized JSON is a common way for multiline and escaped secret values to appear in transcripts, and these values previously reached written artifacts unredacted even under `--redact-auto`. | |
| + | ||
| + | ### Fixed | |
| + | ||
| + | - The hallucination detector no longer reports ordinary dotted code symbols such as `JSON.parse`, `params.name`, `test.skip`, and `describe.skip` as missing file paths. A dotted token with no slash is only treated as a file reference when its extension is a known file extension, so member expressions are left alone while genuine paths such as `src/missing.ts` are still flagged. | |
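| + | - The hallucination detector now recognizes common extensionless file references, including `Dockerfile`, `Makefile`, `README`, `.env`, and slash-containing local paths such as `src/route`. Bare filename words such as `README` are only flagged when the text they appear in also contains a file-operation verb (for example `open`, `edit`, or `rm`), which keeps prose mentions from becoming false positives; dot-prefixed names such as `.env` and slash-containing paths are checked without that requirement. | |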
| + | ||
| ## 0.5.0 - 2026-06-13 | ||
| ### Added |
| @@ -172,7 +172,7 @@ TreeTrace runs inside the repository, so it can verify what the agent claimed ag | ||
| - `hallucinated_file_or_path` | ||
| - `hallucinated_import_or_package` | ||
| - | Each one becomes an eval candidate, for example "verify the file or import exists before editing." The checks are fully deterministic: file and path existence and import and package declaration. To avoid false positives, files the agent created during the session, relative paths, Node builtins, and Python standard library modules are excluded. | |
| + | Each one becomes an eval candidate, for example "verify the file or import exists before editing." The checks are fully deterministic: file and path existence and import and package declaration. File references include paths with a known extension, common extensionless files such as `Dockerfile`, `Makefile`, `README`, and `.env`, and slash-containing local paths such as `src/route`. To avoid false positives, files the agent created during the session, relative paths, Node builtins, and Python standard library modules are excluded, ordinary dotted code symbols such as `JSON.parse` or `test.skip` are not treated as paths, and bare filename words such as `README` are only flagged when the text they appear in also contains a file-operation verb. | |
| This is honest about its limits. File, path, import, and package existence are solid. Per-symbol and per-API resolution inside a module is not attempted, because that would need an AST and a language toolchain, which would break the zero-dependency promise. TreeTrace does not claim to detect a hallucinated function or method on a real module. | ||
| @@ -19,10 +19,34 @@ const PY_STDLIB = new Set([ | ||
| 'html', 'email', 'warnings', 'contextlib', 'operator', 'weakref', 'gc', 'platform', 'signal', | ||
| ]); | ||
| + | const KNOWN_FILE_EXTENSIONS = new Set([ | |
| + | 'js', 'mjs', 'cjs', 'jsx', 'ts', 'tsx', 'mts', 'cts', 'd.ts', | |
| + | 'py', 'pyi', 'rb', 'go', 'rs', 'java', 'kt', 'kts', 'scala', 'clj', 'cljs', | |
| + | 'c', 'h', 'cc', 'cpp', 'cxx', 'hpp', 'hh', 'm', 'mm', 'swift', 'php', 'cs', | |
| + | 'lua', 'pl', 'pm', 'r', 'jl', 'dart', 'ex', 'exs', 'erl', 'hrl', 'elm', 'hs', | |
| + | 'json', 'jsonc', 'json5', 'yaml', 'yml', 'toml', 'ini', 'cfg', 'conf', 'env', | |
| + | 'xml', 'html', 'htm', 'css', 'scss', 'sass', 'less', 'svg', 'vue', 'svelte', 'astro', | |
| + | 'md', 'mdx', 'markdown', 'rst', 'txt', 'csv', 'tsv', 'sql', 'graphql', 'gql', | |
| + | 'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd', 'dockerfile', 'lock', 'gradle', | |
| + | 'gitignore', 'gitattributes', 'npmrc', 'nvmrc', 'editorconfig', 'eslintrc', 'prettierrc', | |
| + | 'png', 'jpg', 'jpeg', 'gif', 'webp', 'ico', 'pdf', 'proto', 'tf', 'tfvars', | |
| + | ]); | |
| + | ||
| + | const KNOWN_EXTENSIONLESS_FILES = new Set([ | |
| + | 'dockerfile', 'makefile', 'readme', 'license', 'licence', 'notice', 'changelog', | |
| + | 'authors', 'contributing', 'codeowners', 'procfile', 'rakefile', 'gemfile', | |
| + | 'pipfile', 'brewfile', 'vagrantfile', 'jenkinsfile', 'gnumakefile', | |
| + | '.env', '.gitignore', '.gitattributes', '.npmrc', '.nvmrc', '.editorconfig', | |
| + | '.dockerignore', '.eslintrc', '.prettierrc', '.babelrc', '.bashrc', '.zshrc', | |
| + | ]); | |
| + | ||
| const FILE_TOKEN_RE = /(?:[\w@./+-]*\/)?[\w@.+-]+\.[A-Za-z][A-Za-z0-9]{0,9}\b/g; | ||
| + | const PATHISH_TOKEN_RE = /(?:\.{0,2}\/)?[\w@.+-]+(?:\/[\w@.+-]+)+\/?/g; | |
| + | const BAREWORD_TOKEN_RE = /(?:^|[\s'"`([{])(\.?[A-Za-z][\w.-]*)(?=$|[\s'"`)\]},.;:])/g; | |
| const REL_PREFIX_RE = /^(?:\.\/|\.\.\/)/; | ||
| const URL_LIKE_RE = /:\/\//; | ||
| const VERSION_LIKE_RE = /^\d+(?:\.\d+)+$/; | ||
| + | const FILE_OP_VERB_RE = /\b(?:open|edit|read|cat|touch|create|write|delete|rm|view|append|chmod|mv|cp|run)\b/i; | |
| const JS_IMPORT_RE = | ||
| /\b(?:import|export)\b[^;\n]*?\bfrom\s*['"]([^'"\n]+)['"]|\brequire\(\s*['"]([^'"\n]+)['"]\s*\)|\bimport\(\s*['"]([^'"\n]+)['"]\s*\)/g; | ||
| const PY_IMPORT_RE = /^[ \t]*(?:from\s+([A-Za-z_][\w.]*)\s+import\b|import\s+([A-Za-z_][\w.]*(?:\s*,\s*[A-Za-z_][\w.]*)*))/gm; | ||
| @@ -114,13 +138,38 @@ function normalizeFileKey(p) { | ||
| return p.replace(/^\.?\//, '').replace(/\\/g, '/').toLowerCase(); | ||
| } | ||
| + | function tokenExtension(tok) { | |
| + | const dot = tok.lastIndexOf('.'); | |
| + | if (dot < 0) return ''; | |
| + | return tok.slice(dot + 1).toLowerCase(); | |
| + | } | |
| + | ||
| + | function hasSlash(tok) { | |
| + | return tok.includes('/'); | |
| + | } | |
| + | ||
| function looksLikeFileToken(tok) { | ||
| if (tok.length < 3 || tok.length > 200) return false; | ||
| if (URL_LIKE_RE.test(tok)) return false; | ||
| if (VERSION_LIKE_RE.test(tok)) return false; | ||
| - | const ext = tok.slice(tok.lastIndexOf('.') + 1).toLowerCase(); | |
| + | const ext = tokenExtension(tok); | |
| if (!ext || ext.length > 10) return false; | ||
| - | return true; | |
| + | if (hasSlash(tok)) return true; | |
| + | return KNOWN_FILE_EXTENSIONS.has(ext); | |
| + | } | |
| + | ||
| + | function looksLikeExtensionlessFile(tok, context) { | |
| + | if (tok.length < 3 || tok.length > 200) return false; | |
| + | if (URL_LIKE_RE.test(tok)) return false; | |
| + | const lower = tok.toLowerCase().replace(/^\.\//, ''); | |
| + | if (KNOWN_EXTENSIONLESS_FILES.has(lower)) { | |
| + | if (lower.startsWith('.')) return true; | |
| + | return FILE_OP_VERB_RE.test(context || ''); | |
| + | } | |
| + | if (hasSlash(tok) && !tokenExtension(tok)) { | |
| + | return /^(?:\.{0,2}\/)?[\w@.+-]+(?:\/[\w@.+-]+)+\/?$/.test(tok); | |
| + | } | |
| + | return false; | |
| } | ||
| function withinProjectDir(projectDir, target) { | ||
| @@ -169,13 +218,25 @@ function collectFileReferences(tree) { | ||
| seen.add(key); | ||
| refs.push({ token: tok, key, nodeId }); | ||
| }; | ||
| + | const pushExtensionless = (raw, nodeId, context) => { | |
| + | const tok = raw.trim().replace(/^['"`(]+|['"`),.;:]+$/g, ''); | |
| + | if (tokenExtension(tok) && !KNOWN_EXTENSIONLESS_FILES.has(tok.toLowerCase().replace(/^\.\//, ''))) return; | |
| + | if (!looksLikeExtensionlessFile(tok, context)) return; | |
| + | const key = normalizeFileKey(tok); | |
| + | if (seen.has(key)) return; | |
| + | seen.add(key); | |
| + | refs.push({ token: tok, key, nodeId }); | |
| + | }; | |
| for (const node of tree.nodes) { | ||
| if (node.status === 'abandoned') continue; | ||
| const text = String(node.text || '').slice(0, MAX_TEXT_SCAN); | ||
| for (const m of text.matchAll(FILE_TOKEN_RE)) push(m[0], node.id); | ||
| + | for (const m of text.matchAll(PATHISH_TOKEN_RE)) pushExtensionless(m[0], node.id, text); | |
| + | for (const m of text.matchAll(BAREWORD_TOKEN_RE)) pushExtensionless(m[1], node.id, text); | |
| for (const a of node.actions || []) { | ||
| const body = `${a.input || ''}`.slice(0, MAX_TEXT_SCAN); | ||
| for (const m of body.matchAll(FILE_TOKEN_RE)) push(m[0], node.id); | ||
| + | for (const m of body.matchAll(PATHISH_TOKEN_RE)) pushExtensionless(m[0], node.id, body); | |
| } | ||
| } | ||
| return refs; |
| @@ -25,7 +25,7 @@ export const RULES = [ | ||
| { id: 'wireguard-key', severity: 'medium', re: /\b(PrivateKey|PresharedKey)\s*=\s*[A-Za-z0-9+/]{42,44}=?/g }, | ||
| { id: 'url-basic-auth', severity: 'medium', re: /\b[a-z][a-z0-9+.-]{0,30}:\/\/[^/\s:@'"`]{2,256}:[^/\s@'"`]{2,256}@[^\s'"`]{1,512}/gi }, | ||
| { id: 'bearer-header', severity: 'medium', re: /\bBearer\s+[A-Za-z0-9._+/=-]{20,}\b/g }, | ||
| - | { id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"[^"\\]{4,512}"|'[^'\\]{4,512}'|`[^`\\]{4,512}`|[^\s'"`,;){}]{6,512})/gi }, | |
| + | { id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.){4,512}"|'(?:[^'\\]|\\.){4,512}'|`(?:[^`\\]|\\.){4,512}`|[^\s'"`,;){}]{6,512})/gi }, | |
| { id: 'email', severity: 'soft', re: /\b[A-Za-z0-9._%+-]+@(?!(?:users\.noreply\.github\.com|example\.(?:com|org)))[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g }, | ||
| { id: 'ipv4', severity: 'soft', re: /\b(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\b(?!\.\d)/g }, |
| @@ -122,6 +122,50 @@ test('redaction: rule coverage on known formats', () => { | ||
| } | ||
| }); | ||
| + | test('redaction: escaped characters inside quoted secret assignments are still caught', () => { | |
| + | const cases = [ | |
| + | ['escaped newline', '{"api_key":"line1\\nline2line2"}'], | |
| + | ['escaped tab', '{"api_key":"col1\\tcol2value"}'], | |
| + | ['escaped quote', '{"api_key":"abc\\"defghij"}'], | |
| + | ['escaped backslash', '{"api_key":"abc\\\\defghij"}'], | |
| + | ['single-quoted escaped newline', "{'password':'line1\\nline2value'}"], | |
| + | ['backtick escaped newline', 'const secret = `line1\\nline2value`;'], | |
| + | ]; | |
| + | for (const [label, sample] of cases) { | |
| + | const hits = scanText(sample).map((f) => f.ruleId); | |
| + | assert.ok( | |
| + | hits.includes('secret-assignment'), | |
| + | `${label}: escaped secret value should be caught (got ${JSON.stringify(hits)} for ${sample})` | |
| + | ); | |
| + | } | |
| + | }); | |
| + | ||
| + | test('redaction: end-to-end escaped-JSON secret leaves no raw value in any artifact', async () => { | |
| + | const rawValue = 'line1\\nline2line2line2'; | |
| + | const secretLine = `config is {"api_key":"${rawValue}"}`; | |
| + | const dir = mkdtempSync(join(tmpdir(), 'treetrace-esc-')); | |
| + | const file = join(dir, 'escconv.json'); | |
| + | const convo = [{ | |
| + | mapping: { | |
| + | r: { message: null, parent: null, children: ['u'] }, | |
| + | u: { message: { author: { role: 'user' }, content: { parts: [secretLine] }, create_time: 1.0 }, parent: 'r', children: ['a'] }, | |
| + | a: { message: { author: { role: 'assistant' }, content: { parts: ['ok'] }, create_time: 2.0 }, parent: 'u', children: [] }, | |
| + | }, | |
| + | }]; | |
| + | writeFileSync(file, JSON.stringify(convo)); | |
| + | try { | |
| + | await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--report', '--analysis', '--redact-auto', '--quiet']); | |
| + | const artifacts = [ | |
| + | 'PROMPT_TREE.md', 'TREETRACE_REPORT.md', '.treetrace/tree.json', | |
| + | '.treetrace/failures.json', '.treetrace/lessons.md', '.treetrace/evals.jsonl', '.treetrace/agent-memory.md', | |
| + | ].filter((f) => existsSync(join(dir, f))).map((f) => readFileSync(join(dir, f), 'utf8')).join('\n'); | |
| + | assert.ok(!artifacts.includes(rawValue), 'raw escaped-JSON secret value leaked into an artifact'); | |
| + | assert.ok(artifacts.includes('[REDACTED:secret-assignment]'), 'expected a secret-assignment redaction marker'); | |
| + | } finally { | |
| + | rmSync(dir, { recursive: true, force: true }); | |
| + | } | |
| + | }); | |
| + | ||
| test('redaction: bare hex tokens (32+ chars) are detected, lower and upper case', async () => { | ||
| const lower = '6881f8290266f4cc939959917f893a2a88787eb24bbcb6b9c37594c72bf448c3'; | ||
| const upper = lower.toUpperCase(); | ||
| @@ -1154,6 +1198,53 @@ test('hallucinations: an Edit to a nonexistent file is flagged, a Write to a new | ||
| } | ||
| }); | ||
| + | test('hallucinations: dotted code symbols are not flagged as missing file paths', () => { | |
| + | const dir = tempProject(); | |
| + | try { | |
| + | const mk = (text) => ({ nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] }); | |
| + | for (const sym of ['JSON.parse', 'params.arguments', 'params.name', 'test.skip', 'describe.skip', 'obj.method', 'array.length']) { | |
| + | const refs = detectHallucinations(mk(sym), dir).hallucinations | |
| + | .filter((h) => h.category === 'hallucinated_file_or_path') | |
| + | .map((h) => h.reference); | |
| + | assert.deepEqual(refs, [], `code symbol "${sym}" should not be flagged as a missing path (got ${JSON.stringify(refs)})`); | |
| + | } | |
| + | const real = detectHallucinations(mk('open src/missing.ts'), dir).hallucinations | |
| + | .filter((h) => h.category === 'hallucinated_file_or_path') | |
| + | .map((h) => h.reference); | |
| + | assert.ok(real.includes('src/missing.ts'), 'a genuinely missing path with a known extension must still be flagged'); | |
| + | } finally { | |
| + | rmSync(dir, { recursive: true, force: true }); | |
| + | } | |
| + | }); | |
| + | ||
| + | test('hallucinations: missing extensionless files and local paths are flagged, existing ones are not', () => { | |
| + | const dir = tempProject(); | |
| + | try { | |
| + | const mk = (text) => ({ nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] }); | |
| + | const flagged = (text) => detectHallucinations(mk(text), dir).hallucinations | |
| + | .filter((h) => h.category === 'hallucinated_file_or_path') | |
| + | .map((h) => h.reference); | |
| + | ||
| + | assert.ok(flagged('open Dockerfile').includes('Dockerfile'), 'a missing Dockerfile should be flagged'); | |
| + | assert.ok(flagged('open .env').includes('.env'), 'a missing .env should be flagged'); | |
| + | assert.ok(flagged('open Makefile').includes('Makefile'), 'a missing Makefile should be flagged'); | |
| + | assert.ok(flagged('open src/route').includes('src/route'), 'a missing extensionless local path should be flagged'); | |
| + | ||
| + | writeFileSync(join(dir, 'Dockerfile'), 'FROM node:20\n'); | |
| + | writeFileSync(join(dir, '.env'), 'X=1\n'); | |
| + | assert.ok(!flagged('open Dockerfile and .env').includes('Dockerfile'), 'an existing Dockerfile must not be flagged'); | |
| + | assert.ok(!flagged('open Dockerfile and .env').includes('.env'), 'an existing .env must not be flagged'); | |
| + | ||
| + | const noise = detectHallucinations(mk('JSON.parse and test.skip and update the README section about CONTRIBUTING'), dir).hallucinations | |
| + | .filter((h) => h.category === 'hallucinated_file_or_path') | |
| + | .map((h) => h.reference); | |
| + | assert.ok(!noise.includes('JSON.parse') && !noise.includes('test.skip'), 'extensionless detection must not reintroduce code-symbol false positives'); | |
| + | assert.ok(!noise.includes('README') && !noise.includes('CONTRIBUTING'), 'a known filename word in prose without a file-op verb must not be flagged'); | |
| + | } finally { | |
| + | rmSync(dir, { recursive: true, force: true }); | |
| + | } | |
| + | }); | |
| + | ||
| test('discover: a recorded cwd that mismatches the project dir excludes a colliding session', () => { | ||
| const dir = mkdtempSync(join(tmpdir(), 'treetrace-cwd-')); | ||
| const matching = join(dir, 'match.jsonl'); |