| @@ -2,11 +2,27 @@ | ||
| Notable changes to TreeTrace. The format follows Keep a Changelog, and the project uses semantic versioning. | ||
| + | ## 0.9.0 - 2026-06-19 | |
| + | ||
| + | ### Added | |
| + | ||
| + | - `tree.json` now exports token totals (aggregate stats and per-session), a per-turn `model` field on each node, and a per-node `actions` array (tool invocations, file paths, and Bash commands). Claims in the README about token usage, model-per-turn, and tool/file capture now reflect what the schema exports. | |
| + | ||
| + | ### Changed | |
| + | ||
| + | - Redaction and detection hardened in this release cycle (see 0.8.x entries for detail). | |
| + | - Examples regenerated for v0.9.0; footers and `.treetrace/redactions.json` reflect the current schema version. | |
| + | ||
| + | ### Documentation | |
| + | ||
| + | - Signal-coverage matrix version label updated from v0.8.1 to v0.9.0. | |
| + | - Terminal output modes (`--graph`, `--full`, `--summary`) documented as early-return modes that do not compose with `--report` or `--analysis`. | |
| + | ||
| ## 0.8.1 - 2026-06-19 | ||
| ### Changed | ||
| - | - Relicensed from Apache-2.0 to the PolyForm Noncommercial License 1.0.0. TreeTrace is now free for any noncommercial purpose (personal, research, education, nonprofit, government) and commercial or for-profit use requires a separate license from the copyright holder (zionboggan@gmail.com). The relicense applies to this version onward; copies obtained under 0.8.0 and earlier remain under Apache-2.0 for those versions. `package.json` now declares `SEE LICENSE IN LICENSE`, and `LICENSE` is included in the published package files. | |
| + | - Relicensed from Apache-2.0 to the PolyForm Noncommercial License 1.0.0 (SPDX: `LicenseRef-PolyForm-Noncommercial-1.0.0`). TreeTrace is now free for any noncommercial purpose (personal, research, education, nonprofit, government); commercial or for-profit use requires a separate license from the copyright holder (zionboggan@gmail.com). The relicense applies to this version onward; copies obtained under 0.8.0 and earlier remain under Apache-2.0 for those versions. `package.json` declares `LicenseRef-PolyForm-Noncommercial-1.0.0`, and `LICENSE` is included in the published package files. | |
| ## 0.8.0 - 2026-06-18 | ||
| @@ -110,7 +110,7 @@ TreeTrace reads coding and CLI agent sessions (Claude Code, Codex, Cursor, Copil | ||
| ### Signal coverage by adapter | ||
| - | Signal coverage depends on what each tool exports. The matrix below reflects the actual source code (v0.8.1); cells marked `--` are confirmed absent. | |
| + | Signal coverage depends on what each tool exports. The matrix below reflects the actual source code (v0.9.0); cells marked `--` are confirmed absent. | |
| | Signal | Claude Code | ChatGPT | Codex | Cursor | Copilot | Gemini | Grok | | ||
| |---|:---:|:---:|:---:|:---:|:---:|:---:|:---:| | ||
| @@ -191,6 +191,8 @@ Claude Code (native JSONL) is the richest source: it covers all rejection kinds, | ||
| For a Terminus, Codex CLI, Claude Code, or SSH session where you want the report in the terminal window, use `npx treetrace --report --redact-auto`. For both terminal output and an extra shell-captured copy, pipe it: `npx treetrace --report --redact-auto | tee treetrace-output.md`. | ||
| + | **Terminal output modes (`--graph`, `--full`, `--summary`):** These three flags activate a terminal graph mode that returns early after writing `PROMPT_TREE_GRAPH.md`. They do not compose with `--report` or `--analysis`: when any of them is present, the graph is written and the run stops -- other outputs are skipped. `--full` and `--summary` control graph detail level (full node expansion vs. spine-only summary), not which artifacts are written. Run the graph in its own invocation, separate from any report or analysis pass. | |
| + | ||
| If you see a file literally named `output`, that usually came from `--out output` or shell redirection like `> output`. Prefer `TREETRACE_REPORT.md` for human reading and leave `.treetrace/*.json` / `.jsonl` for tools. | ||
| </details> | ||
| @@ -319,7 +321,7 @@ Verified means the adapter was validated against real session or real published | ||
| ## Examples | ||
| - | See [examples/](examples/) for generated artifacts produced by running the CLI with no hand-editing: | |
| + | See [examples/](examples/) for generated artifacts produced by running the CLI with no hand-editing. The checked-in examples are versioned snapshots regenerated for v0.9.0; footers, `.treetrace/redactions.json`, and any schema fields introduced since the previous version reflect the current release. | |
| - [examples/weather-dashboard](examples/weather-dashboard) shows lineage and the redaction gate on a clean session. | ||
| - [examples/api-key-auth](examples/api-key-auth) shows the [`--security` report](examples/api-key-auth/SECURITY_REPORT.md), [rejection capture](examples/api-key-auth/.treetrace/rejections.json), and [hallucination detection](examples/api-key-auth/.treetrace/hallucinations.json) lighting up on a session that touches auth, hardcodes a secret, skips tests, force-pushes, references a missing file, and imports an undeclared package. |
| @@ -20,16 +20,30 @@ Agent Trace answers "which code came from AI?" TreeTrace answers "how did the hu | ||
| ```jsonc | ||
| { | ||
| "schemaVersion": "0.3", | ||
| - | "generator": { "name": "treetrace", "version": "0.3.0", "url": "..." }, | |
| + | "generator": { "name": "treetrace", "version": "0.9.0", "url": "..." }, | |
| "project": { "name": "...", "generatedAt": "ISO-8601", "sourceType": "claude-code-jsonl" }, | ||
| - | "stats": { "prompts": 41, "sessions": 6, "days": 9, "corrections": 3, "rejections": 4 }, | |
| + | "stats": { | |
| + | "prompts": 41, "sessions": 6, "days": 9, | |
| + | "corrections": 3, "rejections": 4, | |
| + | "toolUses": 12, "filesTouched": 7, | |
| + | "inputTokens": 8400, "outputTokens": 2100, | |
| + | "models": ["claude-opus-4-8"], | |
| + | "firstTs": "ISO-8601", "lastTs": "ISO-8601" | |
| + | }, | |
| "analysis": { | ||
| "failureSignals": 11, | ||
| "correctionChains": 3, | ||
| "evalCandidates": 6, | ||
| "lessons": 7 | ||
| }, | ||
| - | "sessions": [ { "id": "...", "title": "...", "firstTs": "...", "lastTs": "...", "promptCount": 7 } ], | |
| + | "sessions": [ | |
| + | { | |
| + | "id": "...", "title": "...", | |
| + | "firstTs": "ISO-8601", "lastTs": "ISO-8601", | |
| + | "promptCount": 7, "isContinuation": false, | |
| + | "inputTokens": 8400, "outputTokens": 2100 | |
| + | } | |
| + | ], | |
| "nodes": [ /* PromptNode */ ], | ||
| "edges": [ /* Edge */ ], | ||
| "correctionChains": [ /* CorrectionChain */ ], | ||
| @@ -40,6 +54,43 @@ Agent Trace answers "which code came from AI?" TreeTrace answers "how did the hu | ||
| All v0.3 additions are optional and additive. Consumers that only understand v0.2 can keep reading `nodes` and `edges` and ignore `rejections`. | ||
| + | ## stats fields | |
| + | ||
| + | | Field | Type | Meaning | | |
| + | |-------|------|---------| | |
| + | | `prompts` | number | total classified prompt nodes | | |
| + | | `rawPrompts` | number | total raw prompt records across all sessions | | |
| + | | `sessions` | number | sessions that contained at least one prompt | | |
| + | | `days` | number | calendar days spanned | | |
| + | | `corrections` | number | nodes classified as `correction` | | |
| + | | `scopeChanges` | number | nodes classified as `scope-change` | | |
| + | | `checkpoints` | number | nodes classified as `checkpoint` | | |
| + | | `abandonedBranches` | number | distinct abandoned sub-trees | | |
| + | | `rejections` | number | total rejection/refusal/decline events | | |
| + | | `rejectionsByKind` | object | count per rejection kind | | |
| + | | `toolUses` | number | total tool invocations across all sessions | | |
| + | | `filesTouched` | number | distinct file paths referenced (Edit/Write paths and shell command paths) | | |
| + | | `inputTokens` | number | sum of input tokens across all sessions (0 when not available for the source format) | | |
| + | | `outputTokens` | number | sum of output tokens across all sessions (0 when not available for the source format) | | |
| + | | `models` | string[] | deduplicated list of model identifiers seen across all sessions | | |
| + | | `firstTs` | string \| null | ISO-8601 timestamp of the earliest record | | |
| + | | `lastTs` | string \| null | ISO-8601 timestamp of the latest record | | |
| + | ||
| + | Token coverage by source: Claude Code JSONL (full), Codex rollout (full), Gemini CLI (full), ChatGPT export (none), Copilot (none), Cursor (none), Grok (none), plain transcript (none). | |
| + | ||
| + | ## sessions[] fields | |
| + | ||
| + | | Field | Type | Meaning | | |
| + | |-------|------|---------| | |
| + | | `id` | string | session identifier | | |
| + | | `title` | string \| null | session title if captured | | |
| + | | `firstTs` | string \| null | ISO-8601 | | |
| + | | `lastTs` | string \| null | ISO-8601 | | |
| + | | `promptCount` | number | classified prompts in this session | | |
| + | | `isContinuation` | boolean | session resumed from a prior compact summary | | |
| + | | `inputTokens` | number | input tokens for this session (0 when not available) | | |
| + | | `outputTokens` | number | output tokens for this session (0 when not available) | | |
| + | ||
| ## PromptNode | ||
| | Field | Type | Meaning | | ||
| @@ -55,12 +106,29 @@ All v0.3 additions are optional and additive. Consumers that only understand v0. | ||
| | `reruns` | number | repeated instruction re-issues folded into this node | | ||
| | `session` | string | session id this prompt came from | | ||
| | `timestamp` | string \| null | ISO-8601 | | ||
| + | | `model` | string \| null | model that handled this turn (from the first action on the turn; null if not available) | | |
| + | | `actions` | Action[] | tool invocations made in response to this prompt, after redaction | | |
| | `failureSignals` | FailureSignal[] | optional v0.2 failure labels attached to this node | | ||
| | `evalCandidate` | boolean | whether this node contributes to an eval candidate | | ||
| | `lessonIds` | string[] | lessons derived from this node | | ||
| | `rejections` | Rejection[] | optional v0.3 typed rejection/refusal/decline events captured on this turn | | ||
| | `sourceEventIds` | string[] | local transcript record UUIDs; raw transcripts are never exported | | ||
| + | ## Action | |
| + | ||
| + | ```jsonc | |
| + | { "tool": "Edit", "file": "/src/auth.js", "command": null, "model": "claude-opus-4-8" } | |
| + | ``` | |
| + | ||
| + | | Field | Type | Meaning | | |
| + | |-------|------|---------| | |
| + | | `tool` | string \| null | tool name (`Bash`, `Edit`, `Write`, `Read`, etc.) | | |
| + | | `file` | string \| null | file path from a structured `file_path` input; redacted | | |
| + | | `command` | string \| null | shell command string for `Bash` tool calls; redacted | | |
| + | | `model` | string \| null | model that issued this tool call; null when not available | | |
| + | ||
| + | `file` and `command` values are run through the same redaction gate as `node.text`. An `action` whose `command` or `file` contains a secret will have that value replaced with a `[REDACTED:rule-id]` marker before export. | |
| + | ||
| The `rejection` kind (v0.3) is assigned to synthetic nodes that exist only to carry a rejection signal, e.g. a tool-result rejection that arrived before any human-typed prompt. Such nodes have empty `text`, a `title` derived from the rejection kind(s), and one or more entries in `rejections`. | ||
| ## FailureSignal | ||
| @@ -80,7 +148,7 @@ Initial `type` values: | ||
| - `misunderstood_goal` | ||
| - `scope_drift` | ||
| - `wrong_tool_choice` | ||
| - | - `hallucinated_file_or_api` | |
| + | - `hallucinated_file_or_path` (also written as `hallucinated_file_or_api` in older exports; treat as equivalent) | |
| - `repeated_failed_fix` | ||
| - `overbuilt_solution` | ||
| - `underbuilt_solution` | ||
| @@ -201,6 +269,49 @@ Initial eval `type` values: | ||
| - `tool_error_recovery` (v0.3) | ||
| - `refusal_handling` (v0.3) | ||
| + | ## hallucinations.json (--security) | |
| + | ||
| + | Written to `.treetrace/hallucinations.json` when `--security` is passed. Requires a `--dir` that points to a real project tree so file existence and package manifests can be checked. | |
| + | ||
| + | ```jsonc | |
| + | { | |
| + | "schemaVersion": "0.3", | |
| + | "project": { "name": "...", "generatedAt": "ISO-8601" }, | |
| + | "verifiedAgainstWorkingTree": true, | |
| + | "manifestSeen": true, | |
| + | "summary": { | |
| + | "total": 2, | |
| + | "byCategory": { | |
| + | "hallucinated_file_or_path": 1, | |
| + | "hallucinated_import_or_package": 1 | |
| + | } | |
| + | }, | |
| + | "hallucinations": [ | |
| + | { | |
| + | "category": "hallucinated_file_or_path", | |
| + | "reference": "./src/middleware/rateLimit.js", | |
| + | "nodeId": "node_001", | |
| + | "evidence": "Referenced ... which does not exist in the working tree and was not created during the session.", | |
| + | "evalCandidate": { | |
| + | "type": "reference_existence_check", | |
| + | "task": "Verify a file or path exists in the working tree before editing or relying on it.", | |
| + | "target": "./src/middleware/rateLimit.js" | |
| + | } | |
| + | } | |
| + | ], | |
| + | "note": "..." | |
| + | } | |
| + | ``` | |
| + | ||
| + | `category` enum: | |
| + | ||
| + | - `hallucinated_file_or_path` - a relative file/path token appears in scannable text but does not exist on disk and was not created during the session | |
| + | - `hallucinated_import_or_package` - a JS or Python import specifier is not a declared dependency and is not a standard-library/builtin module | |
| + | ||
| + | `verifiedAgainstWorkingTree` is `false` when the project directory could not be resolved. `manifestSeen` is `false` when no `package.json`, lockfile, or `requirements.txt` was found. | |
| + | ||
| + | Detection covers: user prompt text, tool action inputs, and tool commands. It does not scan assistant prose (assistant turns are not stored in `node.text`) and does not resolve per-symbol exports inside a module. | |
| + | ||
| ## Separate Analysis Artifacts | ||
| TreeTrace also writes a combined human report plus focused files derived from the same redacted tree: |
| @@ -2,7 +2,7 @@ | ||
| "schemaVersion": "0.3", | ||
| "project": { | ||
| "name": "api-key-auth", | ||
| - | "generatedAt": "2026-06-18T22:04:19.904Z" | |
| + | "generatedAt": "2026-06-19T06:41:31.989Z" | |
| }, | ||
| "summary": { | ||
| "totalFailureSignals": 5, |
| @@ -2,7 +2,7 @@ | ||
| "schemaVersion": "0.3", | ||
| "project": { | ||
| "name": "api-key-auth", | ||
| - | "generatedAt": "2026-06-18T22:04:21.390Z" | |
| + | "generatedAt": "2026-06-19T06:41:32.339Z" | |
| }, | ||
| "verifiedAgainstWorkingTree": true, | ||
| "manifestSeen": true, |
| @@ -2,7 +2,7 @@ | ||
| "schemaVersion": "0.3", | ||
| "project": { | ||
| "name": "api-key-auth", | ||
| - | "generatedAt": "2026-06-18T22:04:19.904Z" | |
| + | "generatedAt": "2026-06-19T06:41:31.989Z" | |
| }, | ||
| "summary": { | ||
| "total": 1, |
| @@ -2,12 +2,12 @@ | ||
| "schemaVersion": "0.3", | ||
| "generator": { | ||
| "name": "treetrace", | ||
| - | "version": "0.8.0", | |
| + | "version": "0.9.0", | |
| "url": "https://github.com/TreeTraceTool/TreeTrace" | ||
| }, | ||
| "project": { | ||
| "name": "api-key-auth", | ||
| - | "generatedAt": "2026-06-18T22:04:19.904Z", | |
| + | "generatedAt": "2026-06-19T06:41:31.989Z", | |
| "sourceType": "claude-code-jsonl" | ||
| }, | ||
| "stats": { | ||
| @@ -25,6 +25,8 @@ | ||
| }, | ||
| "toolUses": 4, | ||
| "filesTouched": 2, | ||
| + | "inputTokens": 3900, | |
| + | "outputTokens": 880, | |
| "models": [ | ||
| "assistant-model" | ||
| ], | ||
| @@ -44,7 +46,9 @@ | ||
| "firstTs": "2026-06-02T09:00:00.000Z", | ||
| "lastTs": "2026-06-02T09:04:00.000Z", | ||
| "promptCount": 4, | ||
| - | "isContinuation": false | |
| + | "isContinuation": false, | |
| + | "inputTokens": 3900, | |
| + | "outputTokens": 880 | |
| } | ||
| ], | ||
| "nodes": [ | ||
| @@ -60,6 +64,21 @@ | ||
| "reruns": 0, | ||
| "session": "api-key-auth-session", | ||
| "timestamp": "2026-06-02T09:00:00.000Z", | ||
| + | "model": "assistant-model", | |
| + | "actions": [ | |
| + | { | |
| + | "tool": "Edit", | |
| + | "file": "/tmp/api-key-auth/src/auth/apiKey.js", | |
| + | "command": null, | |
| + | "model": "assistant-model" | |
| + | }, | |
| + | { | |
| + | "tool": "Edit", | |
| + | "file": "/tmp/api-key-auth/server.js", | |
| + | "command": null, | |
| + | "model": "assistant-model" | |
| + | } | |
| + | ], | |
| "failureSignals": [ | ||
| { | ||
| "type": "security_or_privacy_risk", | ||
| @@ -109,6 +128,15 @@ | ||
| "reruns": 0, | ||
| "session": "api-key-auth-session", | ||
| "timestamp": "2026-06-02T09:02:00.000Z", | ||
| + | "model": "assistant-model", | |
| + | "actions": [ | |
| + | { | |
| + | "tool": "Edit", | |
| + | "file": "/tmp/api-key-auth/src/auth/apiKey.js", | |
| + | "command": null, | |
| + | "model": "assistant-model" | |
| + | } | |
| + | ], | |
| "failureSignals": [ | ||
| { | ||
| "type": "user_rejected_action", | ||
| @@ -150,6 +178,15 @@ | ||
| "reruns": 0, | ||
| "session": "api-key-auth-session", | ||
| "timestamp": "2026-06-02T09:03:00.000Z", | ||
| + | "model": "assistant-model", | |
| + | "actions": [ | |
| + | { | |
| + | "tool": "Bash", | |
| + | "file": null, | |
| + | "command": "git commit -am \"wip: api key auth\" --no-verify && git push --force", | |
| + | "model": "assistant-model" | |
| + | } | |
| + | ], | |
| "failureSignals": [ | ||
| { | ||
| "type": "security_or_privacy_risk", | ||
| @@ -181,6 +218,8 @@ | ||
| "reruns": 0, | ||
| "session": "api-key-auth-session", | ||
| "timestamp": "2026-06-02T09:04:00.000Z", | ||
| + | "model": null, | |
| + | "actions": [], | |
| "failureSignals": [], | ||
| "evalCandidate": false, | ||
| "lessonIds": [], |
| @@ -38,4 +38,4 @@ | ||
| --- | ||
| - | *[treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.8.0 · [schema](https://github.com/TreeTraceTool/TreeTrace/blob/main/SCHEMA.md)* | |
| + | *[treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.9.0 · [schema](https://github.com/TreeTraceTool/TreeTrace/blob/main/SCHEMA.md)* | |
| @@ -1,6 +1,6 @@ | ||
| # TreeTrace Security Report - api-key-auth | ||
| - | Generated: 2026-06-18T22:04:21.390Z | |
| + | Generated: 2026-06-19T06:41:32.339Z | |
| ## Surfaces touched | ||
| @@ -32,4 +32,4 @@ Generated: 2026-06-18T22:04:21.390Z | ||
| --- | ||
| - | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.8.0. | |
| + | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.9.0. |
| @@ -1,6 +1,6 @@ | ||
| # TreeTrace Report - api-key-auth | ||
| - | Generated: 2026-06-18T22:04:19.904Z | |
| + | Generated: 2026-06-19T06:41:31.989Z | |
| ## Session summary | ||
| @@ -44,6 +44,13 @@ Generated: 2026-06-18T22:04:19.904Z | ||
| - (high) [node_003] Agent action touched risky-command [signals: risky command]: "git commit -am "wip: api key auth" --no-verify && git push --force" (assistant-model) | ||
| - (stated intent) [node_001] Human flagged a security concern about a prior action with no security label [signal: human security correction]: "No, do not hardcode the secret in the source. Read the API key from an environment variable instead." (assistant-model) | ||
| + | ## Correction chains | |
| + | ||
| + | Failure turns that received a human correction, with resolution status. | |
| + | ||
| + | - chain_001 (security_or_privacy_risk, low): failure [node_001] -> correction [node_002] -> resolved [node_003] | |
| + | - chain_002 (user_frustration, high): failure [node_001] -> correction [node_004] -> unresolved | |
| + | ||
| ## Rejections | ||
| Typed rejection / refusal / decline events captured on the session. Each one is also surfaced as a failure signal of the mapped type. | ||
| @@ -56,4 +63,4 @@ Typed rejection / refusal / decline events captured on the session. Each one is | ||
| See: `PROMPT_TREE.md` · `.treetrace/lessons.md` · `.treetrace/agent-memory.md` · handoff: run `treetrace --handoff` | ||
| --- | ||
| - | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.8.0. | |
| + | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.9.0. |
| @@ -2,7 +2,7 @@ | ||
| "schemaVersion": "0.3", | ||
| "project": { | ||
| "name": "rejections", | ||
| - | "generatedAt": "2026-06-18T22:02:52.475Z" | |
| + | "generatedAt": "2026-06-19T06:41:32.578Z" | |
| }, | ||
| "summary": { | ||
| "totalFailureSignals": 9, |
| @@ -2,14 +2,14 @@ | ||
| "schemaVersion": "0.3", | ||
| "project": { | ||
| "name": "rejections", | ||
| - | "generatedAt": "2026-06-18T22:04:20.037Z" | |
| + | "generatedAt": "2026-06-19T06:41:32.880Z" | |
| }, | ||
| "verifiedAgainstWorkingTree": true, | ||
| "manifestSeen": false, | ||
| "summary": { | ||
| - | "total": 1, | |
| + | "total": 2, | |
| "byCategory": { | ||
| - | "hallucinated_file_or_path": 1, | |
| + | "hallucinated_file_or_path": 2, | |
| "hallucinated_import_or_package": 0 | ||
| } | ||
| }, | ||
| @@ -24,6 +24,17 @@ | ||
| "task": "Verify a file or path exists in the working tree before editing or relying on it.", | ||
| "target": "README" | ||
| } | ||
| + | }, | |
| + | { | |
| + | "category": "hallucinated_file_or_path", | |
| + | "reference": "README.md", | |
| + | "nodeId": "node_002", | |
| + | "evidence": "Referenced \"README.md\" which does not exist in the working tree and was not created during the session.", | |
| + | "evalCandidate": { | |
| + | "type": "reference_existence_check", | |
| + | "task": "Verify a file or path exists in the working tree before editing or relying on it.", | |
| + | "target": "README.md" | |
| + | } | |
| } | ||
| ], | ||
| "note": "File and path existence and import and package declaration are checked deterministically against the working tree and manifests. Per-symbol and per-API resolution inside a module is not attempted." |
| @@ -2,7 +2,7 @@ | ||
| "schemaVersion": "0.3", | ||
| "project": { | ||
| "name": "rejections", | ||
| - | "generatedAt": "2026-06-18T22:02:52.475Z" | |
| + | "generatedAt": "2026-06-19T06:41:32.578Z" | |
| }, | ||
| "summary": { | ||
| "total": 7, |
| @@ -2,12 +2,12 @@ | ||
| "schemaVersion": "0.3", | ||
| "generator": { | ||
| "name": "treetrace", | ||
| - | "version": "0.8.0", | |
| + | "version": "0.9.0", | |
| "url": "https://github.com/TreeTraceTool/TreeTrace" | ||
| }, | ||
| "project": { | ||
| "name": "rejections", | ||
| - | "generatedAt": "2026-06-18T22:02:52.475Z", | |
| + | "generatedAt": "2026-06-19T06:41:32.578Z", | |
| "sourceType": "claude-code-jsonl" | ||
| }, | ||
| "stats": { | ||
| @@ -29,7 +29,9 @@ | ||
| "model_refusal": 2 | ||
| }, | ||
| "toolUses": 4, | ||
| - | "filesTouched": 1, | |
| + | "filesTouched": 2, | |
| + | "inputTokens": 1050, | |
| + | "outputTokens": 240, | |
| "models": [ | ||
| "claude-3-opus" | ||
| ], | ||
| @@ -49,7 +51,9 @@ | ||
| "firstTs": "2026-06-18T10:00:00.000Z", | ||
| "lastTs": "2026-06-18T10:01:10.000Z", | ||
| "promptCount": 5, | ||
| - | "isContinuation": false | |
| + | "isContinuation": false, | |
| + | "inputTokens": 1050, | |
| + | "outputTokens": 240 | |
| } | ||
| ], | ||
| "nodes": [ | ||
| @@ -65,6 +69,15 @@ | ||
| "reruns": 0, | ||
| "session": "claude-code-rejections", | ||
| "timestamp": "2026-06-18T10:00:00.000Z", | ||
| + | "model": "claude-3-opus", | |
| + | "actions": [ | |
| + | { | |
| + | "tool": "Bash", | |
| + | "file": null, | |
| + | "command": "ls -la /", | |
| + | "model": "claude-3-opus" | |
| + | } | |
| + | ], | |
| "failureSignals": [ | ||
| { | ||
| "type": "user_rejected_action", | ||
| @@ -106,6 +119,15 @@ | ||
| "reruns": 0, | ||
| "session": "claude-code-rejections", | ||
| "timestamp": "2026-06-18T10:00:15.000Z", | ||
| + | "model": "claude-3-opus", | |
| + | "actions": [ | |
| + | { | |
| + | "tool": "Edit", | |
| + | "file": "README.md", | |
| + | "command": null, | |
| + | "model": "claude-3-opus" | |
| + | } | |
| + | ], | |
| "failureSignals": [ | ||
| { | ||
| "type": "user_rejected_action", | ||
| @@ -147,6 +169,21 @@ | ||
| "reruns": 0, | ||
| "session": "claude-code-rejections", | ||
| "timestamp": "2026-06-18T10:00:30.000Z", | ||
| + | "model": "claude-3-opus", | |
| + | "actions": [ | |
| + | { | |
| + | "tool": "Bash", | |
| + | "file": null, | |
| + | "command": "mkdir -p /root/.config/forbidden", | |
| + | "model": "claude-3-opus" | |
| + | }, | |
| + | { | |
| + | "tool": "Bash", | |
| + | "file": null, | |
| + | "command": "sudo rm -rf /root/.config/forbidden", | |
| + | "model": "claude-3-opus" | |
| + | } | |
| + | ], | |
| "failureSignals": [ | ||
| { | ||
| "type": "tool_execution_failed", | ||
| @@ -224,6 +261,8 @@ | ||
| "reruns": 0, | ||
| "session": "claude-code-rejections", | ||
| "timestamp": "2026-06-18T10:00:55.000Z", | ||
| + | "model": null, | |
| + | "actions": [], | |
| "failureSignals": [ | ||
| { | ||
| "type": "user_rejected_action", | ||
| @@ -283,6 +322,8 @@ | ||
| "reruns": 0, | ||
| "session": "claude-code-rejections", | ||
| "timestamp": "2026-06-18T10:01:05.000Z", | ||
| + | "model": null, | |
| + | "actions": [], | |
| "failureSignals": [ | ||
| { | ||
| "type": "model_refused", |
| @@ -1,6 +1,6 @@ | ||
| # Prompt Tree: rejections | ||
| - | > **5 prompts** · **1 session** · **1 day** · 1 correction · 4 tool calls · 1 file touched | |
| + | > **5 prompts** · **1 session** · **1 day** · 1 correction · 4 tool calls · 2 files touched | |
| ## Goal | ||
| @@ -27,4 +27,4 @@ | ||
| --- | ||
| - | *[treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.8.0 · [schema](https://github.com/TreeTraceTool/TreeTrace/blob/main/SCHEMA.md)* | |
| + | *[treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.9.0 · [schema](https://github.com/TreeTraceTool/TreeTrace/blob/main/SCHEMA.md)* | |
| @@ -1,6 +1,6 @@ | ||
| # TreeTrace Security Report - rejections | ||
| - | Generated: 2026-06-18T22:04:20.037Z | |
| + | Generated: 2026-06-19T06:41:32.880Z | |
| ## Surfaces touched | ||
| @@ -21,6 +21,7 @@ None detected. | ||
| ## Hallucinated references | ||
| - (hallucinated_file_or_path) [node_002] Referenced "README" which does not exist in the working tree and was not created during the session. | ||
| + | - (hallucinated_file_or_path) [node_002] Referenced "README.md" which does not exist in the working tree and was not created during the session. | |
| ## Corrections to promote | ||
| @@ -30,4 +31,4 @@ None detected. | ||
| --- | ||
| - | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.8.0. | |
| + | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.9.0. |
| @@ -1,10 +1,10 @@ | ||
| # TreeTrace Report - rejections | ||
| - | Generated: 2026-06-18T22:02:52.475Z | |
| + | Generated: 2026-06-19T06:41:32.578Z | |
| ## Session summary | ||
| - | - Prompts: 5 Sessions: 1 Span: 1 day Tool calls: 4 Files touched: 1 | |
| + | - Prompts: 5 Sessions: 1 Span: 1 day Tool calls: 4 Files touched: 2 | |
| - Failure signals: 9 (verified 3, high 4, confirmed 1, inferred 1) | ||
| - Corrections: 1 | ||
| - Rejections: 7 (model refusal: 2, user declined tool: 1, user interrupt: 1, tool execution error: 1, permission denied: 1, user text decline: 1) | ||
| @@ -49,6 +49,12 @@ Generated: 2026-06-18T22:02:52.475Z | ||
| - (high) [node_003] Agent action touched risky-command [signals: risky command]: "sudo rm -rf /root/.config/forbidden" (claude-3-opus) | ||
| + | ## Correction chains | |
| + | ||
| + | Failure turns that received a human correction, with resolution status. | |
| + | ||
| + | - chain_001 (misunderstood_goal, medium): failure [node_003] -> correction [node_004] -> unresolved | |
| + | ||
| ## Rejections | ||
| Typed rejection / refusal / decline events captured on the session. Each one is also surfaced as a failure signal of the mapped type. | ||
| @@ -67,4 +73,4 @@ Typed rejection / refusal / decline events captured on the session. Each one is | ||
| See: `PROMPT_TREE.md` · `.treetrace/lessons.md` · `.treetrace/agent-memory.md` · handoff: run `treetrace --handoff` | ||
| --- | ||
| - | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.8.0. | |
| + | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.9.0. |
| @@ -2,7 +2,7 @@ | ||
| "schemaVersion": "0.3", | ||
| "project": { | ||
| "name": "weather-dashboard", | ||
| - | "generatedAt": "2026-06-18T22:02:52.494Z" | |
| + | "generatedAt": "2026-06-19T06:41:31.432Z" | |
| }, | ||
| "summary": { | ||
| "totalFailureSignals": 2, |
| @@ -2,7 +2,7 @@ | ||
| "schemaVersion": "0.3", | ||
| "project": { | ||
| "name": "weather-dashboard", | ||
| - | "generatedAt": "2026-06-18T22:04:19.964Z" | |
| + | "generatedAt": "2026-06-19T06:41:31.730Z" | |
| }, | ||
| "verifiedAgainstWorkingTree": true, | ||
| "manifestSeen": false, |
| @@ -2,7 +2,7 @@ | ||
| "schemaVersion": "0.3", | ||
| "project": { | ||
| "name": "weather-dashboard", | ||
| - | "generatedAt": "2026-06-18T22:02:52.494Z" | |
| + | "generatedAt": "2026-06-19T06:41:31.432Z" | |
| }, | ||
| "summary": { | ||
| "total": 2, |
| @@ -2,12 +2,12 @@ | ||
| "schemaVersion": "0.3", | ||
| "generator": { | ||
| "name": "treetrace", | ||
| - | "version": "0.8.0", | |
| + | "version": "0.9.0", | |
| "url": "https://github.com/TreeTraceTool/TreeTrace" | ||
| }, | ||
| "project": { | ||
| "name": "weather-dashboard", | ||
| - | "generatedAt": "2026-06-18T22:02:52.494Z", | |
| + | "generatedAt": "2026-06-19T06:41:31.432Z", | |
| "sourceType": "claude-code-jsonl" | ||
| }, | ||
| "stats": { | ||
| @@ -26,6 +26,8 @@ | ||
| }, | ||
| "toolUses": 2, | ||
| "filesTouched": 1, | ||
| + | "inputTokens": 7000, | |
| + | "outputTokens": 1750, | |
| "models": [ | ||
| "assistant-model" | ||
| ], | ||
| @@ -45,7 +47,9 @@ | ||
| "firstTs": "2026-06-01T10:00:00.000Z", | ||
| "lastTs": "2026-06-01T10:12:00.000Z", | ||
| "promptCount": 5, | ||
| - | "isContinuation": false | |
| + | "isContinuation": false, | |
| + | "inputTokens": 7000, | |
| + | "outputTokens": 1750 | |
| } | ||
| ], | ||
| "nodes": [ | ||
| @@ -61,6 +65,15 @@ | ||
| "reruns": 0, | ||
| "session": "synthetic-session", | ||
| "timestamp": "2026-06-01T10:00:00.000Z", | ||
| + | "model": "assistant-model", | |
| + | "actions": [ | |
| + | { | |
| + | "tool": "Write", | |
| + | "file": "/tmp/demo/index.html", | |
| + | "command": null, | |
| + | "model": "assistant-model" | |
| + | } | |
| + | ], | |
| "failureSignals": [], | ||
| "evalCandidate": false, | ||
| "lessonIds": [], | ||
| @@ -81,6 +94,15 @@ | ||
| "reruns": 0, | ||
| "session": "synthetic-session", | ||
| "timestamp": "2026-06-01T10:04:00.000Z", | ||
| + | "model": "assistant-model", | |
| + | "actions": [ | |
| + | { | |
| + | "tool": "Edit", | |
| + | "file": "/tmp/demo/index.html", | |
| + | "command": null, | |
| + | "model": "assistant-model" | |
| + | } | |
| + | ], | |
| "failureSignals": [ | ||
| { | ||
| "type": "overbuilt_solution", | ||
| @@ -112,6 +134,8 @@ | ||
| "reruns": 0, | ||
| "session": "synthetic-session", | ||
| "timestamp": "2026-06-01T10:09:00.000Z", | ||
| + | "model": null, | |
| + | "actions": [], | |
| "failureSignals": [ | ||
| { | ||
| "type": "user_rejected_action", | ||
| @@ -162,6 +186,8 @@ | ||
| "reruns": 0, | ||
| "session": "synthetic-session", | ||
| "timestamp": "2026-06-01T10:12:00.000Z", | ||
| + | "model": null, | |
| + | "actions": [], | |
| "failureSignals": [], | ||
| "evalCandidate": false, | ||
| "lessonIds": [], |
| @@ -38,4 +38,4 @@ | ||
| --- | ||
| - | *[treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.8.0 · [schema](https://github.com/TreeTraceTool/TreeTrace/blob/main/SCHEMA.md)* | |
| + | *[treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.9.0 · [schema](https://github.com/TreeTraceTool/TreeTrace/blob/main/SCHEMA.md)* | |
| @@ -1,9 +1,9 @@ | ||
| # TreeTrace Security Report - weather-dashboard | ||
| - | Generated: 2026-06-18T22:04:19.964Z | |
| + | Generated: 2026-06-19T06:41:31.730Z | |
| None detected. | ||
| --- | ||
| - | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.8.0. | |
| + | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.9.0. |
| @@ -1,6 +1,6 @@ | ||
| # TreeTrace Report - weather-dashboard | ||
| - | Generated: 2026-06-18T22:02:52.494Z | |
| + | Generated: 2026-06-19T06:41:31.432Z | |
| ## Session summary | ||
| @@ -34,6 +34,12 @@ Generated: 2026-06-18T22:02:52.494Z | ||
| - failure_001 [node_003] (user_rejected_action, verified, 100%): The user explicitly told the agent to stop or not proceed near "No, scrap the radar map, it is too heavy.". Evidence: user_text_decline (text): "No, scrap the radar map, it is too heavy. Keep the page lightweight, just the forecast cards." | ||
| - failure_002 [node_002] (overbuilt_solution, confirmed, 82%, assistant-model): The work appears to have overbuilt the requested shape near "Try using leaflet for an interactive radar map layer on top of the forecast."; corrected by "No, scrap the radar map, it is too heavy.". Evidence: User said: "No, scrap the radar map, it is too heavy. Keep the page lightweight, just the forecast cards." | ||
| + | ## Correction chains | |
| + | ||
| + | Failure turns that received a human correction, with resolution status. | |
| + | ||
| + | - chain_001 (overbuilt_solution, high): failure [node_002] -> correction [node_003] -> unresolved | |
| + | ||
| ## Rejections | ||
| Typed rejection / refusal / decline events captured on the session. Each one is also surfaced as a failure signal of the mapped type. | ||
| @@ -47,4 +53,4 @@ Typed rejection / refusal / decline events captured on the session. Each one is | ||
| See: `PROMPT_TREE.md` · `.treetrace/lessons.md` · `.treetrace/agent-memory.md` · handoff: run `treetrace --handoff` | ||
| --- | ||
| - | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.8.0. | |
| + | Generated by [treetrace](https://github.com/TreeTraceTool/TreeTrace) v0.9.0. |
| @@ -46,6 +46,12 @@ const CORRECTION_HINT = | ||
| /\b(no|stop|scrap|not that|you forgot|you ignored|that's wrong|that is wrong|i said|instead|redo|re do|go back|wrong|doesn'?t work|didn'?t work|still (failing|broken|wrong|bad)|not what i (asked|wanted|meant))\b/i; | ||
| const FRUSTRATION_HINT = | ||
| /\b(sucks|awful|god awful|what the heck|wtf|mad|angry|frustrat|not suffic|i don'?t trust|terrible|bad)\b/i; | ||
| + | // Strong, unambiguous frustration wording that warrants an inferred recall signal even | |
| + | // without corroboration. Deliberately narrow to avoid false positives on mild negativity. | |
| + | const STRONG_FRUSTRATION_RE = | |
| + | /\b(god awful|wtf|what the (?:heck|hell)|(?:so |really |this )?sucks|i(?:'m| am) (?:angry|frustrated|furious)|angry and frustrated|makes me (?:angry|mad|furious)|absolutely terrible|piece of (?:junk|garbage|trash|crap))\b/i; | |
| + | // Types whose strong-signal form can emit uncorroborated (at inferred tier only). | |
| + | const UNCORROBORATED_RECALL_TYPES = new Set(['user_frustration', 'scope_drift', 'overbuilt_solution']); | |
| const PRIVACY_HINT = /\b(secret|token|api key|apikey|password|redact|privacy|private|local-first|telemetry|upload|cloud)\b/i; | ||
| const composeOr = (parts) => new RegExp(parts.map((p) => `(?:${p.re.source})`).join('|'), 'i'); | ||
| @@ -567,6 +573,29 @@ export function analyzeTree(tree) { | ||
| correctionNode = null; | ||
| linkage = 'positional'; | ||
| } else { | ||
| + | // Recall backstop: an unambiguous strong-pattern match on a subset of signal | |
| + | // types emits at inferred tier even without corroboration, mirroring the | |
| + | // security-correction recall backstop at analyze.js:500-516. Only fires when | |
| + | // the lexical signal is strong/explicit to avoid false positives on mild wording. | |
| + | // Never raises above inferred, so verified/high counts are unaffected. | |
| + | const strongRecall = signals.filter( | |
| + | (s) => UNCORROBORATED_RECALL_TYPES.has(s.type) && isStrongUncorroboratedSignal(s.type, node.text) | |
| + | ); | |
| + | if (strongRecall.length) { | |
| + | const anchor = priorNode || node; | |
| + | for (const signal of strongRecall) { | |
| + | addFailure({ | |
| + | type: signal.type, | |
| + | confidence: Math.min(signal.confidence, 0.62), | |
| + | tier: 'inferred', | |
| + | failureNode: anchor, | |
| + | correctionNode: null, | |
| + | resolvedNode: nearestAcceptedAfter(tree.nodes, anchor, null), | |
| + | evidence: `User said: "${quote(node.text)}"`, | |
| + | summary: summarizeFailure(signal.type, anchor, null), | |
| + | }); | |
| + | } | |
| + | } | |
| return; | ||
| } | ||
| @@ -877,6 +906,15 @@ function extractConstraints(nodes) { | ||
| .map((c) => c.label); | ||
| } | ||
// Returns true when `text` carries an unambiguous strong signal for the given
// failure type, justifying an inferred-tier recall hit without corroboration.
// Kept narrow by design: only explicit phrasings match, and any type other than
// 'user_frustration', 'scope_drift' or 'overbuilt_solution' always yields false.
//
//   type - failure-signal type name to test for
//   text - the user turn text to scan; a missing/empty value never matches
function isStrongUncorroboratedSignal(type, text) {
  // Normalise a missing text so the regexes never scan the string "undefined".
  const haystack = text || '';
  if (type === 'user_frustration') {
    return STRONG_FRUSTRATION_RE.test(haystack);
  }
  if (type === 'scope_drift') {
    return /\b(?:scope drift|you (?:went|are going) way out of scope|completely off (?:track|scope)|total scope creep)\b/i.test(haystack);
  }
  if (type === 'overbuilt_solution') {
    // The alternation previously read "overbought", a typo that meant a plain
    // "you overbuilt ..." never matched; "massively" is now an optional intensifier.
    return /\b(?:scrap the (?:whole|entire) web app|you (?:massively )?overbuilt|way too (?:heavy|complex|big))\b/i.test(haystack);
  }
  return false;
}
| + | ||
| function inferSignals(node) { | ||
| const text = node.text || ''; | ||
| if (node.kind !== 'correction' && text.length > WORDING_SCAN_MAX_CHARS) { |
| @@ -5,7 +5,7 @@ import { parseSessionFile, parsePlainTranscript } from './parse.js'; | ||
| import { adaptFrom, autoAdapt, TOOLS } from './adapters/index.js'; | ||
| import { classifyPrompts } from './extract.js'; | ||
| import { buildTree } from './tree.js'; | ||
| - | import { scanText, resolveFindings, applyDecisions, shadowScan } from './redact.js'; | |
| + | import { scanText, resolveFindings, applyDecisions, shadowScan, patchResiduals } from './redact.js'; | |
| import { renderMarkdown } from './render-md.js'; | ||
| import { renderMermaid, isSummaryByDefault } from './render-mermaid.js'; | ||
| import { renderJson } from './render-json.js'; | ||
| @@ -93,8 +93,8 @@ export async function main(argv) { | ||
| const renderOpts = { projectName, titlesOnly: opts.titlesOnly, version: VERSION, generatedAt, sourceType: sourceTypeFor(sourceTool) }; | ||
| if (opts.handoff) { | ||
| - | const pack = renderHandoff(tree, renderOpts); | |
| - | assertClean(pack, decisions, 'handoff brief'); | |
| + | let pack = renderHandoff(tree, renderOpts); | |
| + | pack = assertClean(pack, decisions, 'handoff brief', opts.redactAuto); | |
| if (Object.keys(decisions).length) { | ||
| mkdirSync(ttDir, { recursive: true }); | ||
| writeFileSync(decisionsPath, JSON.stringify(decisions, null, 2)); | ||
| @@ -105,10 +105,10 @@ export async function main(argv) { | ||
| } | ||
| if (opts.security) { | ||
| - | const securityReport = renderSecurityReport(tree, projectDir, renderOpts); | |
| - | const hallucinationsText = JSON.stringify(renderHallucinationsJson(tree, projectDir, renderOpts), null, 2); | |
| - | assertClean(securityReport, decisions, 'security report'); | |
| - | assertClean(hallucinationsText, decisions, 'hallucinations.json'); | |
| + | let securityReport = renderSecurityReport(tree, projectDir, renderOpts); | |
| + | let hallucinationsText = JSON.stringify(renderHallucinationsJson(tree, projectDir, renderOpts), null, 2); | |
| + | securityReport = assertClean(securityReport, decisions, 'security report', opts.redactAuto); | |
| + | hallucinationsText = assertClean(hallucinationsText, decisions, 'hallucinations.json', opts.redactAuto); | |
| mkdirSync(projectDir, { recursive: true }); | ||
| mkdirSync(ttDir, { recursive: true }); | ||
| writeFileSync(join(ttDir, 'hallucinations.json'), hallucinationsText); | ||
| @@ -119,14 +119,27 @@ export async function main(argv) { | ||
| } | ||
| if (opts.graph) { | ||
| + | const skippedByGraph = []; | |
| + | if (opts.report) skippedByGraph.push('--report'); | |
| + | if (opts.analysis) skippedByGraph.push('--analysis'); | |
| + | if (opts.failures) skippedByGraph.push('--failures'); | |
| + | if (opts.rejections) skippedByGraph.push('--rejections'); | |
| + | if (opts.lessons) skippedByGraph.push('--lessons'); | |
| + | if (opts.evals) skippedByGraph.push('--evals'); | |
| + | if (opts.memory) skippedByGraph.push('--memory'); | |
| + | if (skippedByGraph.length) { | |
| + | log( | |
| + | `note: graph mode is terminal -- ${skippedByGraph.join(', ')} output${skippedByGraph.length > 1 ? 's were' : ' was'} not written` | |
| + | ); | |
| + | } | |
| const graphOpts = { ...renderOpts, summary: opts.graphSummary, full: opts.graphFull }; | ||
| const mermaid = renderMermaid(tree, graphOpts); | ||
| const summarized = | ||
| graphOpts.summary === true || | ||
| (graphOpts.full !== true && isSummaryByDefault(tree)); | ||
| - | const graphDoc = wrapMermaidDoc(mermaid, projectName, summarized); | |
| + | let graphDoc = wrapMermaidDoc(mermaid, projectName, summarized); | |
| const graphPath = resolve(projectDir, opts.out || 'PROMPT_TREE_GRAPH.md'); | ||
| - | assertClean(graphDoc, decisions, 'PROMPT_TREE_GRAPH.md'); | |
| + | graphDoc = assertClean(graphDoc, decisions, 'PROMPT_TREE_GRAPH.md', opts.redactAuto); | |
| mkdirSync(projectDir, { recursive: true }); | ||
| mkdirSync(ttDir, { recursive: true }); | ||
| writeFileSync(graphPath, graphDoc); | ||
| @@ -136,17 +149,19 @@ export async function main(argv) { | ||
| return; | ||
| } | ||
| - | const md = renderMarkdown(tree, renderOpts); | |
| + | let md = renderMarkdown(tree, renderOpts); | |
| const json = renderJson(tree, renderOpts); | ||
| - | const jsonText = JSON.stringify(json, null, 2); | |
| + | let jsonText = JSON.stringify(json, null, 2); | |
| const artifacts = analysisArtifacts(ttDir, tree, renderOpts, projectDir); | ||
| const outPath = resolve(projectDir, opts.out || 'PROMPT_TREE.md'); | ||
| const reportPath = resolve(projectDir, opts.reportFile || 'TREETRACE_REPORT.md'); | ||
| - | const report = renderReportMarkdown(tree, renderOpts); | |
| + | let report = renderReportMarkdown(tree, renderOpts); | |
| const requested = requestedArtifacts(opts, artifacts); | ||
| if (requested.length && !opts.report) { | ||
| - | for (const artifact of requested) assertClean(artifact.text, decisions, artifact.label); | |
| + | for (const artifact of requested) { | |
| + | artifact.text = assertClean(artifact.text, decisions, artifact.label, opts.redactAuto); | |
| + | } | |
| mkdirSync(projectDir, { recursive: true }); | ||
| mkdirSync(ttDir, { recursive: true }); | ||
| for (const artifact of requested) writeFileSync(artifact.path, artifact.text); | ||
| @@ -160,10 +175,12 @@ export async function main(argv) { | ||
| return; | ||
| } | ||
| - | assertClean(md, decisions, 'PROMPT_TREE.md'); | |
| - | assertClean(jsonText, decisions, 'tree.json'); | |
| - | for (const artifact of Object.values(artifacts)) assertClean(artifact.text, decisions, artifact.label); | |
| - | assertClean(report, decisions, 'TREETRACE_REPORT.md'); | |
| + | md = assertClean(md, decisions, 'PROMPT_TREE.md', opts.redactAuto); | |
| + | jsonText = assertClean(jsonText, decisions, 'tree.json', opts.redactAuto); | |
| + | for (const artifact of Object.values(artifacts)) { | |
| + | artifact.text = assertClean(artifact.text, decisions, artifact.label, opts.redactAuto); | |
| + | } | |
| + | report = assertClean(report, decisions, 'TREETRACE_REPORT.md', opts.redactAuto); | |
| mkdirSync(projectDir, { recursive: true }); | ||
| mkdirSync(ttDir, { recursive: true }); | ||
| @@ -432,7 +449,10 @@ function requestedArtifacts(opts, artifacts) { | ||
| return requested; | ||
| } | ||
| - | export function assertClean(rendered, decisions, label) { | |
| + | export function assertClean(rendered, decisions, label, autoRedact = false) { | |
| + | if (autoRedact) { | |
| + | return patchResiduals(rendered, decisions); | |
| + | } | |
| const leaks = shadowScan(rendered, decisions); | ||
| if (leaks.length) { | ||
| throw new TreetraceError( | ||
| @@ -442,6 +462,7 @@ export function assertClean(rendered, decisions, label) { | ||
| ExitCode.WOULD_LEAK | ||
| ); | ||
| } | ||
| + | return rendered; | |
| } | ||
| export function wrapMermaidDoc(mermaid, projectName, summarized = false) { |
| @@ -50,6 +50,16 @@ const REL_PREFIX_RE = /^(?:\.\/|\.\.\/)/; | ||
| const URL_LIKE_RE = /:\/\//; | ||
| const VERSION_LIKE_RE = /^\d+(?:\.\d+)+$/; | ||
| const FILE_OP_VERB_RE = /\b(?:open|edit|read|cat|touch|create|write|delete|rm|view|append|chmod|mv|cp|run)\b/i; | ||
| + | const RATIO_LIKE_RE = /^\d+\/\d+$/; | |
| + | const KNOWN_DIR_PREFIXES = new Set([ | |
| + | 'src', 'lib', 'libs', 'test', 'tests', 'spec', 'specs', 'dist', 'build', | |
| + | 'bin', 'cmd', 'pkg', 'internal', 'app', 'apps', 'api', 'web', 'www', | |
| + | 'server', 'client', 'common', 'shared', 'utils', 'util', 'helpers', | |
| + | 'config', 'configs', 'scripts', 'tools', 'docs', 'doc', 'examples', | |
| + | 'example', 'fixtures', 'mocks', 'stubs', 'public', 'static', 'assets', | |
| + | 'styles', 'components', 'pages', 'routes', 'models', 'views', 'controllers', | |
| + | 'services', 'middleware', 'plugins', 'modules', '.github', '.circleci', | |
| + | ]); | |
| const JS_IMPORT_RE = | ||
| /\b(?:import|export)\b[^;\n]*?\bfrom\s*['"]([^'"\n]+)['"]|\brequire\(\s*['"]([^'"\n]+)['"]\s*\)|\bimport\(\s*['"]([^'"\n]+)['"]\s*\)/g; | ||
| const PY_IMPORT_RE = /^[ \t]*(?:from\s+([A-Za-z_][\w.]*)\s+import\b|import\s+([A-Za-z_][\w.]*(?:\s*,\s*[A-Za-z_][\w.]*)*))/gm; | ||
| @@ -164,6 +174,14 @@ function looksLikeFileToken(tok) { | ||
| return true; | ||
| } | ||
// Decides whether a slash-containing token has independent evidence of being a
// real file path. Any one of three checks is enough:
//   1. the token matches REL_PREFIX_RE,
//   2. its first path segment (lower-cased) is in KNOWN_DIR_PREFIXES,
//   3. the surrounding `context` text matches FILE_OP_VERB_RE.
// A missing `context` is treated as the empty string.
function hasRealFileSignal(tok, context) {
  return (
    REL_PREFIX_RE.test(tok) ||
    KNOWN_DIR_PREFIXES.has(tok.split('/')[0].toLowerCase()) ||
    FILE_OP_VERB_RE.test(context || '')
  );
}
| + | ||
| function looksLikeExtensionlessFile(tok, context) { | ||
| if (tok.length < 3 || tok.length > 200) return false; | ||
| if (URL_LIKE_RE.test(tok)) return false; | ||
| @@ -173,7 +191,10 @@ function looksLikeExtensionlessFile(tok, context) { | ||
| return FILE_OP_VERB_RE.test(context || ''); | ||
| } | ||
| if (hasSlash(tok) && !tokenExtension(tok)) { | ||
| - | return /^(?:\.{0,2}\/)?[\w@.+-]+(?:\/[\w@.+-]+)+\/?$/.test(tok); | |
| + | if (!(/^(?:\.{0,2}\/)?[\w@.+-]+(?:\/[\w@.+-]+)+\/?$/.test(tok))) return false; | |
| + | if (RATIO_LIKE_RE.test(tok)) return false; | |
| + | if (!hasRealFileSignal(tok, context)) return false; | |
| + | return true; | |
| } | ||
| return false; | ||
| } | ||
| @@ -243,6 +264,10 @@ function collectFileReferences(tree) { | ||
| const body = `${a.input || ''}`.slice(0, MAX_TEXT_SCAN); | ||
| for (const m of body.matchAll(FILE_TOKEN_RE)) push(m[0], node.id); | ||
| for (const m of body.matchAll(PATHISH_TOKEN_RE)) pushExtensionless(m[0], node.id, body); | ||
| + | if (a.file && typeof a.file === 'string' && | |
| + | (a.tool === 'Write' || a.tool === 'Edit' || a.tool === 'NotebookEdit')) { | |
| + | push(a.file, node.id); | |
| + | } | |
| } | ||
| } | ||
| return refs; |
| @@ -391,6 +391,9 @@ function ingestAssistant(session, rec) { | ||
| const input = block.input || {}; | ||
| const file = input.file_path || input.notebook_path || null; | ||
| if (typeof file === 'string') session.stats.filesTouched.add(file); | ||
| + | if (block.name === 'Bash' && typeof input.command === 'string') { | |
| + | for (const p of shellFilePaths(input.command)) session.stats.filesTouched.add(p); | |
| + | } | |
| if (current) { | ||
| current.actions.push({ | ||
| tool: block.name || null, | ||
| @@ -421,6 +424,27 @@ function ingestAssistant(session, rec) { | ||
| } | ||
| } | ||
// Matches path-like tokens in a shell command string. A token must sit at the
// start of the string or directly after whitespace or one of = , ; | & ` ( ).
// Capture group 2 holds a path beginning with "/", "./" or "../". The first two
// alternatives consume ${VAR} and $VAR substitutions with no group 2, so those
// tokens are skipped by the caller. A flag such as "--output" has no leading
// slash and therefore never matches on its own.
const SHELL_PATH_RE = /(?:^|(?<=\s|[=,;|&`()]))(\$\{[^}]*\}|\$[A-Za-z_][A-Za-z0-9_]*|(\.{0,2}\/[^\s'"\\,;|&`()\[\]{}<>$!?*#]+))/g;

// Returns the distinct file-path tokens found in `cmd`, in first-seen order.
// Non-string or empty input yields an empty array; tokens that end in "/" are
// dropped.
function shellFilePaths(cmd) {
  if (typeof cmd !== 'string' || cmd.length === 0) return [];
  // A Set keeps insertion order, so it handles both dedup and ordering.
  const unique = new Set();
  for (const match of cmd.matchAll(SHELL_PATH_RE)) {
    const candidate = match[2];
    if (!candidate) continue; // env substitution matched, not a path
    const trimmed = candidate.replace(/['">]+$/, '');
    if (trimmed === '' || trimmed.endsWith('/')) continue;
    unique.add(trimmed);
  }
  return [...unique];
}
| + | ||
| const INPUT_CAP = 300; | ||
| function summarizeToolInput(tool, input) { |
| @@ -24,7 +24,7 @@ export const RULES = [ | ||
| { id: 'hex-token', severity: 'medium', re: /\b[0-9a-fA-F]{32,512}\b/g }, | ||
| { id: 'wireguard-key', severity: 'medium', re: /\b(PrivateKey|PresharedKey)\s*=\s*[A-Za-z0-9+/]{42,44}=?/g }, | ||
| { id: 'url-basic-auth', severity: 'medium', re: /\b[a-z][a-z0-9+.-]{0,30}:\/\/[^/\s:@'"`]{2,256}:[^/\s@'"`]{2,256}@[^\s'"`]{1,512}/gi }, | ||
| - | { id: 'bearer-header', severity: 'medium', re: /\bBearer\s+[A-Za-z0-9._+/=-]{20,}\b/g }, | |
| + | { id: 'bearer-header', severity: 'medium', re: /\bBearer\s+[A-Za-z0-9._+/=-]{20,}\b/gi }, | |
| { id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.){4,512}"|'(?:[^'\\]|\\.){4,512}'|`(?:[^`\\]|\\.){4,512}`|[^\s'"`,;){}]{6,512})/gi }, | ||
| // Fail-closed companion: a secret-key assignment whose quoted value contains ANY backslash escape | ||
| // is redacted even when the escape-inflated character count falls under the generic floor above. | ||
| @@ -296,3 +296,30 @@ export function shadowScan(renderedText, decisions) { | ||
| } | ||
| return leaks; | ||
| } | ||
| + | ||
// Auto-redacts whatever the shadow scan still reports in `text`.
// For each reported leak with no existing entry in `decisions` (keyed by the
// sha256 of the matched text), a redact decision is recorded. `decisions` is
// mutated in place. The text is then re-rendered through applyDecisions and
// scanned again; if anything is still reported the function throws, so the
// caller's write stays blocked (fail-closed). Returns the input unchanged when
// the first scan is clean, otherwise the patched text.
export function patchResiduals(text, decisions) {
  const leaks = shadowScan(text, decisions);
  if (leaks.length === 0) return text;

  leaks.forEach((leak) => {
    const key = sha256(leak.match);
    if (decisions[key]) return; // keep any decision already recorded for this value
    decisions[key] = { action: 'redact', replacement: maskFor(leak), ruleId: leak.ruleId };
  });

  const patched = applyDecisions(text, leaks, decisions);

  const residual = shadowScan(patched, decisions);
  if (residual.length === 0) return patched;

  throw new Error(
    `patchResiduals: ${residual.length} leak(s) remain after auto-redaction ` +
      `(${[...new Set(residual.map((l) => l.ruleId))].join(', ')}). Refusing to write.`
  );
}
| @@ -37,6 +37,8 @@ export function renderJson(tree, opts = {}) { | ||
| rejectionsByKind: stats.rejectionsByKind || {}, | ||
| toolUses: stats.toolUses, | ||
| filesTouched: stats.filesTouched, | ||
| + | inputTokens: stats.inputTokens || 0, | |
| + | outputTokens: stats.outputTokens || 0, | |
| models: stats.models, | ||
| firstTs: stats.firstTs, | ||
| lastTs: stats.lastTs, | ||
| @@ -56,6 +58,8 @@ export function renderJson(tree, opts = {}) { | ||
| lastTs: s.lastTs, | ||
| promptCount: s.prompts.length, | ||
| isContinuation: s.isContinuation, | ||
| + | inputTokens: s.stats.inputTokens || 0, | |
| + | outputTokens: s.stats.outputTokens || 0, | |
| })), | ||
| nodes: nodes.map((n) => ({ | ||
| id: n.id, | ||
| @@ -69,6 +73,13 @@ export function renderJson(tree, opts = {}) { | ||
| reruns: n.reruns || 0, | ||
| session: n.sessionId, | ||
| timestamp: n.ts, | ||
| + | model: n.model || null, | |
| + | actions: (n.actions || []).map((a) => ({ | |
| + | tool: a.tool || null, | |
| + | file: a.file || null, | |
| + | command: a.command || null, | |
| + | model: a.model || null, | |
| + | })), | |
| failureSignals: n.failureSignals || [], | ||
| evalCandidate: Boolean(n.evalCandidate), | ||
| lessonIds: n.lessonIds || [], |
| @@ -43,8 +43,15 @@ export function renderReportMarkdown(tree, opts = {}) { | ||
| .join(', '); | ||
| lines.push(`- Rejections: ${tree.stats.rejections}${breakdown ? ` (${breakdown})` : ''}`); | ||
| } | ||
| - | if (analysis.summary.models && analysis.summary.models.length) { | |
| - | lines.push(`- Models seen: ${analysis.summary.models.join(', ')}`); | |
| + | { | |
| + | // Merge models from both the analysis pass (from node actions) and tree.stats.models | |
| + | // (from session-level parser stats) to avoid undercounting when a model appears in | |
| + | // the session manifest but not in any flagged-node action. | |
| + | const allModels = [...new Set([ | |
| + | ...(tree.stats.models || []), | |
| + | ...(analysis.summary.models || []), | |
| + | ])].filter(Boolean); | |
| + | if (allModels.length) lines.push(`- Models seen: ${allModels.join(', ')}`); | |
| } | ||
| if (analysis.summary.thinkingBlocks) { | ||
| lines.push(`- Reasoning blocks captured: ${analysis.summary.thinkingBlocks}`); | ||
| @@ -101,6 +108,21 @@ export function renderReportMarkdown(tree, opts = {}) { | ||
| lines.push(''); | ||
| } | ||
| + | if (analysis.correctionChains && analysis.correctionChains.length) { | |
| + | lines.push('## Correction chains'); | |
| + | lines.push(''); | |
| + | lines.push('Failure turns that received a human correction, with resolution status.'); | |
| + | lines.push(''); | |
| + | for (const chain of analysis.correctionChains.slice(0, 10)) { | |
| + | const resolved = chain.resolvedNodeId ? ` -> resolved [${chain.resolvedNodeId}]` : ' -> unresolved'; | |
| + | lines.push(`- ${chain.id} (${chain.failureType}, ${chain.confidence}): failure [${chain.failureNodeId}] -> correction [${chain.correctionNodeId}]${resolved}`); | |
| + | } | |
| + | if (analysis.correctionChains.length > 10) { | |
| + | lines.push(`- ... ${analysis.correctionChains.length - 10} more in .treetrace/failures.json`); | |
| + | } | |
| + | lines.push(''); | |
| + | } | |
| + | ||
| const rejectionsView = renderRejectionsJson(tree, opts); | ||
| if (rejectionsView.summary.total) { | ||
| lines.push('## Rejections'); |
| @@ -109,6 +109,8 @@ function computeStats(sessions, nodes) { | ||
| let toolUses = 0; | ||
| let interruptions = 0; | ||
| let rejections = 0; | ||
| + | let inputTokens = 0; | |
| + | let outputTokens = 0; | |
| const rejectionsByKind = Object.create(null); | ||
| const timestamps = []; | ||
| for (const s of sessions) { | ||
| @@ -117,6 +119,8 @@ function computeStats(sessions, nodes) { | ||
| toolUses += s.stats.toolUses; | ||
| interruptions += s.stats.interruptions; | ||
| rejections += s.stats.rejections || 0; | ||
| + | inputTokens += s.stats.inputTokens || 0; | |
| + | outputTokens += s.stats.outputTokens || 0; | |
| if (s.stats.rejectionsByKind) { | ||
| for (const [k, v] of Object.entries(s.stats.rejectionsByKind)) { | ||
| rejectionsByKind[k] = (rejectionsByKind[k] || 0) + v; | ||
| @@ -141,6 +145,8 @@ function computeStats(sessions, nodes) { | ||
| nudges: nodes.reduce((acc, n) => acc + n.nudges, 0), | ||
| interruptions, | ||
| toolUses, | ||
| + | inputTokens, | |
| + | outputTokens, | |
| filesTouched: filesTouched.size, | ||
| models: [...models], | ||
| days: daySpan(timestamps), |
| @@ -8,7 +8,7 @@ import { dirname, join } from 'node:path'; | ||
| import { parseSessionFile, parsePlainTranscript, classifySpecialUserText } from '../src/parse.js'; | ||
| import { classifyPrompts } from '../src/extract.js'; | ||
| import { buildTree } from '../src/tree.js'; | ||
| - | import { scanText, applyDecisions, shadowScan, maskFor, resolveFindings, isGitShaCandidate } from '../src/redact.js'; | |
| + | import { scanText, applyDecisions, shadowScan, maskFor, resolveFindings, isGitShaCandidate, patchResiduals } from '../src/redact.js'; | |
| import { renderMarkdown, promptPack } from '../src/render-md.js'; | ||
| import { renderMermaid, isSummaryByDefault, SUMMARY_NODE_THRESHOLD } from '../src/render-mermaid.js'; | ||
| import { renderJson } from '../src/render-json.js'; | ||
| @@ -987,6 +987,7 @@ test('security report: surfaces real signals and omits benign sessions', () => { | ||
| assert.ok(/disable the tests|disable or skip tests/i.test(report), 'test-skip signal should appear'); | ||
| assert.ok(/do not disable the tests/i.test(report), 'the human correction should surface as an eval/memory candidate'); | ||
| + | writeFileSync(join(dir, 'README.md'), '# demo\n'); | |
| const benign = { | ||
| id: 'node_001', kind: 'root', status: 'accepted', parent: null, | ||
| text: 'add a markdown table to the README', title: 'add a table', | ||
| @@ -2244,3 +2245,263 @@ test('rejections: --from claude works as an explicit --from value (Phase 0 false | ||
| rmSync(dir, { recursive: true, force: true }); | ||
| } | ||
| }); | ||
| + | ||
| + | test('schema-export: token totals appear in stats and per-session in tree.json', async () => { | |
| + | const { tree } = await fixtureTree(); | |
| + | const json = renderJson(tree, { projectName: 'demo' }); | |
| + | assert.ok(typeof json.stats.inputTokens === 'number', 'stats.inputTokens must be a number'); | |
| + | assert.ok(typeof json.stats.outputTokens === 'number', 'stats.outputTokens must be a number'); | |
| + | assert.ok(json.stats.inputTokens > 0, 'stats.inputTokens should be non-zero for this fixture'); | |
| + | assert.ok(json.stats.outputTokens > 0, 'stats.outputTokens should be non-zero for this fixture'); | |
| + | assert.ok(json.sessions.length > 0, 'must have at least one session'); | |
| + | assert.ok(typeof json.sessions[0].inputTokens === 'number', 'sessions[0].inputTokens must be a number'); | |
| + | assert.ok(typeof json.sessions[0].outputTokens === 'number', 'sessions[0].outputTokens must be a number'); | |
| + | assert.equal(json.sessions[0].inputTokens, json.stats.inputTokens, 'single-session fixture: session tokens must equal stats tokens'); | |
| + | }); | |
| + | ||
| + | test('schema-export: per-node model and actions appear in every node in tree.json', async () => { | |
| + | const { tree } = await fixtureTree(); | |
| + | const json = renderJson(tree, { projectName: 'demo' }); | |
| + | assert.ok(json.nodes.length > 0, 'must have at least one node'); | |
| + | assert.ok(json.nodes.every((n) => 'model' in n), 'every node must have a model field'); | |
| + | assert.ok(json.nodes.every((n) => Array.isArray(n.actions)), 'every node must have an actions array'); | |
| + | const nodeWithAction = json.nodes.find((n) => n.actions.length > 0); | |
| + | assert.ok(nodeWithAction, 'at least one node should have an action'); | |
| + | const action = nodeWithAction.actions[0]; | |
| + | assert.ok('tool' in action, 'action must have tool'); | |
| + | assert.ok('file' in action, 'action must have file'); | |
| + | assert.ok('command' in action, 'action must have command'); | |
| + | assert.ok('model' in action, 'action must have model'); | |
| + | const rootNode = json.nodes.find((n) => n.kind === 'root'); | |
| + | assert.ok(rootNode, 'root node must exist'); | |
| + | assert.equal(rootNode.model, 'assistant-model', 'root node model attribution must match fixture'); | |
| + | }); | |
| + | ||
| + | test('schema-export: shell-command file paths appear in filesTouched', async () => { | |
| + | const REJECTIONS_FIXTURE = join(dirname(fileURLToPath(import.meta.url)), 'fixtures', 'claude-code-rejections.jsonl'); | |
| + | const { parseSessionFile: ps } = await import('../src/parse.js'); | |
| + | const session = await ps(REJECTIONS_FIXTURE, { sessionId: 'rej-shell' }); | |
| + | const touched = session.stats.filesTouched; | |
| + | assert.ok(touched.includes('README.md'), 'Edit tool file_path must appear in filesTouched'); | |
| + | assert.ok(touched.some((f) => f.includes('.config/forbidden')), 'Bash command /root/.config/forbidden must appear in filesTouched'); | |
| + | }); | |
| + | ||
| + | test('analyze: uncorroborated strong frustration turn emits inferred user_frustration signal via recall backstop', () => { | |
| + | // A pure-frustration turn that is not a correction and shares only one token with the | |
| + | // prior node. Under the old gate this would emit no signal. The recall backstop must | |
| + | // fire at inferred tier without inflating verified/high counts. | |
| + | const prior = { | |
| + | id: 'node_001', text: 'add a leaflet map to the dashboard', title: 'leaflet map', kind: 'root', | |
| + | status: 'accepted', parent: null, | |
| + | actions: [{ tool: 'Edit', file: 'src/map.js', input: '', command: null, model: 'm' }], | |
| + | }; | |
| + | // Frustration turn: names the file once ("helper") - shares 1 token (< 3 needed for | |
| + | // sharesEvidence); not a correction kind; strong frustration wording triggers backstop. | |
| + | const frustration = { | |
| + | id: 'node_002', | |
| + | text: 'this sucks, the helper.js you wrote is god awful and terrible, i am angry and frustrated', | |
| + | title: 'frustrated', kind: 'direction', status: 'accepted', parent: prior, | |
| + | actions: [], | |
| + | }; | |
| + | const analysis = analyzeTree({ nodes: [prior, frustration] }); | |
| + | const frustSignals = analysis.failures.filter((f) => f.type === 'user_frustration'); | |
| + | assert.ok(frustSignals.length >= 1, 'recall backstop must fire at least one user_frustration signal'); | |
| + | assert.ok( | |
| + | frustSignals.every((f) => f.tier === 'inferred'), | |
| + | 'backstop signals must stay at inferred tier' | |
| + | ); | |
| + | // Must not inflate verified or high counts. | |
| + | const tc = analysis.summary.tierCounts; | |
| + | assert.equal(tc.verified, 0, 'no verified signals from a pure uncorroborated frustration turn'); | |
| + | assert.equal(tc.high, 0, 'no high signals from a pure uncorroborated frustration turn'); | |
| + | }); | |
| + | ||
| + | test('analyze: clean weather-dashboard fixture does not gain spurious frustration signals from recall backstop', async () => { | |
| + | // The synthetic session has no strong frustration wording; the backstop must not fire. | |
| + | const { tree } = await fixtureTree(); | |
| + | const analysis = analyzeTree(tree); | |
| + | const frustSignals = analysis.failures.filter((f) => f.type === 'user_frustration'); | |
| + | assert.equal(frustSignals.length, 0, 'clean synthetic fixture must produce zero user_frustration signals'); | |
| + | }); | |
| + | ||
| + | test('report: Models seen reflects full stats.models set, not just analysis-pass models', () => { | |
| + | // A tree where stats.models has two models but node.actions only carries one of them. | |
| + | // The report must list both. | |
| + | const node = { | |
| + | id: 'node_001', text: 'build a chart', title: 'chart', kind: 'root', status: 'accepted', parent: null, | |
| + | actions: [{ tool: 'Edit', file: 'src/chart.js', input: '', command: null, model: 'model-a' }], | |
| + | }; | |
| + | const tree = { | |
| + | stats: { models: ['model-a', 'model-b'], promptCount: 1, sessionCount: 1 }, | |
| + | nodes: [node], | |
| + | sessions: [], | |
| + | }; | |
| + | const report = renderReportMarkdown(tree, { projectName: 'test' }); | |
| + | assert.ok(report.includes('model-a'), 'report must include model-a'); | |
| + | assert.ok(report.includes('model-b'), 'report must include model-b from stats.models'); | |
| + | }); | |
| + | ||
| + | test('report: correction chains section appears when chains exist', () => { | |
| + | // Build a tree with a correction that shares a file with the prior node so a chain is formed. | |
| + | const failure = { | |
| + | id: 'node_001', text: 'write the config parser', title: 'config parser', kind: 'root', status: 'accepted', parent: null, | |
| + | ts: '2026-06-12T10:00:00.000Z', | |
| + | actions: [{ tool: 'Edit', file: 'src/config.js', input: '', command: null, model: 'm' }], | |
| + | }; | |
| + | const correction = { | |
| + | id: 'node_002', text: 'no that is wrong, redo the config parser logic', title: 'redo config', kind: 'correction', status: 'accepted', parent: failure, | |
| + | ts: '2026-06-12T10:30:00.000Z', | |
| + | actions: [{ tool: 'Edit', file: 'src/config.js', input: '', command: null, model: 'm' }], | |
| + | }; | |
| + | const tree = { | |
| + | stats: { models: ['m'], promptCount: 2, sessionCount: 1, corrections: 1 }, | |
| + | nodes: [failure, correction], | |
| + | sessions: [], | |
| + | }; | |
| + | const report = renderReportMarkdown(tree, { projectName: 'test' }); | |
| + | assert.ok(report.includes('## Correction chains'), 'report must include Correction chains section'); | |
| + | assert.ok(report.includes('node_001'), 'report must reference the failure node'); | |
| + | assert.ok(report.includes('node_002'), 'report must reference the correction node'); | |
| + | }); | |
| + | ||
| + | test('schema-export: new exported fields pass the redaction / assertClean guard', async () => { | |
| + | const API_KEY_FIXTURE = join(dirname(fileURLToPath(import.meta.url)), 'fixtures', 'api-key-auth-session.jsonl'); | |
| + | const dir = mkdtempSync(join(tmpdir(), 'treetrace-schema-redact-')); | |
| + | try { | |
| + | await main(['--from', 'claude', '--file', API_KEY_FIXTURE, '--dir', dir, '--redact-auto', '--quiet']); | |
| + | const treeJson = readFileSync(join(dir, '.treetrace', 'tree.json'), 'utf8'); | |
| + | const parsed = JSON.parse(treeJson); | |
| + | assert.ok(typeof parsed.stats.inputTokens === 'number', 'stats.inputTokens present after redact gate'); | |
| + | assert.ok(typeof parsed.stats.outputTokens === 'number', 'stats.outputTokens present after redact gate'); | |
| + | assert.ok(parsed.nodes.every((n) => Array.isArray(n.actions)), 'every node has actions after redact gate'); | |
| + | const secretPatterns = [/ghp_/, /sk-ant-/, /AKIA/, /-----BEGIN/, /eyJ[A-Za-z]/, /xox[baprs]-/]; | |
| + | for (const pat of secretPatterns) { | |
| + | assert.ok(!pat.test(treeJson), `secret pattern ${pat} must not appear in tree.json`); | |
| + | } | |
| + | } finally { | |
| + | rmSync(dir, { recursive: true, force: true }); | |
| + | } | |
| + | }); | |
| + | ||
| + | test('hallucinations: prose-slash phrases produce no file-path flag', () => { | |
| + | const dir = tempProject(); | |
| + | try { | |
| + | const mk = (text) => ({ nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] }); | |
| + | const proseFragments = [ | |
| + | 'admin/analyst/viewer', | |
| + | 'lat/lon', | |
| + | 'make/model/color', | |
| + | '16/9', | |
| + | 'none/low/medium/high', | |
| + | 'RTSP/HTTP', | |
| + | 'application/json', | |
| + | ]; | |
| + | for (const phrase of proseFragments) { | |
| + | const flags = detectHallucinations(mk(`use ${phrase} as an enum`), dir).hallucinations | |
| + | .filter((h) => h.category === 'hallucinated_file_or_path') | |
| + | .map((h) => h.reference); | |
| + | assert.deepEqual(flags, [], `prose phrase "${phrase}" must not be flagged as a missing file path`); | |
| + | } | |
| + | } finally { | |
| + | rmSync(dir, { recursive: true, force: true }); | |
| + | } | |
| + | }); | |
| + | ||
| + | test('hallucinations: true positive ./src/middleware/rateLimit.js still fires', () => { | |
| + | const dir = tempProject(); | |
| + | try { | |
| + | const mk = (text) => ({ nodes: [{ id: 'n1', kind: 'root', status: 'accepted', parent: null, text, title: 't', actions: [] }] }); | |
| + | const flags = detectHallucinations(mk('update ./src/middleware/rateLimit.js for the new rate limiting logic'), dir).hallucinations | |
| + | .filter((h) => h.category === 'hallucinated_file_or_path') | |
| + | .map((h) => h.reference); | |
| + | assert.ok(flags.some((r) => r.includes('rateLimit.js')), 'invented path ./src/middleware/rateLimit.js must still be flagged'); | |
| + | const flags2 = detectHallucinations(mk('edit src/middleware/rateLimit.js'), dir).hallucinations | |
| + | .filter((h) => h.category === 'hallucinated_file_or_path') | |
| + | .map((h) => h.reference); | |
| + | assert.ok(flags2.some((r) => r.includes('rateLimit.js')), 'src/ prefixed invented path must still be flagged'); | |
| + | } finally { | |
| + | rmSync(dir, { recursive: true, force: true }); | |
| + | } | |
| + | }); | |
| + | ||
| + | test('hallucinations: Edit to nonexistent file is flagged via action.file alone', () => { | |
| + | const dir = tempProject(); | |
| + | try { | |
| + | const tree = { | |
| + | nodes: [{ | |
| + | id: 'n1', kind: 'root', status: 'accepted', parent: null, | |
| + | text: 'update the config', | |
| + | title: 't', | |
| + | actions: [{ tool: 'Edit', file: 'src/nonexistent-only-in-action-file.js', input: '', command: null }], | |
| + | }], | |
| + | }; | |
| + | const flags = detectHallucinations(tree, dir).hallucinations | |
| + | .filter((h) => h.category === 'hallucinated_file_or_path') | |
| + | .map((h) => h.reference); | |
| + | assert.ok( | |
| + | flags.some((r) => r.includes('nonexistent-only-in-action-file.js')), | |
| + | 'Edit to a nonexistent file must be caught via action.file even when path is absent from node.text' | |
| + | ); | |
| + | } finally { | |
| + | rmSync(dir, { recursive: true, force: true }); | |
| + | } | |
| + | }); | |
| + | ||
| + | test('redaction: lowercase bearer token is caught by bearer-header rule', () => { | |
| + | const token = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.lowentropy1234'; | |
| + | const text = `Authorization: bearer ${token}`; | |
| + | const hits = scanText(text).map((f) => f.ruleId); | |
| + | assert.ok(hits.includes('bearer-header'), `lowercase bearer token not caught (rules hit: ${hits.join(', ')})`); | |
| + | const decisions = {}; | |
| + | const findings = scanText(text); | |
| + | for (const f of findings) { | |
| + | if (f.ruleId === 'bearer-header') { | |
| + | decisions[sha256(f.match)] = { action: 'redact', replacement: maskFor(f), ruleId: f.ruleId }; | |
| + | } | |
| + | } | |
| + | const cleaned = applyDecisions(text, findings, decisions); | |
| + | assert.ok(!cleaned.includes(token), 'raw token still present after redaction'); | |
| + | assert.ok(cleaned.includes('[REDACTED:bearer-header]'), 'expected bearer-header redaction marker'); | |
| + | }); | |
| + | ||
| + | test('redaction: --redact-auto resolves high-entropy shadow-scan residuals and writes clean artifacts', async () => { | |
| + | const highEntropyToken = 'Xk9mQ2vR7nLpZ4wY8sA3cB6eF1hJ0uT5iG2dN'; | |
| + | const dir = mkdtempSync(join(tmpdir(), 'treetrace-entropy-auto-')); | |
| + | const file = join(dir, 'conv.json'); | |
| + | const convo = [{ | |
| + | mapping: { | |
| + | r: { message: null, parent: null, children: ['u'] }, | |
| + | u: { | |
| + | message: { | |
| + | author: { role: 'user' }, | |
| + | content: { parts: [`check the session token ${highEntropyToken} for issues`] }, | |
| + | create_time: 1.0, | |
| + | }, | |
| + | parent: 'r', | |
| + | children: ['a'], | |
| + | }, | |
| + | a: { | |
| + | message: { | |
| + | author: { role: 'assistant' }, | |
| + | content: { parts: ['done'] }, | |
| + | create_time: 2.0, | |
| + | }, | |
| + | parent: 'u', | |
| + | children: [], | |
| + | }, | |
| + | }, | |
| + | }]; | |
| + | writeFileSync(file, JSON.stringify(convo)); | |
| + | try { | |
| + | await main(['--from', 'chatgpt', '--file', file, '--dir', dir, '--redact-auto', '--quiet']); | |
| + | const treeJson = readFileSync(join(dir, '.treetrace', 'tree.json'), 'utf8'); | |
| + | assert.ok(!treeJson.includes(highEntropyToken), 'raw high-entropy token leaked into tree.json'); | |
| + | assert.equal( | |
| + | shadowScan(treeJson, {}).filter((f) => f.severity !== 'soft').length, | |
| + | 0, | |
| + | 'tree.json still has residual high-entropy tokens after --redact-auto' | |
| + | ); | |
| + | } finally { | |
| + | rmSync(dir, { recursive: true, force: true }); | |
| + | } | |
| + | }); |