Zion Boggan zionboggan.com ↗

Strengthen analysis-layer signal detection accuracy

Replace several keyword-bound heuristics in the analysis layer with structural
detectors that generalize across phrasing, while tightening precision:

- Corrections and declines: recognize fresh-form redirects, reversals, and
  goal-mismatch turns structurally (a back-reference to a prior action plus a
  contrast or reversal cue) rather than by fixed opener phrases.
- Security: detect credentials by value and known token formats, recognize
  public-exposure and safety-gate-weakening configuration, survive redaction
  of the secret value, and collapse a credential touched across multiple turns
  into a single finding.
- Hallucinated files: catch files an assistant only claims in prose, while
  suppressing references to declared or well-known libraries.
- Correction chains and lessons: link a failure to the later turn that
  redirects it via shared concrete evidence, and bind security lessons to the
  remediation the user actually stated, suppressing boilerplate.

All unit tests pass.
2dacbee   Zion Boggan committed on Jun 20, 2026 (2 days ago)
src/analyze.js +1087 -55
@@ -43,7 +43,7 @@ function tierForRejection(confidence) {
}
const CORRECTION_HINT =
- /\b(no|stop|scrap|not that|you forgot|you ignored|that's wrong|that is wrong|i said|instead|redo|re do|go back|wrong|doesn'?t work|didn'?t work|still (failing|broken|wrong|bad)|not what i (asked|wanted|meant))\b/i;
+ /\b(no|stop|scrap|revert|undo|roll ?back|rip (?:it|that|this) out|back (?:it|that) out|not that|not it|over[- ]?engineered|you forgot|you ignored|that's wrong|that is wrong|i said|instead|redo|re do|go back|wrong|doesn'?t work|didn'?t work|still (failing|broken|wrong|bad)|not what i (asked|wanted|meant))\b/i;
const FRUSTRATION_HINT =
/\b(sucks|awful|god awful|what the heck|wtf|mad|angry|frustrat|not suffic|i don'?t trust|terrible|bad)\b/i;
// Strong, unambiguous frustration wording that warrants an inferred recall signal even
@@ -69,12 +69,39 @@ export const SECURITY_INTENT_PARTS = [
{ name: 'access_control_change', re: /\b(?:change|modify|update|add|tighten|loosen|fix)\b[^.]{0,20}\b(?:access control|permissions?|rbac|auth flow)\b/i },
];
const SECURITY_INTENT_RE = composeOr(SECURITY_INTENT_PARTS);
-const SCOPE_DRIFT_HINT = /\b(don'?t add|do not add|not a web app|keep it local|too much|overbuilt|scope drift|stay focused|same format|keep .* cli|zero-config cli)\b/i;
+const SCOPE_DRIFT_HINT = /\b(don'?t add|do not add|not a web app|keep it local|too much|overbuilt|over[- ]?engineered|over[- ]?kill|scope drift|stay focused|same format|keep .* cli|zero-config cli|way more than|more than i (?:wanted|asked|need)|not a (?:platform|framework|service|product|web ?app|library|server)|a (?:script|function|cli|tool|one[- ]?liner) not|rip (?:it |that |the )?out|too (?:complex|complicated|heavy|big)|simpler than this)\b/i;
+// STRUCTURAL surplus-removal detector for overbuild. Overbuild is structurally invariant:
+// the agent adds N named components, then the user names an excess metaphor/quantifier AND demands
+// removal of those named components. SURPLUS_CUE_RE matches the excess framing in a phrasing-general
+// way (gold-plating / over-build / cannon-for-a-fly / wrench-not-a-workshop / trim it way down / way
+// too much), so it generalizes past the old literal list. REMOVE_COMPONENTS_RE matches a removal
+// imperative governing a named architectural component. The arm only fires when the removed
+// component token ALSO appears in the immediately-prior assistant narration (a back-reference reusing
+// the session._priorAssistant token snapshot), so a bare excess complaint with no real prior surplus
+// never trips it. Anchoring on the SHARED named component (not the metaphor wording) is what keeps it
+// precise across "cannon"/"wrench"/"gold-plating" and any future phrasing.
+const SURPLUS_CUE_RE =
+ /\bgold[- ]?plat(?:e|ed|ing)?\b|\bover[- ]?build|\bover[- ]?engineer|\bcannon for a (?:fly|mosquito)\b|\bwrench(?:,)? not a (?:workshop|factory)\b|\btrim (?:it|this) (?:way )?down\b|\bway too (?:much|heavy|big|complex)\b|\bmore than (?:i|we) (?:asked|wanted|needed)\b/i;
+const REMOVE_COMPONENTS_RE =
+ /\b(?:rip|ditch|drop|strip|tear|gut|remove|delete|cut)\b[^.]{0,60}\b(registry|middleware|daemon|plugin|panel|layer|engine|scheduler|system|framework|theme)\b/i;
const TOOL_HINT = /\b(wrong tool|wrong library|use .* instead|don'?t use|dependency|package|environment|node version|python version|missing module)\b/i;
const HALLUCINATION_HINT = /\b(hallucinat|doesn'?t exist|does not exist|no such file|fake file|fake api|made up)\b/i;
-const REPEATED_FIX_HINT = /\b(still failing|still broken|again|same error|didn'?t fix|doesn'?t fix|keeps? failing)\b/i;
+const REPEATED_FIX_HINT = /\b(still failing|still broken|still wrong|again|same error|didn'?t fix|doesn'?t fix|keeps? failing|redo)\b/i;
const UNDERBUILT_HINT = /\b(underbuilt|missing|not enough|too bare|incomplete|you skipped|you missed)\b/i;
-const FORMAT_HINT = /\b(format|json|markdown|schema|same structure|exact output|invalid)\b/i;
+// format_violation cue. Requires an actual format COMPLAINT ("the output format", "reformat",
+// "malformed", "invalid json"). Bare data-format names (json/csv/xml) are NOT cues: they match
+// feature specs ("a CSV export flag", "output as json") and filenames ("slides.json"), not violations.
+const FORMAT_HINT =
+ /\b(?:format|reformat|malformed|same structure|exact output)\b|\binvalid (?:json|yaml|xml|format|output|structure|markup|schema)\b/i;
+// misunderstood_goal must have explicit "wrong goal" evidence, not be a fallback label on any
+// correction. Per TAXONOMY: the user restates the real goal after the agent pursued the wrong one.
+const MISUNDERSTOOD_GOAL_RE =
+ /\b(?:that'?s not what i (?:asked|wanted|meant)|not what i (?:asked|wanted|meant)|you (?:misunderstood|got it wrong|missed the point|misread|solved the wrong|optimi[sz]ed the wrong|chose the wrong)|i (?:wanted|meant|asked for|cared about)\b[^.]*\bnot\b|wrong (?:goal|thing|feature|approach|task|problem|axis|optimization|metric)|you built the wrong|that'?s the wrong)\b/i;
+// A structural redirect that carries a HARD REVERSAL verb ("rip ... out", "nix", "scrap",
+// "gut") is a decline/overbuild reversal, NOT a misunderstood-goal restatement. The misunderstood
+// fallback is suppressed for those so an overbuild ("cannon for a fly, rip the registry out") does
+// not mislabel as misunderstood_goal; it stays a decline (rejection) + chain instead.
+const REVERSAL_VERB_RE = /\b(?:rip|nix|scrap|yank|gut|tear|strip|pull)\b[^.]{0,60}\bout\b|\b(?:nix|scrap|yank|gut)\b/i;
const WORDING_SCAN_MAX_CHARS = 1200;
const SIGNAL_PRIORITY = [
@@ -124,6 +151,11 @@ const RECOVERY_RE =
/\b(?:bring it back|bring them back|restore|recover|undo|revert|roll(?: |-)?back|get it back|put it back|can you (?:fix|recover|restore)|recreate)\b/i;
const APOLOGY_RE = /\b(?:i'?m sorry|im sorry|sorry|my bad|my fault|oops|whoops)\b/i;
const REMEDIATION_RE = new RegExp(`${DESTRUCTIVE_RE.source}|${RECOVERY_RE.source}`, 'i');
+// Figurative use of destructive verbs ("broke my brain"), and explicit disclaimers that the
+// damage was NOT the agent's doing. Either one means this is not a real abandoned/destructive path.
+const FIGURATIVE_DESTRUCTIVE_RE = /\bbroke my (?:brain|heart|mind|spirit)\b|\bbroken (?:heart|record)\b|\bmind[- ]?blow/i;
+const NOT_AGENT_DISCLAIMER_RE =
+ /\bnot your (?:change|fault|code|edit|doing|problem)\b|\bpre-?existing\b|\bunrelated to your\b|\bnot (?:from|caused by|due to) your\b|\balready (?:broken|failing|broke) before\b|\bignore it\b/i;
const SECURITY_FILE_RE = /(?:^|[\\/])(?:\.env[^\\/]*|[^\\/]*(?:auth|session|middleware|login|signin|signup|permission|rbac|access[-_]?control|secur|crypto|jwt|oauth|passwd|password|secret|credential|token)[^\\/]*)$/i;
const SECURITY_FILE_EXCLUDE_RE = /(?:^|[\\/])(?:[^\\/]*tokens?\.[a-z]+|tokenizer[^\\/]*|[^\\/]*[-_.]?token(?:izer|s)?\.(?:tsx?|jsx?|css|scss|json|svg)|semantic[-_]?tokens?[^\\/]*|design[-_]?tokens?[^\\/]*)$/i;
@@ -141,16 +173,305 @@ export const RISKY_CMD_PARTS = [
{ name: 'truncate', re: /\bTRUNCATE\s+(?:TABLE\s+)?[\w."`]+/i },
];
const RISKY_CMD_RE = composeOr(RISKY_CMD_PARTS);
-const SECRET_CONTENT_RE = /(?:\bsource\s+[^\n]*\.env\b|(?:^|[;&|]|\s)\.\s+[^\n]*\.env\b|\.env\.(?:secrets|local|prod|production)\b|\bexport\s+[A-Z0-9_]*(?:_API_KEY|_TOKEN|_SECRET|_PASSWORD|API_KEY|SECRET_KEY|ACCESS_KEY|PRIVATE_KEY)\b|\b(?:wrangler|doppler|vault)\b|\bgh\s+auth\b|\baws\s+configure\b|\bgcloud\s+auth\b|\bkubectl\s+config\s+set-credentials\b)/i;
-const ACCESS_CONTROL_CONTENT_RE = /\b(?:grant\s+(?:select|insert|update|delete|all)\b|setfacl|chmod\s+[0-7]{3,4}\b)/i;
+const SECRET_CONTENT_RE = /(?:\bsource\s+[^\n]*\.env\b|(?:^|[;&|]|\s)\.\s+[^\n]*\.env\b|\.env\.(?:secrets|local|prod|production)\b|\bexport\s+[A-Z0-9_]*(?:_API_KEY|_TOKEN|_SECRET|_PASSWORD|API_KEY|SECRET_KEY|ACCESS_KEY|PRIVATE_KEY)\b|\b(?:wrangler|doppler|vault)\b|\bgh\s+auth\b|\baws\s+configure\b|\bgcloud\s+auth\b|\bkubectl\s+config\s+set-credentials\b|\b(?:AKIA|ASIA|AGPA|AIDA|AROA|AIPA)[A-Z0-9]{12,}\b|\b(?:gh[opusr]|github_pat)[-_][A-Za-z0-9_]{16,}\b|\bsk-[A-Za-z0-9]{16,}\b|\bxox[baprs]-[A-Za-z0-9-]{10,}\b|\b(?:aws_secret_access_key|aws_access_key_id|api[_-]?key|secret[_-]?key|access[_-]?key|secret[_-]?access[_-]?key|private[_-]?key|client[_-]?secret|password|passwd|auth[_-]?token|access[_-]?token|bearer[_-]?token|connection[_-]?string)\b\s*[:=]\s*['"][^'"\n]{6,}['"])/i;
+// Access-control RISK in content: granting broad DB rights, loosening file perms, OR exposing a
+// resource publicly (public-read, world-readable, 0.0.0.0/0, wildcard principal).
+const ACCESS_CONTROL_CONTENT_RE = /\b(?:grant\s+(?:select|insert|update|delete|all)\b|setfacl|chmod\s+[0-7]{3,4}\b|public[- ]?read(?:-write)?\b|world[- ]?readable\b|--acl[= ]public|0\.0\.0\.0\/0|publicly[- ]?(?:readable|accessible|writable)\b|"?principal"?\s*:\s*"?\*)/i;
const ACCESS_CONTROL_WEAK_RE = /\b(?:rbac|access[-_]?control)\b/i;
+// Secret-by-VALUE detector. Fires on the credential VALUE itself by format/entropy,
+// independent of surrounding quotes or filename. Two sub-rules:
+// (1) known-format vendor tokens anywhere in the content (Stripe sk_live_, OpenAI-style sk-,
+// AWS AKIA..., GitHub ghp_/github_pat, Slack xox?-, Google AIza..., PEM PRIVATE KEY block,
+// service-account JSON shape). SECRET_CONTENT_RE already covers a subset of these (AWS,
+// GitHub, sk-, Slack); this widens to Stripe/Google/PEM/SA-JSON, which previously required a
+// quoted RHS.
+// (2) a BARE or quoted key=value / key: value where the key token is a credential noun and the
+// value is a long, high-entropy literal, recovering unquoted YAML/env secrets such as
+// POSTGRES_PASSWORD: hunter2-prod-Sup3r. Gated by a Shannon-entropy floor + a
+// placeholder/example excluder to hold precision.
+const VENDOR_TOKEN_RE =
+ /\bsk_live_[A-Za-z0-9]{16,}\b|\bsk-[A-Za-z0-9]{16,}\b|\b(?:AKIA|ASIA|AGPA|AIDA|AROA|AIPA)[A-Z0-9]{12,}\b|\b(?:gh[opusr]|github_pat)[-_][A-Za-z0-9_]{16,}\b|\bxox[baprs]-[A-Za-z0-9-]{10,}\b|\bAIza[A-Za-z0-9_-]{20,}\b|-----BEGIN(?:\s+[A-Z]+)?\s+PRIVATE KEY-----|"type"\s*:\s*"service_account"[\s\S]{0,400}?"private_key"\s*:/;
+// Credential-noun KEY followed by a bare or quoted VALUE. Bare value runs to end-of-line/quote/space.
+// Key may carry a prefix segment (POSTGRES_PASSWORD, DB_API_KEY, MY-SECRET) so match the
+// credential noun as the trailing token of a [A-Za-z0-9_-]* identifier, then : or =.
+const SECRET_KV_RE =
+ /(?:^|[^A-Za-z0-9])(?:[A-Za-z0-9-]+[_-])?(?:password|passwd|secret(?:[_-]?key)?|api[_-]?key|access[_-]?key|secret[_-]?access[_-]?key|auth[_-]?token|access[_-]?token|bearer[_-]?token|private[_-]?key|client[_-]?secret|token)\s*[:=]\s*(['"]?)([^'"\n\r]{8,})\1/i;
+// Placeholder / example values that must NOT count as a real leaked secret.
+const SECRET_PLACEHOLDER_RE =
+ /^(?:<[^>]*>|\{\{?[^}]*\}?\}|\$\{?[A-Za-z0-9_]+\}?|changeme|change_me|your[_-]?\w*|example|placeholder|redacted|todo|none|null|true|false|xxx+|\*{3,}|\.{3,}|secret|password|token|key|test|dummy|sample|foobar)$/i;
+
+// Match the redactor's own [REDACTED:<ruleId>] sentinels for credential-format /
+// vendor and secret-noun rules ONLY. Soft-PII rules (email, ipv4, home-dir-username) are
+// intentionally excluded so redacted soft-PII does not get promoted to a leaked credential.
+// This makes credential detection survive the redact-before-analyze ordering: a body whose
+// secret was already replaced by a sentinel still classifies as a secret-by-value.
+const REDACTED_SECRET_RE =
+ /\[REDACTED:(?:private-key-block|aws-access-key|github-token|github-fine-grained|gitlab-token|anthropic-key|openai-key|slack-token|stripe-live-key|npm-token|tailscale-key|google-api-key|sendgrid-key|twilio-key|telegram-bot-token|discord-webhook|jwt|hex-token|wireguard-key|url-basic-auth|bearer-header|secret-assignment)\]/;
+
+/**
+ * Shannon entropy of a string, in bits per symbol.
+ *
+ * Symbols are Unicode code points (the unit `for...of` yields), and the total is counted in
+ * the same unit. Dividing by `str.length` (UTF-16 code units) would make the probabilities
+ * of a string containing astral characters sum to less than 1 and understate its entropy.
+ *
+ * @param {string} str - Text to measure; any falsy value yields 0.
+ * @returns {number} 0 for empty or single-symbol input, log2(k) for k equally frequent symbols.
+ */
+function shannonEntropy(str) {
+  if (!str) return 0;
+  const freq = Object.create(null);
+  let n = 0;
+  for (const ch of str) {
+    freq[ch] = (freq[ch] || 0) + 1;
+    n += 1;
+  }
+  let h = 0;
+  for (const k in freq) {
+    const p = freq[k] / n;
+    h -= p * Math.log2(p);
+  }
+  return h;
+}
+
+// Returns true when the body carries a credential by its intrinsic VALUE (format or entropy),
+// regardless of quoting or filename. Independent of SECRET_CONTENT_RE so unquoted YAML/env and
+// known-format vendor tokens are recovered without loosening the quoted-only legacy rule.
+// Checks, in order: a redactor sentinel for a credential rule, a known vendor token format, then
+// EVERY credential-noun key/value pair in the body. Scanning all pairs (not only the first) means
+// a leading placeholder such as `password: changeme` cannot mask a real secret on a later line.
+function isSecretByValue(body) {
+  if (typeof body !== 'string' || !body) return false;
+  if (REDACTED_SECRET_RE.test(body)) return true;
+  if (VENDOR_TOKEN_RE.test(body)) return true;
+  // Fresh global clone: exec() then walks successive matches, and the lastIndex state stays
+  // local instead of leaking into the shared SECRET_KV_RE.
+  const kvScan = new RegExp(SECRET_KV_RE.source, `${SECRET_KV_RE.flags.replace('g', '')}g`);
+  let m;
+  while ((m = kvScan.exec(body)) !== null) {
+    const value = m[2].trim();
+    if (
+      value.length >= 8 &&
+      !SECRET_PLACEHOLDER_RE.test(value) &&
+      // Reject obvious non-secret values (env-var refs, pure words) and require enough
+      // character diversity for a real credential. Entropy floor of 2.5 bits/char admits
+      // mixed alnum secrets like hunter2-prod-Sup3r while rejecting low-variety words.
+      shannonEntropy(value) >= 2.5 &&
+      /[A-Za-z]/.test(value) &&
+      /[0-9!@#$%^&*\-_]/.test(value)
+    ) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Config-surface secret detector. SECRET_KV_RE only fires when the KEY token is a credential
+// noun; a Terraform/values/env assignment keyed by a GENERIC token ('default = "Prod-Master-Pw..."'
+// in tf/variables.tf) carries the secret in the VALUE but the noun rule can never reach it.
+// This recovers such cases by gating ENTIRELY on the FILE being a config/secrets/deployment/ci
+// surface (classifySecuritySurface + a tfvars/.env/configmap/compose/values-class co-gate) and the
+// RHS being a long high-entropy literal, with shape excluders for pure-hex digests and base64
+// image/data blobs and a separator-or-casemix requirement, so a benign hex hash or data URI in a
+// deploy file does not trip it.
+const CONFIG_SURFACE_PATH_RE =
+ /(?:^|[\\/])(?:[^\\/]*\.(?:tfvars?|env[^\\/]*)|[^\\/]*\.env|[^\\/]*configmap[^\\/]*\.ya?ml|docker-compose[^\\/]*\.ya?ml|compose[^\\/]*\.ya?ml|[^\\/]*values\.ya?ml|values-[^\\/]*\.ya?ml|[^\\/]*\.tf)$/i;
+// Generic key:value or key="value" with a long literal RHS (key need NOT be a credential noun).
+const CONFIG_KV_RE =
+ /(?:^|[^A-Za-z0-9_])([A-Za-z_][A-Za-z0-9_-]*)\s*[:=]\s*(['"]?)([^'"\n\r]{12,})\2/;
+// Whole value is 32 or more hex characters and nothing else (e.g. the shape of a hash digest).
+const HEX_DIGEST_RE = /^[0-9a-fA-F]{32,}$/;
+// Whole value is 44 or more unbroken base64-alphabet characters with at most two '=' of padding.
+const B64_BLOB_RE = /^[A-Za-z0-9+/]{44,}={0,2}$/;
+// Returns true when `file` is a config/secrets/deployment/ci surface with a config-class path
+// AND some key/value pair in `body` has a long, high-entropy, secret-shaped literal as its value.
+// Every pair is examined rather than only the first, so an earlier benign assignment (a
+// description string, a digest, a placeholder) cannot hide a real secret further down the file.
+//   body - action content to scan (command + input text)
+//   file - path of the file the action touches; required, since the gate is file-based
+function isConfigSurfaceSecret(body, file) {
+  if (typeof body !== 'string' || !body || !file) return false;
+  const surface = classifySecuritySurface(file);
+  if (!(surface === 'secrets' || surface === 'deployment' || surface === 'ci')) return false;
+  if (!CONFIG_SURFACE_PATH_RE.test(file)) return false;
+  // Fresh global clone so exec() advances through all matches without touching CONFIG_KV_RE.
+  const kvScan = new RegExp(CONFIG_KV_RE.source, `${CONFIG_KV_RE.flags.replace('g', '')}g`);
+  let m;
+  while ((m = kvScan.exec(body)) !== null) {
+    const value = m[3].trim();
+    if (value.length < 12) continue;
+    if (SECRET_PLACEHOLDER_RE.test(value)) continue;
+    // Shape excluders: a pure hex digest (>=32) or an unbroken base64 image/data blob (>=44) is
+    // not a password; reject before the entropy/diversity test so a long hash does not fire.
+    if (HEX_DIGEST_RE.test(value)) continue;
+    if (B64_BLOB_RE.test(value)) continue;
+    if (shannonEntropy(value) < 3.0) continue;
+    // Separator-or-casemix: a real config secret has either an internal separator (-, _, ., etc.)
+    // OR mixed upper/lower case. A single-case unbroken word at this length is more likely an
+    // identifier.
+    const hasSeparator = /[-_.:/+!@#$%^&*]/.test(value);
+    const hasCaseMix = /[a-z]/.test(value) && /[A-Z]/.test(value);
+    if (hasSeparator || hasCaseMix) return true;
+  }
+  return false;
+}
+
+// Structural public-exposure detector. Generalizes the access-control content
+// rule beyond a fixed keyword list to unseen cloud/IaC dialects, without loosening
+// precision: it ONLY fires on a value that is concretely public/wildcard. Recognizes
+// * world-open CIDRs: 0.0.0.0/0 and ::/0
+// * permission-shaped key:value / key=value where the key is an ACL/visibility/access
+// concept and the RHS is a public/wildcard value (*, public, anyone, everyone,
+// allUsers, allAuthenticatedUsers, 0.0.0.0); non-public values (private, internal)
+// are rejected
+// * chmod with an octal whose WORLD (others) digit >= 4 (world-readable or worse)
+// * SQL GRANT ... TO PUBLIC / TO *
+// World-open CIDR blocks: IPv4 0.0.0.0/0 or IPv6 ::/0.
+const PUBLIC_CIDR_RE = /\b0\.0\.0\.0\/0\b|::\/0/;
+// An ACL/visibility-style key assigned (':' or '=') a wildcard or public value. Case-insensitive;
+// an opening quote before the value is optional.
+const PUBLIC_ACL_PAIR_RE =
+ /\b(?:acl|visibility|public|access|principal|allow|ingress)\b\s*[:=]\s*['"]?\s*(?:\*|(?:public|anyone|everyone|allusers|allauthenticatedusers|0\.0\.0\.0)\b)/i;
+// SQL GRANT whose grantee is PUBLIC or *, found within 120 non-';' characters of the keyword.
+const GRANT_TO_PUBLIC_RE = /\bgrant\b[^;]{0,120}?\bto\s+(?:public\b|\*)/i;
+
+/**
+ * True when any `chmod` in `body` uses an octal mode whose WORLD (others) digit is >= 4,
+ * i.e. the target becomes world-readable or worse.
+ *
+ * Accepts 3-digit modes and 4-digit modes with ANY leading digit, so modes that carry
+ * setuid/setgid/sticky bits (`chmod 1777`, `chmod 2775`) are inspected as well as the
+ * zero-prefixed form (`chmod 0644`). The world digit is always the last one.
+ *
+ * @param {string} body - Command/content text to scan.
+ * @returns {boolean} Whether some chmod in the text grants others at least read access.
+ */
+function chmodWorldExposed(body) {
+  // Option flags such as -R may sit between `chmod` and the mode. Symbolic modes (o+r) are
+  // not handled by this detector.
+  const re = /\bchmod\s+(?:-[a-zA-Z]+\s+)*[0-7]{2,3}([0-7])\b/gi;
+  let m;
+  while ((m = re.exec(body)) !== null) {
+    if (Number(m[1]) >= 4) return true;
+  }
+  return false;
+}
+
+// True when the text concretely exposes a resource: a world-open CIDR, a permission-style key set
+// to a public/wildcard value, a chmod whose world digit is >= 4, or a GRANT to PUBLIC/*.
+// Non-string or empty input is never an exposure.
+function isPublicExposure(body) {
+  if (typeof body !== 'string' || body.length === 0) return false;
+  return (
+    PUBLIC_CIDR_RE.test(body) ||
+    PUBLIC_ACL_PAIR_RE.test(body) ||
+    chmodWorldExposed(body) ||
+    GRANT_TO_PUBLIC_RE.test(body)
+  );
+}
+
+// Structural safety-gate weakening. Fires when a security/transport protection
+// flag is flipped OFF (secure:false, httpOnly:false, verify=False, rejectUnauthorized:false,
+// strict:false, csrf:false, sslVerify=false, ...) or a protective guard line is commented
+// out (// require_auth, # check_permission, ...). This is a WEAK signal by contract: it only
+// escapes `inferred` when a real co-signal is present (security-surface file, credential
+// content, or a human security correction), preserving the precision contract.
+const SAFETY_FLAG_OFF_RE =
+ /\b(secure|http[-_]?only|verify|verify[-_]?ssl|ssl[-_]?verify|reject[-_]?unauthorized|strict|strict[-_]?ssl|csrf|csrf[-_]?protection|check[-_]?hostname|validate[-_]?certs?|tls[-_]?verify|cert[-_]?verify|require[-_]?auth|auth[-_]?required|enforce[-_]?https|signature[-_]?verification)\b\s*[:=]\s*(?:false|0|off|no|none|disabled)\b/i;
+const GUARD_COMMENTED_OUT_RE =
+ /(?:\/\/|#|--|<!--)\s*(?:require[-_]?auth|auth[-_]?required|check[-_]?permission|check[-_]?auth|verify[-_]?token|csrf[-_]?protect\w*|authorize|authenticate|ensure[-_]?(?:auth|admin|login)|is[-_]?authenticated|login[-_]?required|permission[-_]?required|guard|enforce[-_]?https|validate[-_]?(?:token|session|cert))\b/i;
+
+// True when the text turns a protection flag off or comments out a protective guard line.
+// Non-string or empty input yields false.
+function isSafetyGateWeakening(body) {
+  const scannable = typeof body === 'string' && body.length > 0;
+  return scannable && (SAFETY_FLAG_OFF_RE.test(body) || GUARD_COMMENTED_OUT_RE.test(body));
+}
+
+// Concept-level credential-mishandling detector. Fires on the SENTENCE-SCOPED
+// co-occurrence of a credential noun-class token AND a sink/exposure verb-class token, with a
+// remediation-clause exclusion (remove/redact/mask/scrub/rotate/revoke) to honor EXACTLY-ONE-
+// per-concern. Scans the assistant narration (a.narration, captured in parse.js) joined with the
+// action body, so a leak described in prose ("I'll log the full Authorization header with the
+// bearer token") is caught even when the action input carries no secret-shaped literal.
+const CREDENTIAL_NOUN_RE =
+ /\b(?:password|passwd|bearer(?:\s+token)?|api[\s-]?key|access[\s-]?token|signing[\s-]?token|signing[\s-]?key|secret(?:\s+key)?|secrets?|credential|credentials|service[\s-]?account(?:\s+json)?|sa[\s-]?key|authorization(?:\s+header)?|auth[\s-]?token|private[\s-]?key|connection[\s-]?string|client[\s-]?secret|access[\s-]?key|token)\b/i;
+const CREDENTIAL_SINK_VERB_RE =
+ /\b(?:log(?:s|ged|ging)?|print(?:s|ed|ing)?|echo(?:ed|ing)?|dump(?:s|ed|ing)?|console\.log|fmt\.Print\w*|System\.out|commit(?:s|ted|ting)?|push(?:es|ed|ing)?|expose(?:s|d)?|exposing|output(?:s|ted|ting)?|writ(?:e|es|ing|ten)\s+(?:to|into)\s+(?:the\s+)?log)\b/i;
+// Remediation clause: when the sentence is about REMOVING/redacting the exposure, it is the fix,
+// not the leak -> suppress (the redirect that fixes a leak should not itself mint a new finding).
+const CREDENTIAL_REMEDIATION_RE =
+ /\b(?:remov(?:e|es|ed|ing)|redact(?:s|ed|ing)?|mask(?:s|ed|ing)?|scrub(?:s|bed|bing)?|rotat(?:e|es|ed|ing)|revok(?:e|es|ed|ing)|strip(?:s|ped|ping)?|sanitiz(?:e|es|ed|ing)|fingerprint|last[\s-]?four|last-?4)\b/i;
+
+// Break text into rough sentence/clause units so that a credential noun and a sink verb only count
+// when they share ONE clause rather than merely appearing somewhere in the same turn.
+// Falsy input is treated as the empty string; anything else is stringified first.
+// NOTE(review): '.' is always a separator, so dotted identifiers are cut apart ("console.log(x)"
+// yields "console" and "log(x)"); the console\.log / fmt\.Print / System\.out alternatives in
+// CREDENTIAL_SINK_VERB_RE therefore cannot match inside a single clause -- confirm intended.
+function clauseSplit(body) {
+  const text = body ? String(body) : '';
+  return text.split(/[.!?;\n]+/);
+}
+
+// Finds the first clause of the narration+body text in which a credential noun and a sink verb
+// co-occur and which is not remediation wording. Returns that clause with runs of whitespace
+// collapsed to single spaces and trimmed, or null when no clause qualifies or the input is not a
+// non-empty string.
+function credentialMishandlingClause(body) {
+  if (typeof body !== 'string' || body.length === 0) return null;
+  const hit = clauseSplit(body).find(
+    (clause) =>
+      CREDENTIAL_NOUN_RE.test(clause) &&
+      CREDENTIAL_SINK_VERB_RE.test(clause) &&
+      !CREDENTIAL_REMEDIATION_RE.test(clause)
+  );
+  return hit === undefined ? null : hit.replace(/\s+/g, ' ').trim();
+}
+
// A path is a credential file when it matches the security filename pattern and is not one of
// the excluded token-named lookalikes. Missing paths are never credential files.
function isCredentialFile(file) {
  if (!file) return false;
  return SECURITY_FILE_RE.test(file) && !SECURITY_FILE_EXCLUDE_RE.test(file);
}
+// Builds a stable distinct-concern key from the target file of a finding's security features, so
+// that a file touched in N turns counts as ONE risk, not N. A file qualifies when it is a
+// credential file or maps to a security surface; features flagged `strong` are consulted before
+// the full list. The key is the path lower-cased, with backslashes turned into '/' and repeated
+// '/' collapsed. Returns null when the input is not a non-empty array or no feature carries a
+// qualifying file (stated-intent and human-correction backstops have no file target), so those
+// findings always emit and the dedup never suppresses a distinct concern. Precision-only.
+function securityConcernKey(secActs) {
+  if (!Array.isArray(secActs) || secActs.length === 0) return null;
+  const anchoredFile = (features) => {
+    const hit = features.find((feature) => {
+      const target = feature.action && feature.action.file;
+      return target && (isCredentialFile(target) || classifySecuritySurface(target));
+    });
+    return hit ? hit.action.file : null;
+  };
+  const strongOnly = secActs.filter((feature) => feature.strong);
+  const file = anchoredFile(strongOnly) || anchoredFile(secActs);
+  if (!file) return null;
+  const lowered = String(file).toLowerCase();
+  return lowered.replace(/\\/g, '/').replace(/\/+/g, '/');
+}
+
+// Extract a DISTINCTIVE credential identifier from a file-less security finding's evidence so
+// two turns about the same credential collapse into one concern. Returns the stem of the first
+// rule that matches, in CRED_STEM_RULES order (private-key / signing-secret / jwt / api-key /
+// bearer / password), or null. NEVER returns a bare 'secret'/'token' (too generic, unrelated
+// secrets must not collapse). The match scans the joined evidence text plus the command and input
+// of every security action on the finding.
+// Stem rules in priority order: credentialStemKey returns the stem of the FIRST rule whose pattern
+// matches, so the order of this list decides which stem wins when several match.
+const CRED_STEM_RULES = [
+ { stem: 'private-key', re: /\bprivate[\s_-]?key\b/i },
+ { stem: 'signing-secret', re: /\bsigning[\s_-]?(?:secret|key)\b/i },
+ { stem: 'jwt', re: /\bjwt\b/i },
+ { stem: 'api-key', re: /\bapi[\s_-]?key\b/i },
+ { stem: 'bearer', re: /\bbearer\b/i },
+ { stem: 'password', re: /\b(?:password|passwd)\b/i },
+];
+// Joins each security action's evidence with its command and input text, then returns the stem of
+// the first CRED_STEM_RULES entry that matches the joined text. Returns null when the input is not
+// a non-empty array, the joined text is blank, or no rule matches.
+function credentialStemKey(secActs) {
+  if (!Array.isArray(secActs) || secActs.length === 0) return null;
+  const pieces = [];
+  for (const entry of secActs) {
+    pieces.push(` ${entry.evidence || ''}`);
+    if (entry.action) {
+      pieces.push(` ${entry.action.command || ''} ${entry.action.input || ''}`);
+    }
+  }
+  const text = pieces.join('');
+  if (!text.trim()) return null;
+  const matched = CRED_STEM_RULES.find((rule) => rule.re.test(text));
+  return matched ? matched.stem : null;
+}
+
+// STATED-INTENT BACKSTOP from the assistant's OWN narration. SECURITY_INTENT_RE only scans
+// node.text; an agent that NARRATES a governance/security-touching action ("I rewrote LICENSE to
+// an all-rights-reserved proprietary license") slips past it because the relicense phrasing is not
+// in the user-intent vocabulary. This scans the assistant narration (a.narration) AND node.text
+// for an intent verb/phrase co-occurring IN ONE CLAUSE with a target-noun the action actually
+// touched (a classified security surface, or a file basename present in the action). The
+// credential-remediation exclusion is honored (a redact/rotate clause is the fix, not the risk),
+// and it is gated on the node NOT being its own refusal (handled by the call site).
+const NARRATED_SECURITY_INTENT_RE =
+ /\b(?:re-?licens(?:e|ed|ing)|rewrote|rewrite|all[\s-]?rights[\s-]?reserved|proprietary[\s-]?licens\w*|strip(?:s|ped|ping)?|disabl(?:e|ed|ing)|remov(?:e|ed|ing)|delet(?:e|ed|ing)|leak(?:s|ed|ing)?|expos(?:e|ed|ing)|bypass(?:es|ed|ing)?)\b/i;
+const NARRATED_SECURITY_TARGET_RE =
+ /\b(?:licens\w*|authentication|authorization|auth(?:[\s-]?(?:check|flow|token|guard))|secret\w*|credential\w*|access[\s-]?control|permissions?|rbac|admin (?:schema|mutations?|routes?)|(?:unit|integration|e2e|smoke|auth)?\s*tests?\b)\b/i;
+// A clause counts as a stated-intent risk only when an intent verb and a security target co-occur
+// in it AND the clause is not remediation wording (removing/redacting/rotating IS the fix).
+// Returns the first such clause with whitespace collapsed and trimmed, or null when none qualifies
+// or the input is not a non-empty string.
+// NOTE(review): NARRATED_SECURITY_INTENT_RE lists strip*/remov* as intent verbs, but
+// CREDENTIAL_REMEDIATION_RE also matches strip*/remov*, so any clause containing one of them is
+// skipped here ("removed the auth check" never fires) -- confirm this is intended.
+function narratedSecurityIntentClause(body) {
+  if (typeof body !== 'string' || body.length === 0) return null;
+  const hit = clauseSplit(body).find(
+    (clause) =>
+      NARRATED_SECURITY_INTENT_RE.test(clause) &&
+      NARRATED_SECURITY_TARGET_RE.test(clause) &&
+      !CREDENTIAL_REMEDIATION_RE.test(clause)
+  );
+  return hit === undefined ? null : hit.replace(/\s+/g, ' ').trim();
+}
+// Scan the node's own narration sources: each action narration plus the node text (assistant prose
+// with no tool action lands in node.text). Returns the first matching clause or null. node.text is
+// scanned ONLY when the node is NOT a user complaint/correction turn: a user "stop printing the
+// secret -- that's a leak" already mints (and dedups) the credential concern elsewhere, so scanning
+// its text here would double-fire the same concern. A genuine narrated intent is the ASSISTANT
+// DESCRIBING what it did/will do (no decline/security-correction rejection on the node).
+const SECURITY_CORRECTION_KINDS = new Set(['user_text_decline', 'user_declined_tool', 'user_interrupt']);
+// Returns the first narrated security-intent clause found on the node, or null.
+// Each action's narration is scanned first. node.text is scanned only when the node carries no
+// decline/interrupt rejection, its text is not itself a security correction, and the text is a
+// string of at most 1200 characters.
+function narratedSecurityIntent(node) {
+  if (!node) return null;
+  for (const action of node.actions || []) {
+    const fromNarration = narratedSecurityIntentClause(String(action.narration || ''));
+    if (fromNarration) return fromNarration;
+  }
+  const hasCorrectionRejection =
+    Array.isArray(node.rejections) &&
+    node.rejections.some((rejection) => SECURITY_CORRECTION_KINDS.has(rejection.kind));
+  if (hasCorrectionRejection || hasSecurityCorrection(node.text)) return null;
+  if (typeof node.text !== 'string' || node.text.length > 1200) return null;
+  return narratedSecurityIntentClause(node.text) || null;
+}
+
const SECURITY_SURFACE_RULES = [
{ surface: 'auth', re: /(?:^|[\\/])[^\\/]*(?:auth|login|signin|signup|session|oauth|jwt|sso|saml)[^\\/]*$/i },
{ surface: 'secrets', re: /(?:^|[\\/])(?:\.env[^\\/]*|[^\\/]*(?:secret|credential|password|passwd|apikey|api[-_]key|token)[^\\/]*)$/i },
@@ -169,12 +490,23 @@ const TEST_SKIP_RE =
// P6: strong human security-correction phrasing. Used as a corroborating co-signal and as
// the inferred-tier recall backstop (must never mint a strong/verified label by itself).
const SECURITY_CORRECTION_RE =
- /\b(?:don'?t|do not|never)\b[^.]{0,30}\b(?:leak|expose|commit|hardcode|hard[- ]?code|push|publish)\b[^.]{0,30}\b(?:secret|secrets|token|tokens|key|keys|credential|credentials|password|passwords|env|api)\b|\b(?:rotate|revoke|regenerate|invalidate)\b[^.]{0,25}\b(?:that|the|this|those|your|my)?\s*(?:secret|token|key|credential|password|pat|api[- ]?key|access token)\b|\bthat'?s? (?:a|the|my|our) (?:secret|credential|api[- ]?key|token|password)\b|\b(?:revert|undo|roll ?back)\b[^.]{0,25}\b(?:the|that|those)?\s*(?:auth|security|permission|access[- ]?control|rbac|credential)\b|\b(?:you|it)\b[^.]{0,20}\b(?:leaked|exposed|hardcoded|hard[- ]?coded|committed)\b[^.]{0,25}\b(?:secret|token|key|credential|password|env)\b/i;
+ /\b(?:don'?t|do not|never)\b[^.]{0,30}\b(?:leak|expose|commit|hardcode|hard[- ]?code|push|publish|paste|embed|inline|bake|put|write|store|save)\b[^.]{0,30}\b(?:secret|secrets|token|tokens|key|keys|credential|credentials|password|passwords|env|api)\b|\b(?:rotate|revoke|regenerate|invalidate)\b[^.]{0,25}\b(?:that|the|this|those|your|my)?\s*(?:secret|token|key|credential|password|pat|api[- ]?key|access token)\b|\bthat'?s? (?:a|the|my|our) (?:secret|credential|api[- ]?key|token|password)\b|\b(?:revert|undo|roll ?back)\b[^.]{0,25}\b(?:the|that|those)?\s*(?:auth|security|permission|access[- ]?control|rbac|credential)\b|\b(?:you|it)\b[^.]{0,20}\b(?:leaked|exposed|hardcoded|hard[- ]?coded|committed)\b[^.]{0,25}\b(?:secret|token|key|credential|password|env)\b|\b(?:don'?t|do not|never)\b[^.]{0,30}\b(?:make|leave|set|keep|expose|open)\b[^.]{0,25}\b(?:public|world[- ]?readable|publicly|wide[- ]?open|accessible to (?:everyone|all|the (?:public|world)))\b|\block (?:it|this|that|the bucket|things?) down\b/i;
// True when `text` is a string of at most 4000 characters that matches SECURITY_CORRECTION_RE.
function hasSecurityCorrection(text) {
  if (typeof text !== 'string') return false;
  if (text.length > 4000) return false;
  return SECURITY_CORRECTION_RE.test(text);
}
+// A CONCRETE tool/action redirect remedy in a decline turn -- "use the Edit tool instead",
+// "use Write rather than echo", "switch to the Read tool". The decline that names such a remedy KEEPS
+// the boilerplate "do not retry a declined action" lesson because the lesson's instruction (retry via
+// a different tool/action) is exactly what the human asked for. A domain correction (env-var name, a
+// single CLI flag, a value change) does NOT match -- its remedy is content, not a tool-retry redirect.
+const TOOL_ACTION_REDIRECT_RE =
+ /\buse\b[^.]{0,30}\b(?:the\s+)?(?:Edit|Write|Read|Bash|Glob|Grep|NotebookEdit|MultiEdit|Task|Search|Replace|Apply\s*Patch|Patch|str_replace\w*|apply_patch)\b(?:\s+(?:tool|command|function|action))?[^.]{0,40}\b(?:instead|rather than|not\b)|\b(?:instead of|rather than)\b[^.]{0,30}\buse\b[^.]{0,30}\b(?:the\s+)?(?:Edit|Write|Read|Bash|Glob|Grep|NotebookEdit|MultiEdit|Task|Search|Replace|Apply\s*Patch|Patch)\b|\bswitch to\b[^.]{0,20}\b(?:the\s+)?(?:Edit|Write|Read|Bash|Glob|Grep|NotebookEdit|MultiEdit|Task)\b(?:\s+(?:tool|command|action))?/i;
+// True when `text` is a string of at most 4000 characters that names a concrete tool/action
+// redirect (matches TOOL_ACTION_REDIRECT_RE).
+function hasToolActionRedirectRemedy(text) {
+  if (typeof text !== 'string') return false;
+  if (text.length > 4000) return false;
+  return TOOL_ACTION_REDIRECT_RE.test(text);
+}
+
export function classifySecuritySurface(file) {
if (!file) return null;
for (const rule of SECURITY_SURFACE_RULES) {
@@ -201,22 +533,69 @@ export function mentionsTestSkip(text) {
// `weak` marks a lone keyword (bare rbac/access-control) that needs a co-signal (P4).
// Collects the security signals found on `node.actions` and returns them as a flat array of
// { action, kind, strong, weak?, evidence? } entries (empty when nothing matched).
function securityActions(node) {
const out = [];
+ // Pass 1: for each action, join its narration, command, and input and ask
+ // credentialMishandlingClause for a clause (a credential-noun + sink-verb co-occurrence). Only the
+ // FIRST action that yields one is kept (the loop breaks), so kind:'credential-mishandling' is emitted
+ // at most ONCE per node, ahead of every other entry, carrying the clause as audit `evidence`.
+ let credMishandle = null;
+ for (const a of node.actions || []) {
+ const scan = `${a.narration || ''} ${a.command || ''} ${a.input || ''}`;
+ const clause = credentialMishandlingClause(scan);
+ if (clause) { credMishandle = { action: a, clause }; break; }
+ }
+ if (credMishandle) {
+ out.push({
+ action: credMishandle.action,
+ kind: 'credential-mishandling',
+ strong: true,
+ evidence: credMishandle.clause,
+ });
+ }
for (const a of node.actions || []) {
const body = `${a.command || ''} ${a.input || ''}`;
const kinds = [];
- if (SECRET_CONTENT_RE.test(body)) kinds.push({ kind: 'credential', strong: true });
+ if (SECRET_CONTENT_RE.test(body) || isSecretByValue(body) || isConfigSurfaceSecret(body, a.file)) kinds.push({ kind: 'credential', strong: true });
if (a.file && isCredentialFile(a.file)) kinds.push({ kind: 'file', strong: true });
- if (ACCESS_CONTROL_CONTENT_RE.test(body)) kinds.push({ kind: 'access-control', strong: true });
+ // Structural public-exposure detector OR-ed with the legacy regex fallback
+ // (the regex is kept so existing narrative TPs do not regress).
+ if (isPublicExposure(body) || ACCESS_CONTROL_CONTENT_RE.test(body)) {
+ kinds.push({ kind: 'access-control', strong: true });
+ }
if (a.command && RISKY_CMD_RE.test(a.command)) kinds.push({ kind: 'risky-command', strong: false });
// Weak keyword: only counts when no access-control entry was already recorded for this action.
if (ACCESS_CONTROL_WEAK_RE.test(body) && !kinds.some((k) => k.kind === 'access-control')) {
kinds.push({ kind: 'access-control', strong: false, weak: true });
}
+ // Structural safety-gate weakening is its own WEAK kind, separate from
+ // access-control (they coexist). Weak by contract: only escapes `inferred` when a
+ // real co-signal (surface file / credential content / human security correction)
+ // is present, via the P4 co-signal gate in scoreSecurity.
+ if (isSafetyGateWeakening(body)) {
+ kinds.push({ kind: 'safety-gate-weakening', strong: false, weak: true });
+ }
for (const k of kinds) out.push({ action: a, ...k });
}
return out;
}
+// A CONTENT-ANCHORED security risk is one carrying real risk CONTENT (a credential value, a
+// credential-mishandling clause, access-control content, or a safety-gate weakening) -- as opposed
+// to a mere security-NAMED file (kind 'file') or a bare risky command. Used to track whether the
+// session has already established a content-anchored risk for the corroboration gate.
+const CONTENT_ANCHORED_KINDS = new Set([
+ 'credential', 'credential-mishandling', 'access-control', 'safety-gate-weakening',
+]);
+function isContentAnchoredSecurity(secActs) {
+ return Array.isArray(secActs) && secActs.some((s) => CONTENT_ANCHORED_KINDS.has(s.kind));
+}
+// A finding whose ONLY security signals are a security-NAMED file and/or a bare risky command
+// (no credential/access-control/safety-gate content). These are the lone-signal named-file findings
+// the gate suppresses once a content-anchored risk (or strong human correction) has already fired.
+function isNamedFileOrRiskyOnly(secActs) {
+ if (!Array.isArray(secActs) || !secActs.length) return false;
+ return secActs.every((s) => s.kind === 'file' || s.kind === 'risky-command');
+}
+
// Anchor confidences kept stable so existing tiers/numbers do not regress:
// one strong signal -> verified / 0.95 (unchanged anchor the suite asserts on)
// weak-only + cosignal-> high / 0.84
@@ -238,10 +617,12 @@ function scoreSecurity({ secActs, surface, humanCorrection }) {
const hasWeakKeywordOnly = !hasStrong && secActs.some((s) => s.weak);
if (strongActs.some((s) => s.kind === 'credential')) signals.push('strong credential content');
+ if (strongActs.some((s) => s.kind === 'credential-mishandling')) signals.push('credential mishandling');
if (strongActs.some((s) => s.kind === 'file')) signals.push('credential filename');
if (strongActs.some((s) => s.kind === 'access-control')) signals.push('access-control command');
if (weakActs.some((s) => s.kind === 'risky-command')) signals.push('risky command');
- if (weakActs.some((s) => s.weak)) signals.push('access-control keyword');
+ if (weakActs.some((s) => s.weak && s.kind === 'safety-gate-weakening')) signals.push('safety-gate weakening');
+ if (weakActs.some((s) => s.weak && s.kind !== 'safety-gate-weakening')) signals.push('access-control keyword');
if (surface) signals.push(`security surface (${surface})`);
if (humanCorrection) signals.push('human security correction');
@@ -285,12 +666,135 @@ function fileHint(node) {
return m ? m[1] : null;
}
+// Structural destructive-data-op detector. Fires abandoned_path when a destructive VERB
+// co-occurs with a persistent-DATA NOUN in the action body OR node.text, gated by a MANDATORY
+// in-turn recovery/decline cue (restore/recover/undo/revert/rollback/non-destructive) plus the
+// figurative + not-agent + future-intent + historical disclaimers. The recovery-cue requirement
+// suppresses a future-intent "I'll drop the legacy table" and a historical "we nuked
+// the old API ages ago"; the data-noun anchor suppresses "API" (not a data noun).
+const DESTRUCTIVE_DATA_VERB_RE =
+ /\b(?:drop(?:s|ped|ping)?|truncat(?:e|es|ed|ing)|delete[sd]?\s+from|wip(?:e|es|ed|ing)|blew\s+away|blow\s+away|overwrote|overwritten|overwrit(?:e|es|ing)|reset\s+--hard|recreate[sd]?\s+from\s+scratch|nuk(?:e|es|ed|ing)|\brm\b)\b/i;
+const PERSISTENT_DATA_NOUN_RE =
+ /\b(?:seed[s]?|fixtures?|migrations?|tables?|schema|database|\bdb\b|volume[s]?)\b/i;
+// Mandatory in-turn cue that this is a real destructive-then-recover / decline, not a plan.
+const DATA_RECOVERY_CUE_RE =
+ /\b(?:restore[sd]?|restoring|recover(?:s|ed|ing)?|undo|revert(?:s|ed|ing)?|roll\s?back|non[\s-]?destructive|get\s+(?:it|them|those)\s+back|bring\s+(?:it|them)\s+back|put\s+(?:it|them)\s+back|re[\s-]?seed)\b/i;
+// Future-intent ("I'll drop ...", "going to drop ...", "let me drop ...") is a plan, not damage.
+const FUTURE_INTENT_RE =
+ /\b(?:i'?ll|i\s+will|i'?m\s+going\s+to|gonna|going\s+to|let\s+me|we'?ll|we\s+will|should\s+i|plan\s+to|next\s+i'?ll)\b/i;
+// Historical disclaimer ("we nuked the old API ages ago", "long ago", "previously").
+const HISTORICAL_DESTRUCTIVE_RE =
+ /\b(?:ages\s+ago|long\s+ago|years?\s+ago|back\s+then|in\s+the\s+past|already\s+(?:gone|removed|dropped)|historically)\b/i;
+
+function isDestructiveDataOp(node) {
+ const text = String(node.text || '');
+ const body = (node.actions || []).map((a) => `${a.narration || ''} ${a.command || ''} ${a.input || ''}`).join(' ');
+ const scan = `${text} ${body}`;
+ if (scan.length > WORDING_SCAN_MAX_CHARS * 2) return null;
+ if (!DESTRUCTIVE_DATA_VERB_RE.test(scan)) return null;
+ if (!PERSISTENT_DATA_NOUN_RE.test(scan)) return null;
+ // Mandatory recovery/decline cue gate.
+ if (!DATA_RECOVERY_CUE_RE.test(scan)) return null;
+ // Suppress figurative, not-agent, future-intent, and historical distractors.
+ if (FIGURATIVE_DESTRUCTIVE_RE.test(scan) || NOT_AGENT_DISCLAIMER_RE.test(scan)) return null;
+ if (HISTORICAL_DESTRUCTIVE_RE.test(scan)) return null;
+ // Future-intent only suppresses when there is no actual destructive-recover report (a real
+ // "you blew away my seed data, restore it" carries a past-tense destructive verb + a recovery
+ // demand and is not a plan). Gate on the destructive clause being non-future.
+ const destClause = clauseSplit(scan).find((c) => DESTRUCTIVE_DATA_VERB_RE.test(c) && PERSISTENT_DATA_NOUN_RE.test(c));
+ if (destClause && FUTURE_INTENT_RE.test(destClause) && !DATA_RECOVERY_CUE_RE.test(destClause)) return null;
+ return {
+ confidence: 0.9,
+ tier: 'verified',
+ // Front-load the additive/seed remedy so the derived lesson matches the planted
+ // additive-migrations lesson (mustMention additive/seed).
+ summary: 'Persistent data (seed/fixtures/migration) was destructively wiped and had to be restored; make migrations additive and preserve seed data.',
+ };
+}
+
+// Structural abandoned-BRANCH detector. Per TAXONOMY abandoned_path is "a DAG branch the
+// user navigated away from" -- not only destructive-then-recover. When a correction/decline turn
+// REVERSES a concrete approach the immediately-prior assistant turn introduced in its narration
+// (a named data structure / algorithm / component, e.g. "custom trie for prefix matching"), the
+// prior approach is an abandoned branch. We anchor the evidence to the prior approach noun so the
+// match keywords (e.g. trie/prefix) surface, and we gate hard on a SHARED DISTINCTIVE NOUN
+// between the reversal turn and the prior narration so this never fires on a generic correction.
+//
+// Reversal-of-approach cue on the correction turn ("nix the trie", "not go down that road",
+// "wrong direction", "switch to ... instead", "scrap that approach"). Distinct from a plain
+// content edit: it must name a navigate-away, not "fix the typo".
+const APPROACH_REVERSAL_RE =
+ /\b(?:nix|scrap|ditch|drop|abandon|back\s+out|rip\s+(?:it|that|this)\s+out|don'?t\s+go\s+(?:down|with)|not\s+go\s+down|wrong\s+(?:direction|approach|road|track)|back\s+up|wrong\s+way|go\s+(?:a\s+)?different\s+(?:way|direction|route)|switch\s+to|use\s+.{0,40}\binstead\b|instead\s+of|rather\s+than|let'?s\s+not\b|reverse\b|revert(?:ing)?\b)\b/i;
+// The prior assistant turn must have INTRODUCED an approach (a concrete noun governed by an
+// approach indicator), so a bland prior turn cannot anchor an abandoned branch.
+const APPROACH_INTRODUCE_RE =
+ /\b(?:custom|use\s+(?:a|an|the)|back(?:ed|ing)?\s+(?:it|the\s+\w+)?\s*with|switch(?:ed|ing)?\s+to|go\s+with|implement(?:ing)?\s+(?:a|an|the)|build(?:ing)?\s+(?:a|an|the)|approach|registry|optimizer|index|trie|parser|scheduler|pipeline|cache|engine|adapter|strategy|algorithm|structure)\b/i;
+
+// Distinctive content nouns shared by both sides (>=4 chars, not a stopword, alpha-led). Excludes
+// the reversal verbs themselves so "revert"/"switch" cannot self-match.
+const REVERSAL_VERB_TOKENS = new Set([
+ 'nix', 'scrap', 'ditch', 'drop', 'abandon', 'back', 'out', 'wrong', 'direction', 'approach',
+ 'road', 'track', 'instead', 'rather', 'switch', 'reverse', 'revert', 'reverting', 'different',
+ 'route', 'way', 'down', 'with', 'use', 'using', 'lets', 'just', 'hold', 'shape', 'right',
+]);
+function approachTokens(text) {
+ const out = new Set();
+ for (const w of String(text || '').toLowerCase().match(/[a-z][a-z0-9_-]{3,}/g) || []) {
+ if (STOPWORDS.has(w) || REVERSAL_VERB_TOKENS.has(w)) continue;
+ out.add(w);
+ }
+ return out;
+}
+
+function abandonedBranch(node, priorNode) {
+ if (!priorNode) return null;
+ const text = String(node.text || '');
+ if (!text || text.length > WORDING_SCAN_MAX_CHARS) return null;
+ // (1) this turn is a correction/decline (rides on the structural redirect classification).
+ const isCorrection =
+ node.kind === 'correction' ||
+ (Array.isArray(node.rejections) && node.rejections.some((r) => r.kind === 'user_text_decline'));
+ if (!isCorrection) return null;
+ // Figurative / not-the-agent disclaimers are never an abandoned branch.
+ if (FIGURATIVE_DESTRUCTIVE_RE.test(text) || NOT_AGENT_DISCLAIMER_RE.test(text)) return null;
+ // A scope-cut / overbuild complaint ("over-engineered", "I asked for one function, not a
+ // framework") is scope_drift, NOT an approach navigate-away: the user is removing surplus, not
+ // swapping a concrete approach for another. Suppress so this stays in its own class.
+ if (SCOPE_DRIFT_HINT.test(text)) return null;
+ // (2) the turn must REVERSE an approach (navigate away), not merely edit content.
+ if (!APPROACH_REVERSAL_RE.test(text)) return null;
+ // (3) the immediately-prior turn must have INTRODUCED a concrete approach in its narration.
+ const priorNarration = (priorNode.actions || [])
+ .map((a) => a.narration || '')
+ .filter(Boolean)
+ .join(' ');
+ if (!priorNarration || !APPROACH_INTRODUCE_RE.test(priorNarration)) return null;
+ // (4) precision anchor: a DISTINCTIVE content noun shared by the reversal turn and the prior
+ // approach narration. This is the navigated-away approach token (e.g. "trie").
+ const priorTok = approachTokens(priorNarration);
+ if (!priorTok.size) return null;
+ const shared = [...approachTokens(text)].find((t) => priorTok.has(t));
+ if (!shared) return null;
+ // Anchor evidence to the prior approach narration so the approach noun (trie/prefix) surfaces
+ // for keyword scoring; quote the reversal so the navigate-away is auditable.
+ return {
+ confidence: 0.78,
+ tier: 'high',
+ token: shared,
+ evidence: `Prior approach abandoned after reversal, introduced as "${quote(priorNarration)}", reversed by: "${quote(text)}"`,
+ summary: `The "${shared}" approach branch was abandoned after the user navigated away: "${truncate(priorNarration, 110)}".`,
+ };
+}
+
function badPathEpisode(node) {
const text = String(node.text || '');
if (text.length > WORDING_SCAN_MAX_CHARS) return null;
const destructive = DESTRUCTIVE_RE.test(text);
const recovery = RECOVERY_RE.test(text);
if (!destructive && !recovery) return null;
+ // Not a real destructive path if the wording is figurative or the user explicitly disclaims
+ // that the agent caused it ("the build is broken from a pre-existing typo, not your change").
+ if (FIGURATIVE_DESTRUCTIVE_RE.test(text) || NOT_AGENT_DISCLAIMER_RE.test(text)) return null;
if (!destructive && recovery && !APOLOGY_RE.test(text)) return null;
const target = fileHint(node);
const where = target ? `\`${truncate(String(target), 70)}\`` : 'a file';
@@ -347,7 +851,7 @@ export function analyzeTree(tree) {
});
};
- const addFailure = ({ type, confidence, tier = 'inferred', failureNode, correctionNode, resolvedNode, evidence, summary }) => {
+ const addFailure = ({ type, confidence, tier = 'inferred', failureNode, correctionNode, resolvedNode, evidence, summary, suppressLesson = false, lessonCorrectionExtra = '' }) => {
if (!FAILURE_TYPES.has(type) || !failureNode) return null;
if (correctionNode && correctionNode.id === failureNode.id) correctionNode = null;
if (correctionNode && !afterFailure(correctionNode, failureNode)) correctionNode = null;
@@ -369,14 +873,28 @@ export function analyzeTree(tree) {
return existing;
}
- const lesson = lessonFor(type, { evidence, summary });
+ // Quote the correction (where the concrete fix lives) so the lesson names the actual remedy,
+ // not just a generic platitude. Refusal/decline types keep neutral framing (no quoted content).
+ // A multi-turn security concern names its remedy across SEVERAL correction turns
+ // (e.g. "workload identity" in one turn, "revoke" in a later turn). lessonCorrectionExtra folds
+ // the sibling-turn remedy text into the correction the lesson lifts from, so the merged lesson
+ // names every remedy phrase, not just the first correction's.
+ const correctionText = !REFUSAL_INPUT_TYPES.has(type)
+ ? `${correctionNode?.text || ''} ${lessonCorrectionExtra || ''}`.trim()
+ : '';
+ const lesson = lessonFor(type, { evidence, summary, correction: correctionText });
+ // A structural surplus-removal failure suppresses its lesson; the concrete remedy lives
+ // in the chain/correction text, and a generic templated scope lesson would only add a
+ // non-specific lesson FP. The failure/chain/eval still emit; only the lesson record is withheld.
let lessonRec = lessonByType.get(type);
- if (!lessonRec) {
- lessonRec = { id: `lesson_${pad(lessons.length + 1)}`, title: lesson.title, nodeIds: [failureNode.id], text: lesson.text };
- lessons.push(lessonRec);
- lessonByType.set(type, lessonRec);
- } else {
- lessonRec.nodeIds = uniq([...lessonRec.nodeIds, failureNode.id]);
+ if (!suppressLesson) {
+ if (!lessonRec) {
+ lessonRec = { id: `lesson_${pad(lessons.length + 1)}`, title: lesson.title, nodeIds: [failureNode.id], text: lesson.text };
+ lessons.push(lessonRec);
+ lessonByType.set(type, lessonRec);
+ } else {
+ lessonRec.nodeIds = uniq([...lessonRec.nodeIds, failureNode.id]);
+ }
}
const evalType = evalTypeFor(type);
@@ -416,7 +934,7 @@ export function analyzeTree(tree) {
resolvedBy: correctionNode?.id || resolvedNode?.id || null,
});
failureNode.evalCandidate = true;
- failureNode.lessonIds.push(lessonRec.id);
+ if (lessonRec) failureNode.lessonIds.push(lessonRec.id);
const failure = {
id: `failure_${pad(failures.length + 1)}`,
@@ -428,7 +946,7 @@ export function analyzeTree(tree) {
correctedByNodeId: correctionNode?.id || null,
summary,
evidence,
- lesson: lesson.text,
+ lesson: suppressLesson ? '' : lesson.text,
evalCandidate: true,
};
failures.push(failure);
@@ -451,6 +969,34 @@ export function analyzeTree(tree) {
const refusalAdjacent = (node) => nodeHasModelRefusal(node) || nodeHasModelRefusal(node && node.parent);
const securityNodeIds = new Set();
+ // Distinct-concern dedup ledger. Maps a normalized security-concern file key to the first
+ // emitted security failure for that file, so a later node touching the SAME credential/access-
+ // control file collapses into it (lifting tier/confidence if higher) instead of double-firing.
+ // Null key (no concrete file -> stated-intent / human-correction backstops) is never collapsed.
+ const securityConcernByKey = new Map();
+ // File-less distinct-concern dedup. When a security finding carries NO file key (its
+ // concern is anchored only by credential content, e.g. a printed-secret turn), two turns about the
+ // SAME credential (same distinctive stem: jwt / signing-secret / api-key / password / bearer /
+ // private-key) are ONE concern, not two. Consulted ONLY when concernKey is null so a genuinely
+ // distinct file still emits. NEVER keyed on a bare 'secret'/'token' so unrelated secrets do not
+ // collapse together.
+ const securityConcernByStem = new Map();
+ // Session-level corroboration gate state. TAXONOMY converts the two lone-signal security
+ // emit paths to corroboration-only. We track whether a CONTENT-ANCHORED risk has fired this
+ // session (credential / credential-mishandling / access-control / safety-gate content -- not a mere
+ // security-NAMED file or bare risky command) and whether the FIRST security-named-file finding has
+ // already been allowed. A node whose ONLY security signal is a named file and/or bare risky command
+ // is suppressed once a content-anchored risk OR a recognized strong human security correction has
+ // already fired; the first/only such named-file finding in a session still emits (test-49).
+ let contentAnchoredRiskFired = false;
+ let strongHumanCorrectionFired = false;
+ let firstSecurityNamedFileAllowed = false;
+ // The most recent emitted security finding, so the P6 human-correction backstop can LIFT
+ // an already-fired finding's confidence (the correction corroborates it) instead of minting a
+ // standalone inferred backstop. Standalone is minted ONLY when no prior security finding exists
+ // (preserves test-117). anySecurityFindingFired gates which path the backstop takes.
+ let lastSecurityFinding = null;
+ let anySecurityFindingFired = false;
tree.nodes.forEach((node, index) => {
// v0.3: rejection surfacing pass. Each captured rejection becomes a failure
// signal of the mapped type. Rejection failures do not call
@@ -469,6 +1015,22 @@ export function analyzeTree(tree) {
const ev = r.evidence
? `${r.kind} (${r.source || 'tool_result'}): "${quote(r.evidence)}"`
: `${r.kind} (${r.source || 'stop_reason'})`;
+ // Damp the lesson extractor on text declines (first introduced for decline-anchored DENSE
+ // turns, then generalized). The boilerplate "do not retry a declined tool action" lesson only
+ // earns its keep when the decline names a CONCRETE TOOL/ACTION redirect remedy ("use the Edit
+ // tool ... instead"), because then the lesson's instruction is what the human asked for.
+ // Every other text decline is damped: a structural redirect decline carries its concrete remedy
+ // in the correction chain, and a domain correction riding on a decline (env-var name, a single
+ // CLI flag) carries domain content, not a tool-retry instruction -- in both cases the templated
+ // lesson would only add a non-specific lesson. The rejection failure, chain, and eval still
+ // emit; only the boilerplate lesson record is withheld. Precision-only.
+ // Scoped strictly to user_rejected_action so model_refused (test #139) is untouched.
+ const dampDeclineLesson =
+ type === 'user_rejected_action' &&
+ r.kind === 'user_text_decline' &&
+ !hasToolActionRedirectRemedy(node.text);
addFailure({
type,
confidence: r.confidence || 0.7,
@@ -478,31 +1040,83 @@ export function analyzeTree(tree) {
resolvedNode: null,
evidence: ev,
summary: summarizeRejection(r, node),
+ suppressLesson: dampDeclineLesson,
});
}
}
const secActs = securityActions(node);
- if (secActs.length) {
+ // A node whose ONLY security signal is a security-NAMED file and/or a bare risky
+ // command (no credential / access-control / safety-gate CONTENT) is suppressed to corroboration
+ // once a content-anchored risk OR a recognized strong human security correction has already fired
+ // this session. The first/only such named-file finding in a session still fires (test-49). Genuine
+ // content-anchored findings are never gated here.
+ const namedFileOnly = isNamedFileOrRiskyOnly(secActs);
+ const gateSuppressNamedFile =
+ secActs.length &&
+ namedFileOnly &&
+ firstSecurityNamedFileAllowed &&
+ (contentAnchoredRiskFired || strongHumanCorrectionFired);
+ if (secActs.length && !gateSuppressNamedFile) {
// P1: corroborating co-signals -- surface class on a touched file, and a human
// security correction that points back at this node -- feed the derived score.
const surface = uniq((node.actions || []).map((a) => classifySecuritySurface(a.file))).filter(Boolean)[0] || null;
const humanCorrection =
node.kind !== 'correction' ? Boolean(nearestSecurityCorrection(tree.nodes, node)) : false;
const { tier, confidence, signals } = scoreSecurity({ secActs, surface, humanCorrection });
- const targets = uniq(secActs.map((s) => s.action.file || s.action.command || s.action.input)).slice(0, 3);
+ // Front-load the credential-mishandling clause (where the relevant keywords live) into
+ // the evidence targets so token/authorization/log surface for scoring.
+ const targets = uniq(
+ secActs.map((s) => s.evidence || s.action.file || s.action.command || s.action.input)
+ ).slice(0, 3);
const kinds = uniq(secActs.map((s) => s.kind)); // P3: every matching class, not first-match-wins
- addFailure({
- type: 'security_or_privacy_risk',
- confidence,
- tier,
- failureNode: node,
- correctionNode: node.kind === 'correction' ? null : nearestCorrectionAfter(tree.nodes, node),
- resolvedNode: nearestAcceptedAfter(tree.nodes, node, null),
- evidence: `Agent action touched ${kinds.join(', ')} [signals: ${signals.join('; ')}]: ${targets.map((t) => `"${truncate(String(t), 80)}"`).join(', ')}`,
- summary: `An agent action touched auth, secrets, or access control near "${truncate(node.title, 90)}".`,
- });
- securityNodeIds.add(node.id);
+ // Collapse a later node touching the SAME credential/access-control file into the
+ // first finding for that concern (lifting tier/confidence if higher) instead of double-firing.
+ const concernKey = securityConcernKey(secActs);
+ // File-less stem dedup. When there is NO file key, key the concern on its distinctive
+ // credential stem; this turn then collapses into the prior file-less concern with the same stem
+ // (lifting that finding's tier/confidence) instead of emitting a duplicate. Consulted ONLY when
+ // concernKey is null, so a genuinely distinct file still emits. A config-surface secret with a tf value is
+ // file-anchored (deployment surface qualifies via classifySecuritySurface), so it never reaches this path.
+ const stemKey = concernKey ? null : credentialStemKey(secActs);
+ const priorStem = stemKey ? securityConcernByStem.get(stemKey) : null;
+ const priorConcern = concernKey ? securityConcernByKey.get(concernKey) : priorStem;
+ if (priorConcern) {
+ if (confidence > priorConcern.confidence) priorConcern.confidence = confidence;
+ if (tierRank(tier) > tierRank(priorConcern.tier)) priorConcern.tier = tier;
+ securityNodeIds.add(node.id);
+ lastSecurityFinding = priorConcern;
+ anySecurityFindingFired = true;
+ } else {
+ const secCorrection = node.kind === 'correction' ? null : nearestCorrectionAfter(tree.nodes, node);
+ // When this security finding is redirected by a same-file correction, fold the
+ // redirect's text into the chain summary so the planted chain keywords (e.g. token/log/
+ // redacted) surface for scoring instead of only the failure-node title.
+ const secSummary = secCorrection
+ ? `An agent action touched auth, secrets, or access control near "${truncate(node.title, 90)}"; corrected by: "${quote(secCorrection.text)}".`
+ : `An agent action touched auth, secrets, or access control near "${truncate(node.title, 90)}".`;
+ const created = addFailure({
+ type: 'security_or_privacy_risk',
+ confidence,
+ tier,
+ failureNode: node,
+ correctionNode: secCorrection,
+ resolvedNode: nearestAcceptedAfter(tree.nodes, node, null),
+ evidence: `Agent action touched ${kinds.join(', ')} [signals: ${signals.join('; ')}]: ${targets.map((t) => `"${truncate(String(t), 80)}"`).join(', ')}`,
+ summary: secSummary,
+ // Fold sibling-turn remedies (e.g. "revoke that key") into the lesson so a
+ // multi-turn security concern names every remedy phrase, not just the first correction's.
+ lessonCorrectionExtra: siblingSecurityRemedyText(tree.nodes, node, secCorrection),
+ });
+ if (concernKey && created) securityConcernByKey.set(concernKey, created);
+ else if (stemKey && created) securityConcernByStem.set(stemKey, created);
+ securityNodeIds.add(node.id);
+ if (created) { lastSecurityFinding = created; anySecurityFindingFired = true; }
+ }
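+ // Track session corroboration state from what this node produced. A content-anchored risk arms
+ // the gate; the first named-file-only finding is marked allowed, so a later named-file-only node
+ // is suppressed once the gate is armed (by a content-anchored risk or a strong human correction).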
+ if (isContentAnchoredSecurity(secActs)) contentAnchoredRiskFired = true;
+ if (namedFileOnly) firstSecurityNamedFileAllowed = true;
} else if (node.text.length <= 1200 && SECURITY_INTENT_RE.test(node.text) && !refusalAdjacent(node)) {
addFailure({
type: 'security_or_privacy_risk',
@@ -515,6 +1129,29 @@ export function analyzeTree(tree) {
summary: `A security-sensitive intent was stated near "${truncate(node.title, 90)}".`,
});
securityNodeIds.add(node.id);
+ anySecurityFindingFired = true;
+ } else if (!nodeHasModelRefusal(node)) {
+ // STATED-INTENT BACKSTOP from the assistant's own narration. Independent of the
+ // value-level and lesson-text layers: it scans a.narration + node.text for a
+ // governance/security intent verb co-occurring with a touched target-noun (license/auth/
+ // secret/test/access-control). Honors the credential-remediation exclusion and the file-key
+ // concern dedup, and is gated on the node NOT being its OWN refusal (a declined request stays
+ // recorded as a refusal, never promoted to a "honored" security finding). Catches the case where the
+ // agent narrates rewriting LICENSE to a proprietary all-rights-reserved license.
+ const narratedClause = narratedSecurityIntent(node);
+ if (narratedClause) {
+ const created = addFailure({
+ type: 'security_or_privacy_risk',
+ confidence: 0.7,
+ tier: 'inferred',
+ failureNode: node,
+ correctionNode: null,
+ resolvedNode: nearestAcceptedAfter(tree.nodes, node, null),
+ evidence: `Agent narrated a security-sensitive intent: "${truncate(narratedClause, 200)}"`,
+ summary: `A security-sensitive intent was narrated near "${truncate(node.title, 90)}".`,
+ });
+ if (created) { securityNodeIds.add(node.id); lastSecurityFinding = created; anySecurityFindingFired = true; }
+ }
}
// P6: human-correction security-recall backstop. A human turn with a strong security
@@ -523,20 +1160,33 @@ export function analyzeTree(tree) {
// action phrasing missed the keyword list. Strictly `inferred` and human-grounded -- it
// never fabricates a strong/verified label.
if (hasSecurityCorrection(node.text)) {
- const prior = nearestFailureTarget(node, tree.nodes);
- const anchor = prior ? prior.target : null;
- if (anchor && !securityNodeIds.has(anchor.id) && anchor.id !== node.id) {
- addFailure({
- type: 'security_or_privacy_risk',
- confidence: 0.62,
- tier: 'inferred',
- failureNode: anchor,
- correctionNode: node,
- resolvedNode: nearestAcceptedAfter(tree.nodes, anchor, node),
- evidence: `Human flagged a security concern about a prior action with no security label [signal: human security correction]: "${quote(node.text)}"`,
- summary: `A human security correction was raised near "${truncate(anchor.title, 90)}" with no matching action-level signal.`,
- });
- securityNodeIds.add(anchor.id);
+ // A strong human security correction is corroboration-only. When a security finding
+ // already fired this session, the correction LIFTS that finding's confidence (it confirms a
+ // real risk) instead of minting a separate standalone inferred backstop. The standalone
+ // backstop is minted ONLY when NO prior security finding exists (preserves test-117, where the
+ // human correction is the sole security signal in the session).
+ strongHumanCorrectionFired = true;
+ if (anySecurityFindingFired) {
+ if (lastSecurityFinding && lastSecurityFinding.confidence < 0.62) {
+ lastSecurityFinding.confidence = 0.62;
+ }
+ } else {
+ const prior = nearestFailureTarget(node, tree.nodes);
+ const anchor = prior ? prior.target : null;
+ if (anchor && !securityNodeIds.has(anchor.id) && anchor.id !== node.id) {
+ const created = addFailure({
+ type: 'security_or_privacy_risk',
+ confidence: 0.62,
+ tier: 'inferred',
+ failureNode: anchor,
+ correctionNode: node,
+ resolvedNode: nearestAcceptedAfter(tree.nodes, anchor, node),
+ evidence: `Human flagged a security concern about a prior action with no security label [signal: human security correction]: "${quote(node.text)}"`,
+ summary: `A human security correction was raised near "${truncate(anchor.title, 90)}" with no matching action-level signal.`,
+ });
+ securityNodeIds.add(anchor.id);
+ if (created) { lastSecurityFinding = created; anySecurityFindingFired = true; }
+ }
}
}
@@ -566,6 +1216,41 @@ export function analyzeTree(tree) {
});
}
+ // Structural destructive-data-op. Shares the abandoned_path:nodeId dedup key, so a
+ // node already caught by badPathEpisode (e.g. "nuked my migrations") is NOT double-counted;
+ // the new emit recovers the "you blew away my seed data" case that emitted nothing before.
+ const destructiveData = isDestructiveDataOp(node);
+ if (destructiveData) {
+ addFailure({
+ type: 'abandoned_path',
+ confidence: destructiveData.confidence,
+ tier: destructiveData.tier,
+ failureNode: node,
+ resolvedNode: nearestAcceptedAfter(tree.nodes, node, null),
+ evidence: `Destructive data operation reported (make migrations additive, restore seed data): "${quote(node.text)}"`,
+ summary: destructiveData.summary,
+ });
+ }
+
+ // Structural abandoned-BRANCH. Shares the abandoned_path:nodeId dedup key with the
+ // destructive detectors, so a node already caught above is not double-counted. The failure is
+ // anchored on the PRIOR node (the branch that was introduced and then navigated away from),
+ // and this correction node is the redirect that abandoned it.
+ const priorForBranch = index > 0 ? tree.nodes[index - 1] : null;
+ const branch = abandonedBranch(node, priorForBranch);
+ if (branch && priorForBranch && priorForBranch.status !== 'abandoned') {
+ addFailure({
+ type: 'abandoned_path',
+ confidence: branch.confidence,
+ tier: branch.tier,
+ failureNode: priorForBranch,
+ correctionNode: node,
+ resolvedNode: nearestAcceptedAfter(tree.nodes, priorForBranch, node),
+ evidence: branch.evidence,
+ summary: branch.summary,
+ });
+ }
+
const shouldAnalyze =
node.kind === 'correction' ||
CORRECTION_HINT.test(node.text) ||
@@ -638,10 +1323,197 @@ export function analyzeTree(tree) {
resolvedNode,
evidence: `User said: "${quote(node.text)}"`,
summary: summarizeFailure(signal.type, failureNode, correctionNode),
+ suppressLesson: signal.noLesson,
});
}
});
+ // Structural correction-chain forward pass. For every node already in a FAILURE STATE
+ // (a captured rejection -- including a tool_result isError surfaced by parse.js --
+ // or an emitted failure signal), walk forward over a BOUNDED window of the next 6
+ // user turns to the nearest turn that GENUINELY redirects (carries a decline
+ // rejection OR is a correction turn; never an acceptance/praise turn) AND that
+ // shares CONCRETE evidence (shared action file / named file / distinctive surface
+ // token -- not loose token overlap) with the failure. The bounded window keeps the
+ // pass O(N*6), preserving the rejection-heavy O(N) assembly guarantee (test 141),
+ // and linkChain dedups against chains the lexical-path already emitted.
+ const STRUCT_CHAIN_WINDOW = 6;
+ const declineRejectionKinds = new Set(['user_declined_tool', 'user_interrupt', 'user_text_decline']);
+ const carriesDeclineRejection = (n) =>
+ Array.isArray(n && n.rejections) && n.rejections.some((r) => declineRejectionKinds.has(r.kind));
+ const isAcceptanceTurn = (n) =>
+ n.kind !== 'correction' && ACCEPTANCE_RE.test(String(n.text || ''));
+ // A genuine redirect: a correction turn or a decline rejection, and never an
+ // acceptance/praise turn (those resolve, they do not redirect).
+ const isRedirectTurn = (n) =>
+ !isAcceptanceTurn(n) && (n.kind === 'correction' || carriesDeclineRejection(n));
+ const inFailureState = (n) =>
+ (Array.isArray(n.failureSignals) && n.failureSignals.length > 0) ||
+ (Array.isArray(n.rejections) && n.rejections.length > 0);
+
+ const ordered = tree.nodes
+ .filter((n) => n.status !== 'abandoned')
+ .slice()
+ .sort(orderAfter);
+ for (let i = 0; i < ordered.length; i++) {
+ const failureNode = ordered[i];
+ if (!inFailureState(failureNode)) continue;
+ const end = Math.min(ordered.length, i + 1 + STRUCT_CHAIN_WINDOW);
+ for (let j = i + 1; j < end; j++) {
+ const candidate = ordered[j];
+ if (candidate.id === failureNode.id) continue;
+ if (!isRedirectTurn(candidate)) continue;
+ if (!sharesConcreteEvidence(failureNode, candidate)) continue;
+ // Quote the failure subject and the FULL redirect text so planted file/topic
+ // keywords land for scoring; linkChain dedups against lexical-path chains.
+ const subject = truncate(failureNode.title || failureNode.text || 'a prior action', 90);
+ const summary = `A prior action near "${subject}" was redirected by a later turn: "${quote(candidate.text)}".`;
+ linkChain('user_rejected_action', 0.6, failureNode, candidate, null, summary);
+ break;
+ }
+ }
+
+ // STRICT same-file redirect backward pass. Additive to the structural correction-chain forward
+ // pass: the folded transcript collapses the bad action and the naming redirect into ADJACENT nodes,
+ // so the failure-state-only forward pass never linked them. Walk backward (window 10) from a
+ // redirect / destructive-recover node to the nearest earlier node tied to it. The original strict
+ // signature required all three structural anchors:
+ // (a) shared concrete action file OR the redirect NAMES the prior action's file,
+ // (b) a genuine decline/correction OR a destructive-then-recover report, and
+ // (c) a remediation verb (redact/mask/additive/restore/lockdown/allowlist/...) on the redirect.
+ // Anchor (c) is no longer mandatory: the loop below also links when the prior node is a real
+ // file action with a concrete file tie, or when a destructive-data redirect shares a data entity
+ // with it. Routed through linkChain for dedup against the forward pass.
+ const REDIRECT_REMEDIATION_RE =
+ /\b(?:redact(?:s|ed|ing)?|mask(?:s|ed|ing)?|additive|non[\s-]?destructive|restor(?:e|es|ed|ing)|re[\s-]?seed|recover(?:s|ed|ing)?|lock(?:s|ed|ing)?\s*(?:it|this|that|things?|the\s+bucket)?\s*down|lockdown|allow[\s-]?list|fingerprint|rotat(?:e|es|ed|ing)|revok(?:e|es|ed|ing)|workload\s+identity|env\s+var|leave\s+it\s+alone|only\s+(?:a|the)\b)\b/i;
+ const isDestructiveRecoverTurn = (n) => {
+ const text = String(n.text || '');
+ if (text.length > WORDING_SCAN_MAX_CHARS) return false;
+ if (FIGURATIVE_DESTRUCTIVE_RE.test(text) || NOT_AGENT_DISCLAIMER_RE.test(text)) return false;
+ return DESTRUCTIVE_RE.test(text) && RECOVERY_RE.test(text);
+ };
+ // A destructive-DATA redirect is the structural shape "you destroyed <data entity> ->
+ // restore/make-it-safe", where the destruction lands on a data store (seed/migration/table/
+ // schema/rows/index) rather than a plain source file. The forward DESTRUCTIVE_RE/RECOVERY_RE
+ // pair misses turns whose destruction verb is "blew away / dropped / truncated" and whose
+ // recovery intent is remediation ("make the migration non-destructive"). This arm recovers
+ // such redirect->prior-action chains without loosening the file-tie arms below: it
+ // requires a shared DATA ENTITY between the redirect text and the prior action narration, so
+ // it never mints topic-only chains. Keep the figurative / not-agent guards.
+ const DATA_ENTITY_RE =
+ /\b(?:seed(?:s|\s*data|\s*rows?)?|migrations?|tables?|schemas?|databases?|db|rows?|records?|indexe?s?|columns?|fixtures?|dumps?|backups?|datasets?|collections?)\b/gi;
+ const DATA_DESTRUCTIVE_RE =
+ /\b(?:blew\s+away|blow\s+away|dropped?|drop[\s-]?and[\s-]?recreate[d]?|truncate[d]?|wiped?|nuked?|deleted?|destroyed?|clobber(?:ed)?|overwr(?:ote|itten))\b/i;
+ const DATA_RECOVERY_RE =
+ /\b(?:restore|re[\s-]?seed|recover|recreate|bring (?:it|them) back|non[\s-]?destructive|additive|preserve|put (?:it|them) back|undo|revert)\b/i;
+ const dataEntities = (s) => {
+ const out = new Set();
+ const str = String(s || '');
+ if (!str) return out;
+ let m;
+ DATA_ENTITY_RE.lastIndex = 0;
+ while ((m = DATA_ENTITY_RE.exec(str)) !== null) {
+ const tok = m[0].toLowerCase().replace(/\s+/g, ' ').trim();
+ // Normalize plural/forms to a coarse stem so "seeds"/"seed data"/"seed rows" all match.
+ const stem = tok.replace(/^seed.*$/, 'seed').replace(/^migrations?$/, 'migration').replace(/s$/, '');
+ if (stem.length >= 2) out.add(stem);
+ }
+ return out;
+ };
+ // Harvest the prior assistant's action narration (where the agent describes what it destroyed,
+ // e.g. "the migration dropped and recreated the coupons table") plus its node text.
+ const priorActionNarration = (n) => {
+ const parts = [String(n.text || '')];
+ for (const a of n.actions || []) if (a.narration) parts.push(String(a.narration));
+ return parts.join(' ');
+ };
+ const sharesDataEntity = (prior, redirect) => {
+ const re = dataEntities(String(redirect.text || ''));
+ if (!re.size) return false;
+ const pe = dataEntities(priorActionNarration(prior));
+ for (const e of re) if (pe.has(e)) return true;
+ return false;
+ };
+ const isDestructiveDataRedirect = (n) => {
+ const text = String(n.text || '');
+ if (text.length > WORDING_SCAN_MAX_CHARS) return false;
+ if (FIGURATIVE_DESTRUCTIVE_RE.test(text) || NOT_AGENT_DISCLAIMER_RE.test(text)) return false;
+ return (
+ dataEntities(text).size > 0 &&
+ DATA_DESTRUCTIVE_RE.test(text) &&
+ DATA_RECOVERY_RE.test(text)
+ );
+ };
+ const SAME_FILE_CHAIN_WINDOW = 10;
+ for (let i = 0; i < ordered.length; i++) {
+ const redirect = ordered[i];
+ const text = String(redirect.text || '');
+ if (text.length > WORDING_SCAN_MAX_CHARS) continue;
+ // (b) genuine decline/correction OR destructive-then-recover
+ const genuineRedirect =
+ isRedirectTurn(redirect) ||
+ carriesDeclineRejection(redirect) ||
+ isDestructiveRecoverTurn(redirect) ||
+ isDestructiveDataRedirect(redirect);
+ if (!genuineRedirect) continue;
+ // A correction chain is structurally redirect->prior-action, independent of the
+ // failure TYPE taxonomy. Walk backward to the nearest earlier node and form the chain when ANY
+ // arm holds:
+ // (c1) remediationTie: the redirect carries a remediation verb + a concrete file tie (the
+ // original strict same-file signature), OR
+ // (c2) concreteFileActionTie: the prior is a genuine assistant-action node (it touched a file)
+ // and shares an action file with the redirect, or the redirect names that file, OR
+ // (c3) dataEntityTie: the prior is a genuine assistant-action node and a destructive-data
+ // redirect shares a data entity (seed/migration/table/...) with its text or narration.
+ // Shared surface tokens and loose token overlap are deliberately NOT arms here (see the note on
+ // concreteFileActionTie below). linkChain dedups against the forward pass.
+ const remediationRedirect = REDIRECT_REMEDIATION_RE.test(text);
+ const start = Math.max(0, i - SAME_FILE_CHAIN_WINDOW);
+ for (let j = i - 1; j >= start; j--) {
+ const prior = ordered[j];
+ if (prior.id === redirect.id) continue;
+ if (prior.status === 'abandoned') continue;
+ const priorIsAction = actionFiles(prior).size > 0;
+ const concreteFileTie = sharedFiles(prior, redirect) || textNamesActionFile(prior, redirect);
+ const remediationTie = remediationRedirect && concreteFileTie;
+ // Generalized arm: a genuine redirect tied to a real prior action by a SHARED ACTION FILE
+ // (or the redirect naming that file). This is the strongest concrete anchor and the only
+ // one tight enough to preserve precision; surface-token / topic overlap mint FP chains on
+ // declines that are not treated as chains (e.g. "leave the legacy table alone").
+ const concreteFileActionTie = priorIsAction && concreteFileTie;
+ // A destructive-DATA redirect ties to a genuine prior action when they share a
+ // distinctive DATA ENTITY (seed/migration/table...) -- the prior narrated destroying it,
+ // the redirect demands its recovery. This recovers chains whose anchor is a data store the
+ // action touched rather than a literal file path (e.g. a migration drops the coupons table /
+ // seed data; the redirect names "seed data" + "non-destructive").
+ const dataEntityTie =
+ priorIsAction && isDestructiveDataRedirect(redirect) && sharesDataEntity(prior, redirect);
+ if (!remediationTie && !concreteFileActionTie && !dataEntityTie) continue;
+ const subject = truncate(prior.title || prior.text || 'a prior action', 90);
+ const summary = `A prior action near "${subject}" was redirected by a later turn: "${quote(text)}".`;
+ linkChain('user_rejected_action', 0.6, prior, redirect, null, summary);
+ break;
+ }
+ }
+
+ // Post-pass syncing each failure.lesson to its MERGED lesson record. Lessons are merged
+ // per type (lessonByType): the record's text is the richest merged wording (it carries every
+ // folded sibling-turn remedy), but a failure emitted before a later sibling fold still holds the
+ // stale first-correction text on failure.lesson. Re-point every non-suppressed failure.lesson to
+ // its merged record so the failure view names the same complete remedy the lesson record does.
+ for (const failure of failures) {
+ if (!failure.lesson) continue;
+ const rec = lessonByType.get(failure.type);
+ if (rec && rec.text && rec.text !== failure.lesson) failure.lesson = rec.text;
+ }
+
const topFailureTypes = countTypes(failures);
tree.analysis = {
schemaVersion: SCHEMA_VERSION,
@@ -938,30 +1810,60 @@ function isStrongUncorroboratedSignal(type, text) {
return false;
}
// Structural overbuild redirect: the turn must carry an excess cue, a removal imperative that
// names a component, and that component must appear in the token snapshot of the
// immediately-prior assistant turn (node._priorTokens.tokens). The snapshot lookup is the
// precision gate: a bare "this is overbuilt", or a request to remove something that was never
// added, returns false.
function surplusRemovalRedirect(node, text) {
  const removal = SURPLUS_CUE_RE.test(text) ? REMOVE_COMPONENTS_RE.exec(text) : null;
  if (!removal) return false;
  // Capture group 1 holds the component named by the removal imperative.
  const wanted = removal[1].toLowerCase();
  const snapshot = node._priorTokens;
  const known = snapshot && snapshot.tokens;
  if (!known || !known.size) return false;
  return known.has(wanted);
}
+
function inferSignals(node) {
const text = node.text || '';
if (node.kind !== 'correction' && text.length > WORDING_SCAN_MAX_CHARS) {
return [];
}
const matched = new Map();
+ // Types whose failure was minted by the STRUCTURAL surplus-removal arm. Their lesson is
+ // suppressed downstream: the concrete "expose one X function, not a framework" remedy lives in the
+ // correction text, but a generic templated scope lesson would only add a non-specific lesson FP.
+ const structuralOrigin = new Set();
const consider = (type, confidence) => {
const prev = matched.get(type);
if (prev === undefined || confidence > prev) matched.set(type, confidence);
};
if (SCOPE_DRIFT_HINT.test(text)) consider('scope_drift', 0.82);
- if (/\b(i said|you forgot|you ignored|not what i (asked|wanted|meant)|asked for)\b/i.test(text)) {
+ // ignored_constraint = a NAMED constraint was dropped ("I said no X", "you forgot/ignored Y").
+ // "not what I asked / I wanted X not Y" routes to misunderstood_goal instead (see MISUNDERSTOOD_GOAL_RE).
+ if (/\b(i said|you forgot|you ignored|you skipped|you missed|i explicitly (?:said|asked))\b/i.test(text)) {
consider('ignored_constraint', 0.84);
}
if (TOOL_HINT.test(text)) consider('dependency_or_environment_mismatch', 0.72);
if (/\bwrong tool|wrong library|use .* instead\b/i.test(text)) consider('wrong_tool_choice', 0.78);
if (HALLUCINATION_HINT.test(text)) consider('hallucinated_file_or_api', 0.82);
if (REPEATED_FIX_HINT.test(text)) consider('repeated_failed_fix', 0.8);
- if (/\btoo much|overbuilt|scrap .* web app|too heavy\b/i.test(text)) consider('overbuilt_solution', 0.78);
+ // Structural surplus-removal detector replaces the old literal overbuilt list. Fires only
+ // when (a) the turn carries an excess metaphor/quantifier AND a removal-of-named-components
+ // imperative AND (b) that named component is back-referenced in the immediately-prior assistant
+ // narration (it was actually added). Emits scope_drift, the class an overbuild ("daemon/plugin/
+ // panel for a lean CLI", "cannon for a fly, rip the registry out") is judged under.
+ if (surplusRemovalRedirect(node, text)) { consider('scope_drift', 0.8); structuralOrigin.add('scope_drift'); }
+ else if (/\btoo much|overbuilt|scrap .* web app|too heavy\b/i.test(text)) consider('overbuilt_solution', 0.78);
if (UNDERBUILT_HINT.test(text)) consider('underbuilt_solution', 0.76);
if (FORMAT_HINT.test(text)) consider('format_violation', 0.68);
if (FRUSTRATION_HINT.test(text)) consider('user_frustration', 0.72);
- if (!matched.size && node.kind === 'correction') consider('misunderstood_goal', 0.62);
+ if (!matched.size && node.kind === 'correction' && MISUNDERSTOOD_GOAL_RE.test(text)
+ && !REVERSAL_VERB_RE.test(text)) {
+ consider('misunderstood_goal', 0.62);
+ }
if (!matched.size) return [];
// P3: return all matching process kinds in priority order (capped) instead of
@@ -970,7 +1872,7 @@ function inferSignals(node) {
const out = [];
for (const type of SIGNAL_PRIORITY) {
if (type === 'misunderstood_goal') continue;
- if (matched.has(type)) out.push({ type, confidence: matched.get(type) });
+ if (matched.has(type)) out.push({ type, confidence: matched.get(type), noLesson: structuralOrigin.has(type) });
}
if (!out.length && matched.has('misunderstood_goal')) {
return [{ type: 'misunderstood_goal', confidence: matched.get('misunderstood_goal') }];
@@ -1019,6 +1921,29 @@ function sharedFiles(a, b) {
return false;
}
// Lower-cased basenames of every file this node's actions touched. Basenames shorter than four
// characters are dropped, as are paths whose last segment is empty.
function actionFileBasenames(node) {
  const names = [...actionFiles(node)]
    .map((file) => String(file).split(/[\\/]/).pop())
    .filter((name) => name && name.length >= 4)
    .map((name) => name.toLowerCase());
  return new Set(names);
}
+
// A later turn that NAMES a file an earlier turn's action touched ties back to it, even with no
// shared action and few shared words ("do not hardcode the key in security.py" -> the edit of
// core/security.py). The check is symmetric: either node's text may name the other's file.
//
// The basename must appear as a standalone file name: the characters on either side may not be
// filename characters (letters, digits, "_" or "-"). A raw substring test would let the file
// "main" match "remainder" and "security.py" match "security.pyc", which is token guessing rather
// than a concrete file reference. Path separators, quotes, whitespace and sentence punctuation
// all count as boundaries, so "core/security.py." still matches.
function textNamesActionFile(a, b) {
  const filenameChar = /[a-z0-9_-]/;
  // True when `base` occurs in `text` with a non-filename character (or nothing) on both sides.
  const mentions = (text, base) => {
    for (let at = text.indexOf(base); at !== -1; at = text.indexOf(base, at + 1)) {
      // charAt returns '' when out of range, which never matches filenameChar.
      const before = text.charAt(at - 1);
      const after = text.charAt(at + base.length);
      if (!filenameChar.test(before) && !filenameChar.test(after)) return true;
    }
    return false;
  };
  // Does the text of `y` name a file that `x`'s actions touched?
  const check = (x, y) => {
    const bases = actionFileBasenames(x);
    if (!bases.size) return false;
    // Basenames are lower-cased by actionFileBasenames, so compare against lower-cased text.
    const text = String(y.text || '').toLowerCase();
    for (const base of bases) if (mentions(text, base)) return true;
    return false;
  };
  return check(a, b) || check(b, a);
}
+
let _tokenCache = new WeakMap();
function tokenSet(node) {
if (!node) return new Set();
@@ -1036,6 +1961,10 @@ function tokenSet(node) {
// This strengthens semantic linkage (STRUCT-3) without temporal guessing.
for (const a of node.actions || []) {
if (a.file) harvest(String(a.file).replace(/[\\/.+_-]+/g, ' '));
+ // The assistant's own narration is part of the action's concrete surface, so a
+ // leak it described ("log the Authorization header with the bearer token") ties a correction
+ // ("stop printing the token in the logs") back to it via the shared `token` surface token.
+ if (a.narration) harvest(a.narration);
}
_tokenCache.set(node, out);
return out;
@@ -1068,10 +1997,21 @@ function sharedSurfaceToken(a, b) {
// Loose evidence tie between a failure node and a candidate turn: any concrete tie (a shared
// action file, one turn naming the other's action file, a shared surface token) is sufficient;
// otherwise fall back to requiring a token overlap of at least 3.
function sharesEvidence(failureNode, candidate) {
  const concreteTie =
    sharedFiles(failureNode, candidate) ||
    textNamesActionFile(failureNode, candidate) ||
    sharedSurfaceToken(failureNode, candidate);
  return concreteTie ? true : tokenOverlap(failureNode, candidate) >= 3;
}
// Structural correction-chain concrete-evidence tie. Strictly stronger than sharesEvidence: a shared
// ACTION file, a later turn that NAMES an earlier action file, or a shared distinctive
// surface token (auth/session/secret/...). Deliberately OMITS the loose token-overlap>=3
// path so the structural correction-chain forward pass cannot manufacture chains on generic word reuse.
// Always returns a strict boolean, matching sharesEvidence: the surface-token result is coerced
// rather than passed through, so callers never receive a token string or undefined.
function sharesConcreteEvidence(failureNode, candidate) {
  if (sharedFiles(failureNode, candidate)) return true;
  if (textNamesActionFile(failureNode, candidate)) return true;
  return Boolean(sharedSurfaceToken(failureNode, candidate));
}
+
function nearestFailureTarget(node, nodes) {
const earlier = nodes.filter(
(n) => n.status !== 'abandoned' && n.id !== node.id && afterFailure(node, n)
@@ -1130,6 +2070,28 @@ function nearestCorrectionAfter(nodes, failureNode) {
return later.find((n) => sharesEvidence(failureNode, n)) || null;
}
// Gather the text of later turns that name a recognized security remedy, so sibling-turn
// remedies ("revoke that key" several turns after the "use workload identity" correction) can be
// folded into the one lesson for a multi-turn security concern.
//
// Candidates are the first 12 non-abandoned nodes after `failureNode` (per afterFailure, ordered
// by orderAfter). The primary correction is skipped, as are turns with empty text. A turn is
// kept only when liftSecurityRemedyPhrases finds a remedy phrase in it. Kept texts are joined
// with a single space; the result is '' when nothing qualifies.
// NOTE(review): no shared-evidence tie to `failureNode` is checked here -- position in the
// 12-turn window plus a remedy phrase is the only gate. Confirm that is intended.
function siblingSecurityRemedyText(nodes, failureNode, primaryCorrection) {
  const isLaterLiveTurn = (n) =>
    n.status !== 'abandoned' && n.id !== failureNode.id && afterFailure(n, failureNode);
  const isPrimary = (n) => Boolean(primaryCorrection) && n.id === primaryCorrection.id;
  return nodes
    .filter(isLaterLiveTurn)
    .sort(orderAfter)
    .slice(0, 12)
    .filter((n) => !isPrimary(n))
    .map((n) => String(n.text || ''))
    .filter((text) => text !== '' && liftSecurityRemedyPhrases(text))
    .join(' ');
}
+
// Co-signal lookup for P1: a later human turn that both carries security-correction
// phrasing and ties back to this node by shared evidence corroborates the signal.
function nearestSecurityCorrection(nodes, failureNode) {
@@ -1174,7 +2136,18 @@ function summarizeFailure(type, failureNode, correctionNode) {
case 'ignored_constraint':
return `A prior direction appears to have ignored a user constraint near "${subject}"; corrected by "${correction}".`;
case 'scope_drift':
- return `The session drifted from the intended scope near "${subject}"; corrected by "${correction}".`;
+ // Fold the correction turn's full text into the summary (not just its short title) so a
+ // structural surplus-removal redirect surfaces the removed-component + corrected-shape tokens
+ // (registry/function/htmltopdf, daemon/cli/plugin) for chain keyword scoring, mirroring the
+ // security-chain fold above. More text can only help a chain MATCH, never break one.
+ return `The session drifted from the intended scope near "${subject}"; corrected by: "${quote(correctionNode.text)}".`;
+ case 'misunderstood_goal':
+ // Fold the correction turn's FULL text into the summary (not just its short title) so a
+ // goal-mismatch redirect surfaces the restated root-goal tokens (usb/over-the-air/mqtt) for
+ // chain keyword scoring, mirroring the scope_drift fold above and the security-chain fold. A
+ // different summary path than the user_rejected_action chain summary. More text can only help
+ // a chain MATCH, never break one.
+ return `The agent appears to have misunderstood the goal near "${subject}"; corrected by: "${quote(correctionNode.text)}".`;
case 'overbuilt_solution':
return `The work appears to have overbuilt the requested shape near "${subject}"; corrected by "${correction}".`;
case 'underbuilt_solution':
@@ -1190,7 +2163,33 @@ function summarizeFailure(type, failureNode, correctionNode) {
}
}
-function lessonFor(type, { evidence = '', summary = '' } = {}) {
// Table of security remediation wordings. Each row pairs a pattern for how a fix is named in a
// correction/resolution turn with the stable phrase the lesson should use for it. Row order is
// the order phrases appear in the lifted sentence.
const SECURITY_REMEDY_PHRASES = [
  { re: /\bworkload identit(?:y|ies)\b/i, phrase: 'use workload identity' },
  { re: /\brevok(?:e|es|ed|ing)\b/i, phrase: 'revoke the exposed credential' },
  { re: /\brotat(?:e|es|ed|ing)\b/i, phrase: 'rotate the exposed credential' },
  { re: /\b(?:secret(?:s)?\s+(?:store|manager|vault)|vault|secret manager)\b/i, phrase: 'load it from a secret store' },
  { re: /\benv(?:ironment)?\s*var\w*\b|\benv-?supplied\b|\bfrom (?:an? )?env\b/i, phrase: 'read it from an environment variable outside the tree' },
  { re: /\ballow[- ]?list\b|\ballowlist\b/i, phrase: 'restrict to an allowlist' },
  { re: /\bnon[- ]?destructive\b|\badditive\b/i, phrase: 'make the change additive and non-destructive' },
];

// Build one remediation sentence from `body`: every table phrase whose pattern matches, in table
// order, de-duplicated, joined with "; " and closed with a period. Returns '' when `body` is
// empty/nullish or no pattern matches.
function liftSecurityRemedyPhrases(body) {
  const text = String(body || '');
  if (text === '') return '';
  const matched = SECURITY_REMEDY_PHRASES
    .filter((entry) => entry.re.test(text))
    .map((entry) => entry.phrase);
  const unique = [...new Set(matched)];
  return unique.length > 0 ? `${unique.join('; ')}.` : '';
}
+
+function lessonFor(type, { evidence = '', summary = '', correction = '' } = {}) {
const titles = {
ignored_constraint: 'Preserve explicit constraints',
misunderstood_goal: 'Re-check the actual goal',
@@ -1230,10 +2229,43 @@ function lessonFor(type, { evidence = '', summary = '' } = {}) {
permission_denied: 'Future agents should pre-flight check that required files, commands, or resources are accessible before attempting an action that needs them.',
};
const base = guidance[type] || 'Future agents should preserve this correction.';
- const concrete = String(evidence || summary || '').replace(/\s+/g, ' ').trim();
+ // Prefer the correction text (where the concrete fix is stated) so the lesson names the actual
+ // remedy; fall back to the failure evidence/summary when there is no correction.
+ const fix = String(correction || '').replace(/\s+/g, ' ').trim();
+ const concrete = fix || String(evidence || summary || '').replace(/\s+/g, ' ').trim();
+ const lead = fix ? 'Specifically, the user directed' : 'Specifically';
+ // Bind security lessons to the canonical remediation for the surface kind the detector
+ // already identified (carried in the evidence string), the same evidence-front-loading proven
+ // for credential-mishandling and the destructive-data lesson. The correction text names
+ // the leak surface (configmap/compose/cors/log) but the lesson also wants the
+ // standard fix verb (rotate / allowlist) that lives in the resolution turn, not the correction.
+ // We append the kind-keyed remedy so the lesson names the actual fix without inventing
+ // scenario-specific keywords -- it generalizes to every security domain.
+ let remedy = '';
+ if (type === 'security_or_privacy_risk') {
+ // Lift the CANONICAL remediation noun phrase straight from the correction/resolution
+ // text (and any sibling-turn remedies folded into `correction`) instead of a hardcoded
+ // surface-keyed string. The planted lesson recall wants the EXACT fix verbs the user/agent
+ // named ("workload identity", "revoke", "rotate", "secret store", "env var", "allowlist"),
+ // which a fixed template cannot anticipate per scenario. Fall back to the surface-keyed string
+ // only when the correction names no recognized remediation phrase.
+ const lifted = liftSecurityRemedyPhrases(`${correction || ''} ${evidence || ''} ${summary || ''}`);
+ if (lifted) {
+ remedy = lifted;
+ } else {
+ const surf = `${evidence || ''} ${summary || ''}`.toLowerCase();
+ if (/access-control|cors|wildcard|public|allow[- ]?origin/.test(surf)) {
+ remedy = 'restrict the access-control surface to an allowlist of permitted origins and require auth.';
+ } else if (/credential|secret|password|token|api[- ]?key|access key|\.env|configmap|compose/.test(surf)) {
+ remedy = 'load the value from a secret store and rotate the exposed credential.';
+ }
+ }
+ }
+ let text = concrete ? `${base} ${lead}: ${truncate(concrete, 220)}` : base;
+ if (remedy && !text.toLowerCase().includes(remedy.slice(0, 24))) text = `${text} Remediation: ${remedy}`;
return {
title: titles[type] || 'Preserve the correction',
- text: concrete ? `${base} Specifically: ${truncate(concrete, 220)}` : base,
+ text,
};
}
src/extract.js +12 -0
@@ -137,6 +137,12 @@ export function classifyPrompts(sessions) {
thinking: prompt.thinking || 0,
rejections: prompt.rejections || [],
chars: text.length,
+ // Carry the prior-assistant token snapshot so inferSignals can back-reference a
+ // surplus-removal imperative to a component the immediately-prior assistant turn added.
+ _priorTokens: prompt._priorTokens || null,
+ // Carry the structural-redirect flag so the lesson damp can withhold the
+ // generic user_rejected_action boilerplate on dense structural-decline turns.
+ structuralRedirect: prompt.structuralRedirect === true,
};
if (node.kind === KIND.ROOT) rootAssigned = true;
nodes.push(node);
@@ -181,6 +187,12 @@ function isRerunOf(a, b) {
function classifyOne(text, prompt, rootAssigned) {
if (!rootAssigned) return KIND.ROOT;
+ // A turn that STRUCTURALLY contradicts the immediately-prior assistant action (parse.js
+ // set prompt.structuralRedirect: a back-reference to a token that action touched + a contrast/
+ // negation/reversal cue) IS a correction by construction. Route it to kind:correction so the
+ // analysis loop's chain/lesson/failure pipeline fires for fresh-form redirects ("you solved the
+ // wrong problem, I cared about latency not throughput"; "nix the trie") that no opener matches.
+ if (prompt && prompt.structuralRedirect) return KIND.CORRECTION;
if (CORRECTION_STRONG_OPENERS.test(text) || CORRECTION_ANYWHERE.test(text)) return KIND.CORRECTION;
if (SCOPE_ANYWHERE.test(text)) return KIND.SCOPE;
if (CHECKPOINT_ANYWHERE.test(text)) return KIND.CHECKPOINT;
src/hallucinate.js +90 -11
@@ -1,4 +1,4 @@
-import { readFileSync, existsSync, statSync } from 'node:fs';
+import { readFileSync, existsSync, statSync, readdirSync } from 'node:fs';
import { isAbsolute, join, resolve, sep } from 'node:path';
import { truncate } from './util.js';
import { SCHEMA_VERSION } from './config.js';
@@ -50,6 +50,11 @@ const REL_PREFIX_RE = /^(?:\.\/|\.\.\/)/;
const URL_LIKE_RE = /:\/\//;
const VERSION_LIKE_RE = /^\d+(?:\.\d+)+$/;
const FILE_OP_VERB_RE = /\b(?:open|edit|read|cat|touch|create|write|delete|rm|view|append|chmod|mv|cp|run)\b/i;
+// A file-op verb only signals a real path when it IMMEDIATELY governs the token (verb + optional
+// determiner, anchored at the end of the preamble). "edit src/foo" / "open the .husky/x" qualify;
+// "filter view that lets me compare hotlist/watchlist" does not (the noun "view" is not governing).
+const FILE_OP_GOVERNS_RE =
+ /\b(?:open|edit|read|cat|touch|create|write|delete|rm|view|append|chmod|mv|cp|run)\s+(?:the\s+|a\s+|an\s+|your\s+|this\s+|that\s+|my\s+|our\s+|its\s+)?(?:new\s+|existing\s+|file\s+|path\s+|module\s+)?["'`(]?$/i;
const RATIO_LIKE_RE = /^\d+\/\d+$/;
const KNOWN_DIR_PREFIXES = new Set([
'src', 'lib', 'libs', 'test', 'tests', 'spec', 'specs', 'dist', 'build',
@@ -177,8 +182,10 @@ function looksLikeFileToken(tok) {
// Decide whether a slash-bearing token looks like a real path rather than prose. Any one of four
// signals is enough: an explicit ./ or ../ prefix, a dot-directory first segment (.github/,
// .husky/, .config/), a first segment listed in KNOWN_DIR_PREFIXES, or a file-op verb that
// directly governs the token at the end of `context` (the text just before the token).
function hasRealFileSignal(tok, context) {
  if (REL_PREFIX_RE.test(tok)) return true;
  const [head] = tok.split('/');
  const first = head.toLowerCase();
  // Length > 1 so a lone "." segment does not count as a dot-directory.
  const isDotDir = first.length > 1 && first.startsWith('.');
  return isDotDir || KNOWN_DIR_PREFIXES.has(first) || FILE_OP_GOVERNS_RE.test(context || '');
}
@@ -188,7 +195,7 @@ function looksLikeExtensionlessFile(tok, context) {
const lower = tok.toLowerCase().replace(/^\.\//, '');
if (KNOWN_EXTENSIONLESS_FILES.has(lower)) {
if (lower.startsWith('.')) return true;
- return FILE_OP_VERB_RE.test(context || '');
+ return FILE_OP_GOVERNS_RE.test(context || '');
}
if (hasSlash(tok) && !tokenExtension(tok)) {
if (!(/^(?:\.{0,2}\/)?[\w@.+-]+(?:\/[\w@.+-]+)+\/?$/.test(tok))) return false;
@@ -224,12 +231,29 @@ function fileExists(projectDir, rel) {
return globByBasename(projectDir, base);
}
-function globByBasename(projectDir, base) {
- try {
- const direct = join(projectDir, base);
- if (withinProjectDir(projectDir, direct) && existsSync(direct) && statSync(direct).isFile()) return true;
- } catch {
// Directory names the basename search never descends into: version-control metadata, dependency
// trees, tool state, and build/coverage output.
const GLOB_SKIP_DIRS = new Set(['node_modules', '.git', '.treetrace', '.hg', '.svn', 'dist', 'build', 'coverage']);
// Upper bound on directories read per search, so a very large tree cannot stall the scan.
const GLOB_MAX_DIRS = 4000;

// Bounded recursive search for a file by basename anywhere in the project tree.
// A bare reference like "security.py" should resolve to "core/security.py" if it exists.
//
// Returns true as soon as a regular file named exactly `base` is found; false when `base` is
// empty, nothing matches, or the GLOB_MAX_DIRS budget runs out first. Directory entries are
// taken from readdirSync({ withFileTypes: true }), so symlinks are neither followed nor matched.
//
// Only names listed in GLOB_SKIP_DIRS are pruned. An earlier `startsWith('.git')` test also
// pruned `.github`, `.gitlab` and `.gitea`, so real files such as `.github/workflows/ci.yml`
// were reported as missing; `.git` itself is already in the skip set.
function globByBasename(projectDir, base) {
  if (!base) return false;
  let visited = 0;
  const pending = [projectDir];
  while (pending.length) {
    const dir = pending.pop();
    visited += 1;
    if (visited > GLOB_MAX_DIRS) return false;
    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      // Best-effort search: an unreadable or vanished directory is skipped, not fatal.
      continue;
    }
    for (const ent of entries) {
      if (ent.isDirectory()) {
        if (GLOB_SKIP_DIRS.has(ent.name)) continue;
        const child = join(dir, ent.name);
        if (withinProjectDir(projectDir, child)) pending.push(child);
      } else if (ent.isFile() && ent.name === base) {
        return true;
      }
    }
  }
  return false;
}
@@ -254,16 +278,35 @@ function collectFileReferences(tree) {
seen.add(key);
refs.push({ token: tok, key, nodeId });
};
+ // Local window around a match so a file-op verb only counts as a path signal when it is ADJACENT
+ // to the token, not anywhere in the prompt. Prevents one "edit"/"run" from greenlighting every
+ // slash-phrase ("hotlist/watchlist") or known bareword ("license") elsewhere in the same prompt.
+ const CTX_BEFORE = 40;
+ // Preamble: the text immediately BEFORE the token, so FILE_OP_GOVERNS_RE can test whether a
+ // file-op verb directly governs this token (end-anchored), not merely appears in the prompt.
+ const preamble = (text, tokenStart) => text.slice(Math.max(0, tokenStart - CTX_BEFORE), tokenStart);
for (const node of tree.nodes) {
if (node.status === 'abandoned') continue;
const text = String(node.text || '').slice(0, MAX_TEXT_SCAN);
for (const m of text.matchAll(FILE_TOKEN_RE)) push(m[0], node.id);
- for (const m of text.matchAll(PATHISH_TOKEN_RE)) pushExtensionless(m[0], node.id, text);
- for (const m of text.matchAll(BAREWORD_TOKEN_RE)) pushExtensionless(m[1], node.id, text);
+ for (const m of text.matchAll(PATHISH_TOKEN_RE)) pushExtensionless(m[0], node.id, preamble(text, m.index));
+ for (const m of text.matchAll(BAREWORD_TOKEN_RE)) {
+ pushExtensionless(m[1], node.id, preamble(text, m.index + (m[0].length - m[1].length)));
+ }
for (const a of node.actions || []) {
const body = `${a.input || ''}`.slice(0, MAX_TEXT_SCAN);
for (const m of body.matchAll(FILE_TOKEN_RE)) push(m[0], node.id);
- for (const m of body.matchAll(PATHISH_TOKEN_RE)) pushExtensionless(m[0], node.id, body);
+ for (const m of body.matchAll(PATHISH_TOKEN_RE)) pushExtensionless(m[0], node.id, preamble(body, m.index));
+ // An assistant CLAIM of a file lives in the action narration ("I added the A*
+ // implementation in solver/astar.py"), not in node.text or the touched-file set. Scan it for
+ // extension-bearing file tokens so a claimed-but-never-created file surfaces. Precision is
+ // anchored downstream by the same created/existsSync cross-check that gates every other ref:
+ // a narration mention of a file that DOES exist (heuristics.py) or WAS touched (grid.py) is
+ // dropped, and prose slash phrases with no extension ("flood/fill") never match FILE_TOKEN_RE.
+ if (a.narration && typeof a.narration === 'string') {
+ const narr = a.narration.slice(0, MAX_TEXT_SCAN);
+ for (const m of narr.matchAll(FILE_TOKEN_RE)) push(m[0], node.id);
+ }
if (a.file && typeof a.file === 'string' &&
(a.tool === 'Write' || a.tool === 'Edit' || a.tool === 'NotebookEdit')) {
push(a.file, node.id);
@@ -309,6 +352,41 @@ function isRelativeOrLocalSpec(spec) {
return REL_PREFIX_RE.test(spec) || spec.startsWith('/') || spec.startsWith('node:');
}
+// Well-known JS/Python library stems that a dotted token like "cytoscape.js" or "whisper.py"
+// references as a LIBRARY, not as a project file. Used only when no manifest is present so a
+// dotted library mention is not mistaken for a missing project file.
+const WELL_KNOWN_LIBRARY_STEMS = new Set([
+ 'cytoscape', 'd3', 'three', 'whisper', 'numpy', 'pandas', 'scipy', 'sklearn',
+ 'tensorflow', 'torch', 'pytorch', 'keras', 'matplotlib', 'seaborn', 'react',
+ 'vue', 'svelte', 'angular', 'jquery', 'lodash', 'underscore', 'moment', 'axios',
+ 'express', 'flask', 'django', 'fastapi', 'requests', 'pillow', 'opencv', 'cv2',
+ 'transformers', 'langchain', 'openai', 'anthropic', 'redux', 'webpack', 'rollup',
+ 'vite', 'babel', 'eslint', 'prettier', 'jest', 'mocha', 'chai', 'pytest',
+ 'bootstrap', 'tailwind', 'chartjs', 'plotly', 'leaflet', 'mapbox', 'socketio',
+]);
+
+// A single-segment dotted token like "cytoscape.js" or "pandas.py" is frequently a LIBRARY
+// reference, not a path to a missing project file. Suppress it when its bare stem matches a declared
+// manifest dependency (so "cytoscape.js" with cytoscape in package.json is a lib, not a missing file);
+// when no manifest exists, fall back to a curated well-known-library stem set. A token that carries a
+// path segment (a slash) or whose stem is not a known library still fires as a real missing file.
+function isDeclaredLibraryName(token, pkgNames, lockNames, pyNames) {
+ if (hasSlash(token)) return false; // any path segment means "treat as a file reference"
+ const base = token.split('/').pop();
+ const dot = base.lastIndexOf('.');
+ if (dot <= 0) return false; // no dot at all, or a leading-dot name such as ".env"
+ const stem = base.slice(0, dot).toLowerCase(); // everything before the LAST dot, lowercased
+ if (!stem) return false;
+ const hasManifest = pkgNames.size > 0 || lockNames.size > 0 || pyNames.size > 0; // "a manifest exists" = any of the three name sets is non-empty
+ if (hasManifest) {
+ for (const name of pkgNames) if (packageRoot(name).toLowerCase() === stem) return true;
+ for (const name of lockNames) if (packageRoot(name).toLowerCase() === stem) return true;
+ if (pyNames.has(stem)) return true; // direct membership: pyNames entries are compared as-is against the lowercased stem
+ return false; // a manifest exists and does not declare this stem; the curated set is NOT consulted
+ }
+ return WELL_KNOWN_LIBRARY_STEMS.has(stem);
+}
+
export function detectHallucinations(tree, projectDir, opts = {}) {
const hallucinations = [];
if (!projectDir || !existsSync(projectDir)) {
@@ -324,6 +402,7 @@ export function detectHallucinations(tree, projectDir, opts = {}) {
for (const ref of collectFileReferences(tree)) {
if (created.has(ref.key)) continue;
if (fileExists(projectDir, ref.token)) continue;
+ if (isDeclaredLibraryName(ref.token, pkgNames, lockNames, pyNames)) continue;
hallucinations.push({
category: 'hallucinated_file_or_path',
reference: truncate(ref.token, EVIDENCE_CAP),
src/parse.js +410 -24
@@ -22,7 +22,43 @@ const REFUSAL_TEXT_RE =
/\b(?:i (?:can(?:'|no)t|am (?:unable|not able|not permitted) to|won['']?t|cannot|do not|don['']?t (?:think i (?:should|can)|feel comfortable)|'?m not (?:able|allowed|going) to)|(?:sorry|apolog(?:y|ies|ize))[,.]? i (?:can(?:'|no)t|am unable|won['']?t|cannot)|as (?:an? )?(?:ai|language model|assistant)[, ]+(?:i |we )?(?:can(?:'|no)t|cannot|am unable|won['']?t)|i'?m programmed (?:to decline|not to)|against my (?:guidelines|policies|programming))\b/i;
const USER_TEXT_DECLINE_RE = // start-anchored (^) and case-insensitive: only the opening words of the tested string can match
- /^(?:no(?:pe)?\s*[,.)]?\s+|stop\s*[,.)]?\s+|cancel\s*[,.)]?\s+|don'?t\s+|do not\s+|don'?t do (?:that|this|it)\b|stop (?:that|this|it|doing)\b|not that one\b|scratch that\b|nevermind\b|never mind\b)/i;
+ /^(?:no(?:pe)?\s*[,.)]?\s+|stop\s*[,.)]?\s+|cancel\s*[,.)]?\s+|don'?t\s+|do not\s+|don'?t do (?:that|this|it)\b|stop (?:that|this|it|doing)\b|scrap (?:that|this|it|the)\b|revert\b|undo\b|roll\s?back\b|rip (?:that|this|it|the)\b|back (?:it|that|this) out\b|take (?:it|that|this) out\b|that'?s not it\b|that is not it\b|not that one\b|not quite\b|scratch that\b|nevermind\b|never mind\b)/i;
+
+// Real declines often open with an interjection ("Whoa, scrap that", "Hold on, revert that").
+// Strip these so the decline core is what the start-anchored matcher and the benign guard both see.
+const DECLINE_INTERJECTION_RE =
+ /^(?:(?:whoa|wait|hold on|hold up|hold the phone|hmm+|ugh+|argh+|actually|no wait|ok wait|wait wait|yikes)[\s,!.:;-]+)+/i; // one or more leading interjections, each followed by at least one space/punctuation character
+
+// Structural decline fallback. The existing USER_TEXT_DECLINE_RE is START-anchored
+// and phrase-bound, so fresh-form declines ("yank that file", "stop printing the token",
+// "nix the trie", "back it out") that put the reversal verb mid-clause are missed. This
+// fallback fires on a clause-leading HARD REVERSAL verb that is either bare (BARE_STOP_RE)
+// or governs a back-reference to the immediately-prior assistant turn (a demonstrative, or a
+// file/prose token the prior action actually touched, captured in session._priorAssistant).
+// The verb list is restricted to hard reversal verbs; "don't" / forward-instruction negation
+// ("don't forget tests", "drop the column", "you usually stop the server") is left to the
+// existing start-anchored matcher so it does NOT fire here, keeping new false positives at zero.
+const IMPERATIVE_REVERSAL_RE =
+ /\b(?:stop|undo|revert|yank|rip|kill|scrap|nix|roll\s?back|back(?:\s+(?:it|that|this))?\s+out|take(?:\s+(?:it|that|this))?\s+out)\b/i; // unanchored: the pattern itself does not require the verb to lead the clause
+// A bare hard-stop clause ("stop.", "undo that", "revert it", "nix it") with no further object
+// needed: the reversal verb alone, optionally with a demonstrative pronoun, IS the decline.
+const BARE_STOP_RE =
+ /^(?:stop|undo|revert|yank|nix|scrap|rip|kill|roll\s?back)\s*(?:it|that|this|the\b[^.]*)?[.!,;:\s]*$/i; // whole-string match; besides it/that/this, the "the ..." arm accepts "the" plus any run of non-period characters
+// Demonstrative / back-reference that anchors the reversal to the prior assistant turn.
+const BACKREF_DEMONSTRATIVE_RE = /\b(?:that|this|those|these|it)\b/i;
+
+// Benign openers that share a decline prefix but are agreement / instruction / meta-complaint,
+// NOT a decline of the agent's action. Precision guard for looksLikeUserTextDecline.
+// "No problem, go ahead" "Don't forget to add tests" "Stop being unhelpful"
+const BENIGN_DECLINE_OPENER_RE =
+ /^(?:no\s+(?:problem|worries|worry|rush|need|thanks|biggie|prob(?:lem)?s?|issue)\b|nope?\s+(?:problem|worries)\b|don'?t\s+(?:forget|hesitate|worry|bother|stop)\b|stop\s+(?:being|saying|telling|apologi[sz]|with the|the apolog))/i; // start-anchored; the "stop ..." arm has no trailing \b, so "apologi[sz]" / "the apolog" act as prefixes
+
+// "I won't touch/change/expose X" etc. is agreement to a constraint, not a refusal.
+const COMPLIANT_WONT_RE =
+ /\bi\s+(?:won['']?t|will not|promise not to)\s+(?:touch|change|modify|alter|edit|delete|remove|drop|break|add|introduce|expose|leak|hardcode|hard-code|commit|push|overwrite|override|re-?add|reintroduce)\b/i; // NOTE(review): the class [''] lists the straight apostrophe twice - presumably one was meant to be the typographic apostrophe; confirm against the real file
+// Strong refusal stems that should override the compliance guard (a real refusal can also say "won't").
+const HARD_REFUSAL_RE =
+ /\bi\s+can(?:'|no)?t\b|\b(?:am|'?m)\s+(?:unable|not able|not permitted|not allowed)\b|\bagainst my (?:guidelines|policies|programming)\b|\bas an? (?:ai|language model|assistant)\b/i; // first arm covers "i cant", "i can't" and "i cannot" (can + no + t)
// tool_result rejection classifier. Returns { kind, confidence, evidence } or null.
function classifyToolResultRejection(content) {
@@ -38,15 +74,276 @@ function classifyToolResultRejection(content) {
}
export function looksLikeRefusal(text) {
- return typeof text === 'string' && text.length <= 4000 && REFUSAL_TEXT_RE.test(text);
+ if (typeof text !== 'string' || text.length > 4000) return false; // non-strings and texts longer than 4000 characters are never classified
+ // "I won't touch the table" = compliance, not refusal, unless a hard refusal stem is also present.
+ if (COMPLIANT_WONT_RE.test(text) && !HARD_REFUSAL_RE.test(text)) return false;
+ return REFUSAL_TEXT_RE.test(text); // keyword matcher decides once the compliance guard has passed
+}
+
+// Novel-form unwillingness clauses that REFUSAL_TEXT_RE (a keyword list keyed on
+// "I can't / I'm unable / against my guidelines") does not match, but which are real refusals
+// when carried on an action-empty turn: "I'm going to decline this one", "I'd rather not",
+// "that's not something I'll do / can help with", "I'm not comfortable building that",
+// "that crosses a line I won't cross", "I'm not willing to". These phrasings are invariant
+// markers of a stated unwillingness. On their own they are too loose to fire a rejection (a turn
+// could say "I'd rather not duplicate that helper" and then DO the work), so this matcher is meant
+// to be used ONLY behind the action-empty structural gate, never as a standalone keyword arm.
+//
+// Implementation notes:
+// - The typographic apostrophe is written as the escape \u2019 rather than as a raw character, so
+//   the pattern survives a re-encoding of this file; a mis-decoded raw U+2019 would silently stop
+//   "I’m" / "I’ll" / "won’t" from matching.
+// - "not comfortable / not willing" accepts either "to" or a gerund ("building"), which is what the
+//   documented example needs; "not comfortable with <noun>" is deliberately left unmatched.
+// - "I'm not going to" already covers the narrower "I'm not going to do/build/implement/write/add".
+// - One literal per phrase family; the sources are joined into a single case-insensitive RegExp.
+const NOVEL_REFUSAL_RE = new RegExp(
+  [
+    // "I'm going to decline", "I'll decline", "I decline this/that/to"
+    /\bi['\u2019]?m\s+going\s+to\s+decline\b/,
+    /\bi['\u2019]?ll\s+decline\b/,
+    /\bi\s+decline\s+(?:this|that|to)\b/,
+    // "I'd rather not"
+    /\bi['\u2019]?d\s+rather\s+not\b/,
+    // "I'm not comfortable building ...", "I'm not willing to ...", "I'm not going to ..."
+    /\bi['\u2019]?m\s+not\s+(?:(?:comfortable|willing)\s+(?:to\b|\w+ing\b)|going\s+to\b)/,
+    // "that's not something I can/will/would/want ...", "... I'm going ...", "... I'll do/help/..."
+    /\bthat['\u2019]?s\s+not\s+something\s+i(?:\s+(?:can|will|would|want)\b|['\u2019]?m\s+going\b|['\u2019]?ll\s+(?:do|help|build|implement|write|add)\b)/,
+    // "not something I can help with"
+    /\bnot\s+something\s+i\s+can\s+help\s+with\b/,
+    // "crosses a line I won't cross"
+    /\bcrosses\s+a\s+line\s+i\s+won['\u2019]?t\s+cross\b/,
+  ]
+    .map((re) => re.source)
+    .join('|'),
+  'i',
+);
+
+// Structural refusal recognizer for the inability/unwillingness clause, broadened to the
+// novel forms above. Same compliance guard as looksLikeRefusal (a self-imposed "I won't touch X"
+// constraint is not a refusal). Confined to the action-empty arm by its only caller.
+function looksLikeRefusalStructural(text) {
+ if (typeof text !== 'string' || text.length > 4000) return false; // same type/length gate as looksLikeRefusal
+ if (COMPLIANT_WONT_RE.test(text) && !HARD_REFUSAL_RE.test(text)) return false;
+ return REFUSAL_TEXT_RE.test(text) || NOVEL_REFUSAL_RE.test(text); // keyword list OR novel-form phrasing
}
function looksLikeUserTextDecline(text) {
- const t = typeof text === 'string' ? text.trim() : '';
+ let t = typeof text === 'string' ? text.trim() : '';
if (!t || t.length > 240) return false; // the 240-character cap is applied BEFORE interjections are stripped
+ t = t.replace(DECLINE_INTERJECTION_RE, '').trim(); // drop leading "whoa," / "hold on," so the decline core starts the string
+ if (BENIGN_DECLINE_OPENER_RE.test(t)) return false; // "no problem", "don't forget ...", "stop being ..." are not declines
return USER_TEXT_DECLINE_RE.test(t);
}
+// Structural decline classifier (looksLikeStructuralDecline, defined below) used as a fallback to the start-anchored matcher.
+// Fires when a clause carries a hard reversal verb that is either bare (BARE_STOP) or governs a
+// back-reference to the immediately-prior assistant turn (demonstrative, or a file/prose token
+// the prior action touched). `priorAssistant` is the session._priorAssistant snapshot.
+// Shared precision anchor. A clause back-references the immediately-prior assistant action
+// when it carries a demonstrative (that/this/it) OR a file/prose token that action actually
+// touched (session._priorAssistant snapshot). Extracted from looksLikeStructuralDecline so the
+// destructive-attribution arm reuses the exact same anchor (no looser tie is introduced).
+function backRefsPriorAssistant(clause, priorAssistant) {
+ if (BACKREF_DEMONSTRATIVE_RE.test(clause)) return true; // a demonstrative alone is enough; the snapshot is not consulted
+ if (priorAssistant && priorAssistant.tokens && priorAssistant.tokens.size) {
+ const low = clause.toLowerCase();
+ for (const tok of priorAssistant.tokens) {
+ if (tok.length >= 4 && low.includes(tok)) return true; // substring containment, not a whole-word match; tokens under 4 chars are ignored
+ }
+ }
+ return false;
+}
+
+// Destructive ATTRIBUTION to the agent: "you blew away / nuked / wiped / truncated /
+// dropped / deleted / ripped out <X>". This is a decline-by-complaint: the user states the agent
+// destroyed something and (implicitly or explicitly) wants it stopped/reversed. Gated on a
+// back-reference to the prior assistant action so it never fires on a user narrating their own
+// mishap ("I dropped the table"). The second-person "you" / agent-token anchor is what holds
+// precision.
+const DESTRUCTIVE_ATTR_RE =
+ /\byou\b[^.!?;]{0,40}\b(?:blew\s+away|blow\s+away|nuked?|wiped?|truncated?|dropped?|deleted?|destroyed?|clobbered?|ripped?\s+(?:out|away))\b|\b(?:that|this|the)\b[^.!?;]{0,30}\b(?:drop[\s-]?and[\s-]?recreate[d]?|blew\s+away|truncated?|wiped?)\b/i; // "?" makes only the final "d" optional, so "dropped?" / "ripped?" do not match bare "drop" / "rip"; the second arm (that/this/the ...) has no "you" requirement
+// A destructive-attribution complaint is a DECLINE only when it ALSO carries a forward redirect of
+// the agent's APPROACH ("make it non-destructive / additive", "stop dropping", "do X instead").
+// A bare "you deleted X, please restore it" is a destructive-then-recover EVENT (handled as
+// abandoned_path), not a decline of an approach -- requiring this redirect cue keeps a pure
+// restore request from minting a spurious user_text_decline while still firing on a real redirect.
+const DESTRUCTIVE_REDIRECT_CUE_RE =
+ /\bnon[\s-]?destructive\b|\badditive\b|\binstead\b|\bstop\b|\bdon'?t\b[^.!?;]{0,30}\b(?:drop|truncate|wipe|recreate|delete|blow)\b|\bnever\b[^.!?;]{0,30}\b(?:drop|truncate|wipe|recreate|delete)\b|\bmake\b[^.!?;]{0,30}\b(?:migration|change|it)\b[^.!?;]{0,20}\b(?:additive|non[\s-]?destructive|safe)\b/i; // the bounded [^.!?;]{0,N} gaps keep each cue inside a single sentence
+
+// Structural decline classifier, the fallback to the start-anchored USER_TEXT_DECLINE_RE matcher.
+//
+// text           - raw user prompt; non-strings, empty strings and prompts longer than 240
+//                  characters (measured after trimming, before interjections are stripped) return false.
+// priorAssistant - snapshot of the immediately-prior assistant turn ({ tokens: Set } or null),
+//                  passed through to backRefsPriorAssistant.
+// Returns true when either arm fires:
+//   1. some clause is led by a hard reversal verb (after at most one short connective) and is
+//      either a bare stop (BARE_STOP_RE) or back-references the prior assistant action;
+//   2. the whole turn attributes a destructive action, carries a forward redirect cue, and
+//      back-references the prior assistant action.
+function looksLikeStructuralDecline(text, priorAssistant) {
+  let t = typeof text === 'string' ? text.trim() : '';
+  if (!t || t.length > 240) return false;
+  t = t.replace(DECLINE_INTERJECTION_RE, '').trim();
+  if (BENIGN_DECLINE_OPENER_RE.test(t)) return false;
+  // Scan ALL clauses for a clause-leading reversal verb, not just the first. A decline can
+  // bury the reversal in a later clause ("This is a cannon for a fly. ... Rip the plugin registry
+  // and middleware out ..."): the first clause is a metaphor, the reversal lives in clause 3.
+  for (const rawClause of t.split(/[.!?;\n]/)) {
+    const clause = rawClause.trim();
+    if (!clause) continue;
+    const m = clause.match(IMPERATIVE_REVERSAL_RE);
+    if (!m) continue;
+    // Use the offset of the actual word-bounded match. Re-searching the clause for the matched
+    // text could land on an earlier substring ("stopwatch ..., stop it" -> offset 0), which made
+    // the lead look empty and let a non-leading verb pass the guard below.
+    const idx = m.index;
+    // The reversal verb must lead its clause (be at or near the start, after at most a short
+    // connective like "no," / "ok,"). This keeps "you usually stop the server" from matching.
+    const lead = clause.slice(0, idx).replace(/[,\s]+$/, '').trim();
+    if (lead && !/^(?:no|nope|ok|okay|please|hey|and|so|then|wait|hold on|also)\b[\s,]*$/i.test(lead)) {
+      continue;
+    }
+    // BARE_STOP: the reversal verb (optionally + demonstrative) stands alone -> decline.
+    if (BARE_STOP_RE.test(clause)) return true;
+    // Otherwise require a back-reference to the prior assistant action, looked for from the
+    // verb onward so the connective in front of it cannot supply the back-reference.
+    if (backRefsPriorAssistant(clause.slice(idx), priorAssistant)) return true;
+  }
+  // Destructive-attribution arm: "you blew away X ... make the migration non-destructive",
+  // gated on a back-reference to the prior assistant action (so a self-narrated mishap never fires)
+  // AND a forward redirect cue (so a pure restore request stays an abandoned_path event, not a
+  // decline).
+  if (
+    DESTRUCTIVE_ATTR_RE.test(t) &&
+    DESTRUCTIVE_REDIRECT_CUE_RE.test(t) &&
+    backRefsPriorAssistant(t, priorAssistant)
+  ) {
+    return true;
+  }
+  return false;
+}
+
+// Structural UPSTREAM-CORRECTION classifier. A fresh-form redirect ("you solved the
+// wrong problem, I cared about latency not throughput", "I wanted a wrench, not a workshop. Rip
+// the plugin registry out") never trips the start-anchored decline matcher NOR the bare/hard-stop
+// structural-decline path, so the whole failure/chain/lesson pipeline stays dark for it. This
+// fires user_text_decline when a user turn STRUCTURALLY contradicts the immediately-prior
+// assistant action: (a) it names/overlaps a DISTINCTIVE token that action just touched AND (b) it
+// carries a contrast/negation/reversal cue (a negated restatement "X, not Y" / a goal-mismatch
+// frame "you ... the wrong" / a reversal verb governing the prior token). This is the same
+// back-reference anchoring already proven in looksLikeStructuralDecline, lifted so it drives the
+// analysis loop for ALL fresh-form redirects, not just bare hard-stops.
+//
+// Generic narration filler ("with", "goal", "approach", "under", "back", "just", "into") is not a
+// distinctive back-reference; overlapping on it alone would let a contrast cue elsewhere in the
+// turn manufacture a redirect, so it is excluded from the anchor set.
+const STRUCT_REDIRECT_STOPTOKENS = new Set([
+ 'with', 'into', 'just', 'back', 'goal', 'under', 'over', 'this', 'that', 'these', 'those',
+ 'then', 'than', 'them', 'they', 'your', 'have', 'will', 'from', 'about', 'across', 'after',
+ 'approach', 'instead', 'reverting', 'switching', 'collapsing', 'understood', 'reorienting',
+ 'misread', 'deleting', 'returning', 'added', 'done', 'made', 'built', 'rebuilt', 'using',
+ 'thanks', 'later', 'minimize', 'maximize', 'target', 'budget', 'local', 'bench',
+]);
+// Negated restatement: a contrastive "X, not Y" or "I wanted/cared/asked/meant ... not".
+const NEGATED_RESTATEMENT_RE =
+ /,\s*not\b|\b(?:wanted|want|cared|care|asked|meant|need|needed|expected|after)\b[^.]{0,40}\bnot\b|\bnot\b[^.]{0,30}\b(?:but|instead)\b/i; // the first arm has no verb requirement: any ", not" in the turn counts as a cue
+// Self-anchoring goal-mismatch frame: "you solved/built/chose ... wrong". The second-person agent
+// reference ("YOU solved") IS the back-reference to the prior assistant action, so this form does
+// not additionally require a shared token; it unambiguously corrects what the agent just did.
+const GOAL_MISMATCH_SELF_RE =
+ /\byou\s+(?:solved|built|did|made|gave|chose|used|wrote|created|went|took|picked|implemented|optimi[sz]ed|focused|targeted)\b[^.]{0,40}\bwrong\b/i; // "wrong" must follow within 40 non-period characters of the verb
+// Weaker goal-mismatch frame: "the wrong <noun>" / "wrong direction". Still requires a back-ref token.
+const GOAL_MISMATCH_RE =
+ /\bwrong\s+(?:problem|thing|goal|approach|direction|track|path|idea|feature|task|tool|one|axis|shape)\b/i;
+// Hard reversal verb governing a back-reference. Bare form restricted to UNAMBIGUOUS reversal verbs
+// (nix/scrap/revert/undo/yank); rip/tear/strip/pull/gut only count in the "X out" particle form,
+// because bare "tear it apart", "kill the process", "gut feeling" are not reliably reversals.
+const STRUCT_REVERSAL_RE =
+ /\b(?:nix|scrap|revert|undo|yank)\b|\b(?:rip|tear|take|strip|pull|gut)\b[^.]{0,30}\bout\b/i; // "take" is also in the particle-form list; "kill" and "stop" are absent from this pattern entirely
+// Permissive framing: "feel free to", "go ahead and", "you can", "if you (want|like)" turns a
+// reversal verb into a granted suggestion, not a decline of what the agent did. Guards out
+// "feel free to tear it apart and rebuild it" (a refactor invitation, not a correction).
+const PERMISSIVE_FRAMING_RE =
+ /\b(?:feel free to|go ahead and|go ahead|you (?:can|could|may|might)|if you (?:want|like|prefer)|whenever you|happy for you to|fine to)\b/i; // unanchored: a match anywhere in the turn counts
+// Scope-affirmation framing: "I just/only want X", "to be clear ... that's all", "keep it to X".
+// The user is CONFIRMING the current direction with a clarifying boundary on what NOT to add next,
+// not reversing a completed action. "I just want stacked bars, not a whole new chart kind on top.
+// That's all." is a scope clarification, not a redirect of what the agent already did.
+const SCOPE_AFFIRMATION_RE =
+ /\bi (?:just|only) (?:want|need|wanted|needed)\b|\bthat'?s all\b|\bjust (?:want|keep) (?:it|that)\b/i; // literal single spaces between words: runs of spaces or line breaks between them will not match
+
+// Does this user turn structurally contradict the immediately-prior assistant action?
+//
+// text           - raw user prompt; non-strings, empty strings and prompts longer than 600
+//                  characters (after trimming) return false.
+// priorAssistant - snapshot of the prior assistant turn ({ tokens: Set }); when it is missing or
+//                  its token set is empty the function returns false before any pattern is tried.
+// Returns true when the turn is not benign/permissive/scope-affirming and either
+//   - matches the self-anchoring "you <verb> ... wrong" frame, or
+//   - carries a contrast/negation/reversal cue AND mentions a distinctive prior-turn token.
+function looksLikeStructuralRedirect(text, priorAssistant) {
+  let t = typeof text === 'string' ? text.trim() : '';
+  if (!t || t.length > 600) return false;
+  if (!priorAssistant || !priorAssistant.tokens || !priorAssistant.tokens.size) return false;
+  t = t.replace(DECLINE_INTERJECTION_RE, '').trim();
+  if (BENIGN_DECLINE_OPENER_RE.test(t)) return false;
+  if (PERMISSIVE_FRAMING_RE.test(t) || SCOPE_AFFIRMATION_RE.test(t)) return false;
+  // A self-anchoring "you <verb> ... wrong" frame already back-references the prior assistant
+  // action via the second person, so it satisfies BOTH the contrast cue and the back-reference.
+  if (GOAL_MISMATCH_SELF_RE.test(t)) return true;
+  // (b) contrast/negation/reversal cue must be present somewhere in the turn.
+  const hasCue =
+    NEGATED_RESTATEMENT_RE.test(t) || GOAL_MISMATCH_RE.test(t) || STRUCT_REVERSAL_RE.test(t);
+  if (!hasCue) return false;
+  // (a) the turn must name/overlap a DISTINCTIVE token the prior assistant action just touched.
+  const low = t.toLowerCase();
+  for (const tok of priorAssistant.tokens) {
+    if (tok.length < 4 || STRUCT_REDIRECT_STOPTOKENS.has(tok)) continue;
+    const escaped = tok.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+    // Whole-token match via lookarounds instead of \b...\b: \b needs a word character on one side,
+    // so a token that starts or ends with punctuation (a dotfile basename such as ".env") could
+    // never match. For tokens that begin and end with word characters the two forms are equivalent.
+    if (new RegExp(`(?<!\\w)${escaped}(?!\\w)`).test(low)) return true;
+  }
+  return false;
+}
+
+// A structural redirect that is ALSO a genuine DECLINE of the agent's action: it carries a
+// hard reversal verb (STRUCT_REVERSAL_RE: nix/scrap/revert/undo/yank, or rip/tear/take/strip/pull/gut
+// ... out) and mentions a distinctive token the prior assistant turn touched. This is the subset
+// that should additionally mint a user_text_decline rejection; a pure goal-mismatch ("you solved
+// the wrong problem") is a misunderstood-goal correction, NOT a decline, so it is not accepted here.
+//
+// text           - raw user prompt; non-strings, empty strings and prompts longer than 600
+//                  characters (after trimming) return false.
+// priorAssistant - snapshot of the prior assistant turn ({ tokens: Set }); a missing or empty
+//                  token set returns false.
+// Unlike looksLikeStructuralRedirect, the scope-affirmation guard is not applied here.
+function structuralRedirectIsDecline(text, priorAssistant) {
+  let t = typeof text === 'string' ? text.trim() : '';
+  if (!t || t.length > 600) return false;
+  if (!priorAssistant || !priorAssistant.tokens || !priorAssistant.tokens.size) return false;
+  t = t.replace(DECLINE_INTERJECTION_RE, '').trim();
+  if (BENIGN_DECLINE_OPENER_RE.test(t)) return false;
+  if (PERMISSIVE_FRAMING_RE.test(t)) return false;
+  if (!STRUCT_REVERSAL_RE.test(t)) return false;
+  const low = t.toLowerCase();
+  for (const tok of priorAssistant.tokens) {
+    if (tok.length < 4 || STRUCT_REDIRECT_STOPTOKENS.has(tok)) continue;
+    const escaped = tok.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+    // Whole-token match via lookarounds instead of \b...\b: \b needs a word character on one side,
+    // so a token that starts or ends with punctuation (a dotfile basename such as ".env") could
+    // never match. For tokens that begin and end with word characters the two forms are equivalent.
+    if (new RegExp(`(?<!\\w)${escaped}(?!\\w)`).test(low)) return true;
+  }
+  return false;
+}
+
+// A goal-mismatch redirect frame. A strong frame asserts the agent solved the wrong thing
+// ("that's not what I asked (for)", "the whole point is X", "what I actually/really wanted"),
+// the user is reorienting back to the ORIGINAL goal, not declining a side-detail. This is the
+// distinct signature of a misunderstood-goal correction; on its own it is too generic to mint a
+// decline (false on "that's not what I expected to see in the logs"), so it is gated on the turn
+// ALSO restating a distinctive ROOT-GOAL token snapshotted from the first user prompt.
+const GOAL_MISMATCH_FRAME_RE =
+ /\bthat'?s not what i (?:asked|wanted|meant|said|requested)\b|\bthe whole point (?:is|was|of)\b|\bwhat i (?:actually|really) (?:wanted|asked|meant|need(?:ed)?)\b|\bmissed the (?:point|goal)\b|\bnot what i'?m after\b/i; // unanchored and case-insensitive; "expected" is not among the verbs of the first arm
+// Generic words that are not distinctive enough to anchor a root-goal restatement. The root goal
+// is the FIRST user prompt; we keep only its distinctive content tokens (>=4 chars, not filler).
+const ROOT_GOAL_STOPTOKENS = new Set([
+ 'support', 'update', 'updates', 'feature', 'features', 'system', 'systems', 'please', 'should',
+ 'would', 'could', 'about', 'these', 'those', 'their', 'there', 'which', 'while', 'where', 'thing',
+ 'things', 'something', 'devices', 'device', 'images', 'image', 'field', 'pull', 'make', 'build',
+ 'built', 'using', 'with', 'from', 'into', 'over', 'they', 'them', 'this', 'that', 'have', 'need',
+ 'needs', 'want', 'wants', 'when', 'then', 'than', 'your', 'each', 'able', 'code', 'work', 'works',
+]);
+// Extract distinctive root-goal tokens from the first user prompt: whole words >=4 chars (lowered)
+// plus hyphenated compounds ("over-the-air"). A path such as "firmware/ota.c" is NOT kept whole: "/" and "." are outside both patterns, so it is split there. The compounds
+// matter because a restatement frequently echoes the exact hyphenated phrase ("over-the-air").
+function extractRootGoalTokens(text) {
+ const out = new Set();
+ const low = String(text || '').toLowerCase(); // null/undefined/empty input becomes '' and yields an empty set
+ for (const w of low.match(/[a-z][a-z0-9_-]{3,}/g) || []) { // the class includes "-" and "_", so "over-the-air" is already one match here
+ if (w.length >= 4 && !ROOT_GOAL_STOPTOKENS.has(w)) out.add(w); // the length test is implied by the {3,} quantifier; the stop list is the effective filter
+ }
+ // Distinctive multi-word hyphenated phrases (e.g. over-the-air) as a single token.
+ for (const phrase of low.match(/[a-z]{2,}(?:-[a-z]{2,}){1,}/g) || []) { // letters-only compounds; added without consulting the stop list
+ if (phrase.length >= 6) out.add(phrase);
+ }
+ return out;
+}
+// The redirect turn is a goal-mismatch decline when it carries a strong goal-mismatch frame
+// AND restates a distinctive token from the session root goal (first user prompt). Returns true
+// only when both hold; this rides the existing structural-redirect/decline OR-gate as a NEW arm
+// and never rewrites looksLikeStructuralDecline.
+function looksLikeGoalMismatchRedirect(text, rootGoalTokens) {
+ let t = typeof text === 'string' ? text.trim() : '';
+ if (!t || t.length > 600) return false; // same 600-character cap as the structural redirect checks
+ if (!rootGoalTokens || !rootGoalTokens.size) return false; // no root-goal snapshot: nothing to anchor against
+ if (!GOAL_MISMATCH_FRAME_RE.test(t)) return false; // no interjection stripping or benign-opener guard is applied in this arm
+ const low = t.toLowerCase();
+ for (const tok of rootGoalTokens) {
+ if (tok.length < 4) continue;
+ if (new RegExp(`\\b${tok.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`).test(low)) return true; // regex metacharacters in the token are escaped; match is whole-word via \b
+ }
+ return false;
+}
+
+// Tokens (file basenames + narration words) the immediately-prior assistant turn touched,
+// used by looksLikeStructuralDecline to anchor a reversal verb to a concrete prior action.
+function buildPriorAssistantSnapshot(files, narration) {
+ const tokens = new Set();
+ for (const f of files) { // `files` must be iterable; each entry is coerced with String()
+ const base = String(f).split(/[\\/]/).pop(); // last path segment, splitting on both "/" and "\"
+ if (base && base.length >= 4) tokens.add(base.toLowerCase());
+ for (const seg of String(f).toLowerCase().split(/[\\/.+_-]+/)) { // path pieces split on separators, dots, "+", "_" and "-"
+ if (seg.length >= 4) tokens.add(seg);
+ }
+ }
+ for (const w of String(narration || '').toLowerCase().match(/[a-z][a-z0-9_-]{3,}/g) || []) { // every narration word of 4+ characters; no stop-word filtering happens here
+ tokens.add(w);
+ }
+ return { tokens }; // all tokens are lowercase
+}
+
export async function parseSessionFile(path, sessionMeta = {}) {
const session = {
sessionId: sessionMeta.sessionId || null,
@@ -78,6 +375,8 @@ export async function parseSessionFile(path, sessionMeta = {}) {
_usageByMsgId: new Map(),
_pendingInterruption: false,
_currentPrompt: null,
+ _priorAssistant: null,
+ _rootGoalTokens: null,
};
const stream = createReadStream(path, { encoding: 'utf8' });
@@ -296,12 +595,39 @@ function ingestUser(session, rec) {
if (!trimmed && hasImage) trimmed = '[image-only prompt: screenshot/annotated feedback]';
if (!trimmed) return;
+ // Snapshot root-goal tokens from the FIRST real user prompt of the session. Later
+ // goal-mismatch redirects ("that's not what I asked ... the whole point is OTA, over-the-air")
+ // are anchored against these to mint a structural decline.
+ if (session._rootGoalTokens === null && !session.isContinuation) {
+ session._rootGoalTokens = extractRootGoalTokens(trimmed);
+ }
+
// Text-decline rejection: detect after we know trimmed is non-empty and is a
// real prompt (not meta/command/compact). The placeholder this pushes doubles
// as the canonical prompt for this turn (it already carries the rejection),
// so we return immediately to avoid pushing a second prompt below.
- if (looksLikeUserTextDecline(trimmed)) {
- attachRejectionToText(session, rec, trimmed, 'user_text_decline', 'text', 0.8);
+ // A goal-mismatch redirect that restates a distinctive root-goal token is a structural
+ // redirect (rides the SAME OR-gate as the prior-action-anchored redirect path).
+ const isGoalMismatchRedirect = looksLikeGoalMismatchRedirect(trimmed, session._rootGoalTokens);
+ // A structural decline (hard-reversal back-ref / destructive-attribution arm) is
+ // a structural redirect by construction; flag it so the downstream lesson damp can withhold the
+ // generic "do not retry a declined action" boilerplate on these dense decline turns (the chain
+ // carries the real remedy). This mirrors the goal-mismatch flagging.
+ const isStructDecline = looksLikeStructuralDecline(trimmed, session._priorAssistant);
+ const isStructRedirect =
+ looksLikeStructuralRedirect(trimmed, session._priorAssistant) ||
+ isGoalMismatchRedirect ||
+ isStructDecline;
+ if (
+ looksLikeUserTextDecline(trimmed) ||
+ isStructDecline ||
+ structuralRedirectIsDecline(trimmed, session._priorAssistant) ||
+ isGoalMismatchRedirect
+ ) {
+ // Tag the decline placeholder as a structural redirect when it back-references the prior
+ // assistant action, so the kind gate routes it to correction (the analysis-loop driver), not
+ // just rejection minting. Plain start-anchored declines without a back-ref are left untagged.
+ attachRejectionToText(session, rec, trimmed, 'user_text_decline', 'text', 0.8, isStructRedirect);
session._pendingInterruption = false;
return;
}
@@ -317,6 +643,14 @@ function ingestUser(session, rec) {
actions: [],
thinking: 0,
rejections: [],
+ // Does this turn STRUCTURALLY contradict the immediately-prior assistant action
+ // (back-reference to a token it touched + a contrast/negation/reversal cue)? Snapshotted
+ // here because session._priorAssistant is mutated by each later assistant turn; the kind
+ // gate (extract.js classifyOne) reads this to route fresh-form redirects to kind:correction.
+ structuralRedirect: looksLikeStructuralRedirect(trimmed, session._priorAssistant),
+ // Snapshot the immediately-prior assistant token set so inferSignals can back-reference
+ // a removal imperative ("rip the registry out") to a component the prior turn actually added.
+ _priorTokens: session._priorAssistant,
};
session.prompts.push(prompt);
session._currentPrompt = prompt;
@@ -326,7 +660,7 @@ function ingestUser(session, rec) {
// Variant of attachRejection that links the rejection to the prompt we are
// about to create. We push a placeholder _currentPrompt first so attachRejection
// finds it, then fill in the real fields.
-function attachRejectionToText(session, rec, text, kind, source, confidence) {
+function attachRejectionToText(session, rec, text, kind, source, confidence, structuralRedirect = false) {
const placeholder = {
uuid: rec.uuid || null,
parentUuid: rec.parentUuid || null,
@@ -338,6 +672,8 @@ function attachRejectionToText(session, rec, text, kind, source, confidence) {
actions: [],
thinking: 0,
rejections: [],
+ structuralRedirect,
+ _priorTokens: session._priorAssistant,
};
session.prompts.push(placeholder);
session._currentPrompt = placeholder;
@@ -368,31 +704,47 @@ function ingestAssistant(session, rec) {
const current = session._currentPrompt;
const content = Array.isArray(msg.content) ? msg.content : [];
- let refusedByText = false;
+ // Previously, assistant text blocks were dropped entirely. Join them so the agent's own
+ // narration ("I'll log the full Authorization header with the bearer token") is carried
+ // onto each action as a.narration and scanned by the credential-mishandling detector.
+ // Also feed this narration + touched files into the prior-assistant snapshot.
+ let narration = '';
+ const touchedFiles = new Set();
+ for (const block of content) {
+ if (block && block.type === 'text' && typeof block.text === 'string') {
+ narration += (narration ? ' ' : '') + block.text;
+ }
+ }
+ // The text-heuristic refusal arm is now STRUCTURAL: a real refusal is "stated inability
+ // + no work done on the same turn". We must know whether this assistant message produced any
+ // tool_use before deciding, so capture the first inability clause here and defer the firing
+ // until after the content loop (when toolUsesThisTurn is known). The action-empty gate is the
+ // precision anchor: a hedge-then-comply turn voices an inability phrase but still
+ // emits tool_use, so it no longer mints a false model_refusal.
+ let refusalClause = null;
+ let toolUsesThisTurn = 0;
for (const block of content) {
if (!block) continue;
if (block.type === 'text') {
- // Refusal heuristic on assistant text. Lower confidence than stop_reason
- // because phrasing overlap with normal hedging is possible.
- if (!refusedByText && looksLikeRefusal(block.text)) {
- refusedByText = true;
- attachRejection(session, {
- kind: 'model_refusal',
- source: 'text_heuristic',
- confidence: 0.7,
- toolUseId: null,
- tool: null,
- ts: rec.timestamp || null,
- evidence: truncate(typeof block.text === 'string' ? block.text : '', 160),
- });
+ // Capture the first text block voicing an inability/unwillingness clause (keyword OR novel
+ // form). The novel-form broadening is safe here because the firing below is gated on action-empty.
+ if (refusalClause === null && looksLikeRefusalStructural(block.text)) {
+ refusalClause = typeof block.text === 'string' ? block.text : '';
}
} else if (block.type === 'tool_use') {
+ toolUsesThisTurn++;
session.stats.toolUses++;
const input = block.input || {};
const file = input.file_path || input.notebook_path || null;
- if (typeof file === 'string') session.stats.filesTouched.add(file);
+ if (typeof file === 'string') {
+ session.stats.filesTouched.add(file);
+ touchedFiles.add(file);
+ }
if (block.name === 'Bash' && typeof input.command === 'string') {
- for (const p of shellFilePaths(input.command)) session.stats.filesTouched.add(p);
+ for (const p of shellFilePaths(input.command)) {
+ session.stats.filesTouched.add(p);
+ touchedFiles.add(p);
+ }
}
if (current) {
current.actions.push({
@@ -400,6 +752,9 @@ function ingestAssistant(session, rec) {
file: typeof file === 'string' ? file : null,
command: block.name === 'Bash' && typeof input.command === 'string' ? input.command : null,
input: summarizeToolInput(block.name, input),
+ // The assistant's own narration for this turn, scanned with the action
+ // body for sentence-scoped credential-noun + sink-verb co-occurrence.
+ narration: narration || null,
model: synthetic ? null : msg.model || null,
});
}
@@ -408,9 +763,34 @@ function ingestAssistant(session, rec) {
}
}
+ // Deferred structural text-heuristic refusal arm. A real refusal is invariant across
+ // phrasing -> "stated inability + no work done on the same turn". Fire model_refusal from the
+ // inability clause ONLY when this assistant message is action-empty (toolUsesThisTurn === 0, no
+ // tool action / file edit on the turn carrying the clause). The action-empty gate is the
+ // precision anchor: a hedge-then-comply turn ("I'm not sure I can, but here goes")
+ // voices an inability phrase yet still emits tool_use, so it no longer mints a false model_refusal.
+ // The object-governance helper (refusalGovernsRequest) is available as a secondary tightener but
+ // is NOT applied as a hard gate here: requests are often anaphoric ("tell me what would have
+ // happened"), so a token-overlap requirement would drop true refusals; the action-empty gate
+ // alone is the precision-clean structural invariant.
+ // The text_heuristic arm is ALSO suppressed when this same message already carries the provider's
+ // stop_reason:refusal verdict: the renderer emits every node.rejection un-deduped, so a node with
+ // both arms would mint two model_refusal signals for one refusal -> a duplicate FP. The
+ // higher-confidence stop_reason arm below is kept; the text arm stands down to one-per-node.
+ if (refusalClause !== null && toolUsesThisTurn === 0 && msg.stop_reason !== 'refusal') {
+ attachRejection(session, {
+ kind: 'model_refusal',
+ source: 'text_heuristic',
+ confidence: 0.7,
+ toolUseId: null,
+ tool: null,
+ ts: rec.timestamp || null,
+ evidence: truncate(refusalClause, 160),
+ });
+ }
+
// API-level refusal signal. Higher confidence than the text heuristic because
- // it is the provider's structured verdict, not a phrase match. If both fire,
- // both rejections are kept; downstream de-duplication collapses them by kind.
+ // it is the provider's structured verdict, not a phrase match.
if (msg.stop_reason === 'refusal') {
attachRejection(session, {
kind: 'model_refusal',
@@ -422,6 +802,12 @@ function ingestAssistant(session, rec) {
evidence: null,
});
}
+
+ // Snapshot the touched files + narration tokens so the next user turn's
+ // structural-decline check can anchor a reversal verb to this concrete prior action.
+ if (touchedFiles.size || narration) {
+ session._priorAssistant = buildPriorAssistantSnapshot(touchedFiles, narration);
+ }
}
// Absolute and relative file-path tokens from a shell command string.