Zion Boggan zionboggan.com ↗

refactor: decompose SECURITY_INTENT and RISKY_CMD regexes into named pieces

Replace the two monolithic alternation regexes with arrays of named
{name, re} pieces composed by composeOr(). Intended to be behavior-identical
to the former matchers: RISKY_CMD is checked by an equivalence test against
isRiskyCommand, and SECURITY_INTENT by per-piece positive and benign-phrase
tests. Each piece is now independently unit-testable and tunable.
a7d34c9   Zion Boggan committed on Jun 18, 2026 (4 days ago)
src/analyze.js +30 -3
@@ -21,7 +21,22 @@ const CORRECTION_HINT =
// Case-insensitive vocabulary suggesting the speaker is frustrated.
// `frustrat` and `not suffic` are stems, so each carries a `\w*` tail: without
// it the closing `\b` would reject "frustrated", "frustrating" and
// "not sufficient", because the stem is followed by another word character.
const FRUSTRATION_HINT =
  /\b(sucks|awful|god awful|what the heck|wtf|mad|angry|frustrat\w*|not suffic\w*|i don'?t trust|terrible|bad)\b/i;
// Case-insensitive, whole-word match for privacy- and secret-related terms.
// The alternation is assembled from a term list; the resulting pattern text is
// the same as writing the terms inline between `\b(` and `)\b`.
const PRIVACY_HINT = new RegExp(
  `\\b(${[
    'secret',
    'token',
    'api key',
    'apikey',
    'password',
    'redact',
    'privacy',
    'private',
    'local-first',
    'telemetry',
    'upload',
    'cloud',
  ].join('|')})\\b`,
  'i',
);
-const SECURITY_INTENT_RE = /(?:\b(?:updated?|rotat(?:e|ed|ing)|regenerat(?:e|ed)|new|replaced?|revoked?)\b[^.]{0,40}\b(?:pat|personal access token|api[- ]?key|access token|secret|credential)s?\b|\bpat\b[^.]{0,30}\b(?:updated?|rotat|regenerat|revoked?)|\b(?:make|change|set|update|use)\b[^.]{0,30}\bemail\b(?=[^.]*@|[^.]*\bcontact\b|[^.]*\bpublic\b)|\b(?:don'?t|do not|never)\b[^.]{0,20}\b(?:expose|leak)\b|\bexpose us\b|\bleak (?:anything|audit|nothing|secrets?|creds?)\b|\b(?:full )?audit\b[^.]{0,40}\b(?:repo|repos|repositor|organization|git commit|commit history)\b|\bcommit history\b[^.]{0,30}\b(?:audit|expose|leak|clean)\b|\b(?:re-?licens(?:e|ing)|licens(?:e|ing) (?:adjustment|change)|chang(?:e|ing)[^.]{0,15}licens)\b|\b(?:disabl|skip|remov|delet)\w*\b[^.]{0,15}\btests?\b|\b(?:change|modify|update|add|tighten|loosen|fix)\b[^.]{0,20}\b(?:access control|permissions?|rbac|auth flow)\b)/i;
+/**
+ * OR together the sources of a list of named regex pieces into a single
+ * case-insensitive RegExp. Each source is wrapped in its own non-capturing
+ * group so a piece's internal `|` cannot leak into its neighbours.
+ *
+ * An empty list yields a pattern that never matches: joining zero sources
+ * would otherwise produce the empty pattern, which matches every string.
+ *
+ * @param {{name: string, re: RegExp}[]} parts - pieces to combine; only `re.source` is read.
+ * @returns {RegExp} the combined matcher, always built with the `i` flag.
+ */
+const composeOr = (parts) =>
+  new RegExp(parts.length > 0 ? parts.map((p) => `(?:${p.re.source})`).join('|') : '(?!)', 'i');
+
+// Named sub-patterns for security-relevant phrasing. Each entry pairs a `name`
+// with a case-insensitive `re`; the sources are OR-ed into SECURITY_INTENT_RE
+// by composeOr() right after the array.
+// The recurring `[^.]{0,N}` gap lets two keywords be up to N characters apart
+// as long as no '.' sits between them.
+export const SECURITY_INTENT_PARTS = [
+ { name: 'credential_lifecycle', re: /\b(?:updated?|rotat(?:e|ed|ing)|regenerat(?:e|ed)|new|replaced?|revoked?)\b[^.]{0,40}\b(?:pat|personal access token|api[- ]?key|access token|secret|credential)s?\b/i },
+ { name: 'pat_lifecycle', re: /\bpat\b[^.]{0,30}\b(?:updated?|rotat|regenerat|revoked?)/i },
+ // The lookahead requires an '@', "contact" or "public" later in the same sentence.
+ { name: 'email_change', re: /\b(?:make|change|set|update|use)\b[^.]{0,30}\bemail\b(?=[^.]*@|[^.]*\bcontact\b|[^.]*\bpublic\b)/i },
+ { name: 'do_not_expose', re: /\b(?:don'?t|do not|never)\b[^.]{0,20}\b(?:expose|leak)\b/i },
+ { name: 'expose_us', re: /\bexpose us\b/i },
+ { name: 'leak_list', re: /\bleak (?:anything|audit|nothing|secrets?|creds?)\b/i },
+ // NOTE(review): `repositor` is a stem followed by `\b`, so "repository" and
+ // "repositories" do not match this piece — confirm whether `repositor\w*` was intended.
+ { name: 'audit_repos', re: /\b(?:full )?audit\b[^.]{0,40}\b(?:repo|repos|repositor|organization|git commit|commit history)\b/i },
+ { name: 'commit_history_audit', re: /\bcommit history\b[^.]{0,30}\b(?:audit|expose|leak|clean)\b/i },
+ // NOTE(review): the closing `\b` also applies to the trailing `licens` stem, so
+ // "change the license" does not match the third alternative — confirm intent.
+ { name: 'relicensing', re: /\b(?:re-?licens(?:e|ing)|licens(?:e|ing) (?:adjustment|change)|chang(?:e|ing)[^.]{0,15}licens)\b/i },
+ { name: 'disable_tests', re: /\b(?:disabl|skip|remov|delet)\w*\b[^.]{0,15}\btests?\b/i },
+ { name: 'access_control_change', re: /\b(?:change|modify|update|add|tighten|loosen|fix)\b[^.]{0,20}\b(?:access control|permissions?|rbac|auth flow)\b/i },
+];
+// Single matcher built from the pieces above.
+const SECURITY_INTENT_RE = composeOr(SECURITY_INTENT_PARTS);
// Case-insensitive, whole-phrase match for wording about scope creep.
// Built from a phrase list; `keep .* cli` keeps its greedy wildcard.
const SCOPE_DRIFT_HINT = new RegExp(
  `\\b(${[
    "don'?t add",
    'do not add',
    'not a web app',
    'keep it local',
    'too much',
    'overbuilt',
    'scope drift',
    'stay focused',
    'same format',
    'keep .* cli',
    'zero-config cli',
  ].join('|')})\\b`,
  'i',
);
// Case-insensitive, whole-phrase match for wording about tools, libraries,
// dependencies and runtime environment. Built from a phrase list.
const TOOL_HINT = new RegExp(
  `\\b(${[
    'wrong tool',
    'wrong library',
    'use .* instead',
    "don'?t use",
    'dependency',
    'package',
    'environment',
    'node version',
    'python version',
    'missing module',
  ].join('|')})\\b`,
  'i',
);
// Case-insensitive vocabulary for claims that something was invented.
// `hallucinat` is a stem, so it carries a `\w*` tail: without it the closing
// `\b` would reject "hallucinated", "hallucinating" and "hallucination".
const HALLUCINATION_HINT = /\b(hallucinat\w*|doesn'?t exist|does not exist|no such file|fake file|fake api|made up)\b/i;
@@ -80,8 +95,20 @@ const REMEDIATION_RE = new RegExp(`${DESTRUCTIVE_RE.source}|${RECOVERY_RE.source
// Matches a path whose final segment (everything after the last `/` or `\`, or
// the whole string when there is no separator) either starts with `.env` or
// contains one of the listed security-related substrings (auth, session, jwt,
// secret, token, ...). Case-insensitive.
const SECURITY_FILE_RE = /(?:^|[\\/])(?:\.env[^\\/]*|[^\\/]*(?:auth|session|middleware|login|signin|signup|permission|rbac|access[-_]?control|secur|crypto|jwt|oauth|passwd|password|secret|credential|token)[^\\/]*)$/i;
// Matches a final path segment shaped like a token-named source or asset file:
// `*token.<ext>` / `*tokens.<ext>`, `tokenizer*`, `*token|tokens|tokenizer` with a
// ts/tsx/js/jsx/css/scss/json/svg extension, `semantic-tokens*`, `design-tokens*`.
// NOTE(review): presumably used to exempt such files from SECURITY_FILE_RE —
// the call site is not visible here; confirm.
const SECURITY_FILE_EXCLUDE_RE = /(?:^|[\\/])(?:[^\\/]*tokens?\.[a-z]+|tokenizer[^\\/]*|[^\\/]*[-_.]?token(?:izer|s)?\.(?:tsx?|jsx?|css|scss|json|svg)|semantic[-_]?tokens?[^\\/]*|design[-_]?tokens?[^\\/]*)$/i;
-const RISKY_CMD_RE =
- /(?:\brm\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*(?:rf|fr)[a-zA-Z]*\b|\brm\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*r[a-zA-Z]*\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*f[a-zA-Z]*\b|\brm\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*f[a-zA-Z]*\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*r[a-zA-Z]*\b|\bchmod\s+(?:-[a-zA-Z]+\s+)*0?777\b|(?:curl|wget)[^|\n]*\|\s*(?:sudo\s+)?(?:sh|bash|zsh|dash|ksh)\b|\b(?:sh|bash|zsh|dash|ksh)\s+<\(\s*(?:curl|wget)\b|--no-verify\b|--force(?![\w-])|\bDROP\s+TABLE\b|\bDROP\s+SCHEMA\b|\bTRUNCATE\s+(?:TABLE\s+)?[\w."`]+)/i;
+// Named sub-patterns for risky shell/SQL commands. Each entry pairs a `name`
+// with a case-insensitive `re`; the sources are OR-ed into RISKY_CMD_RE by
+// composeOr() right after the array.
+export const RISKY_CMD_PARTS = [
+ // Three rm shapes: one flag cluster with adjacent "rf"/"fr", a flag containing
+ // r followed later by a flag containing f, and the reverse order.
+ // NOTE(review): a single cluster with a letter between r and f (e.g. `rm -rvf x`)
+ // matches none of the three — confirm whether that gap is intended.
+ { name: 'rm_rf_combined', re: /\brm\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*(?:rf|fr)[a-zA-Z]*\b/i },
+ { name: 'rm_r_then_f', re: /\brm\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*r[a-zA-Z]*\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*f[a-zA-Z]*\b/i },
+ { name: 'rm_f_then_r', re: /\brm\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*f[a-zA-Z]*\s+(?:-[a-zA-Z]*\s+)*-[a-zA-Z]*r[a-zA-Z]*\b/i },
+ // Only mode 777 / 0777, optionally preceded by short flags such as -R.
+ { name: 'chmod_world_writable', re: /\bchmod\s+(?:-[a-zA-Z]+\s+)*0?777\b/i },
+ // curl/wget output piped (on the same line) into a shell, optionally via sudo.
+ { name: 'curl_pipe_shell', re: /(?:curl|wget)[^|\n]*\|\s*(?:sudo\s+)?(?:sh|bash|zsh|dash|ksh)\b/i },
+ { name: 'shell_process_substitution', re: /\b(?:sh|bash|zsh|dash|ksh)\s+<\(\s*(?:curl|wget)\b/i },
+ { name: 'no_verify', re: /--no-verify\b/i },
+ // The negative lookahead rejects longer options such as --force-with-lease.
+ { name: 'force', re: /--force(?![\w-])/i },
+ { name: 'drop_table', re: /\bDROP\s+TABLE\b/i },
+ { name: 'drop_schema', re: /\bDROP\s+SCHEMA\b/i },
+ { name: 'truncate', re: /\bTRUNCATE\s+(?:TABLE\s+)?[\w."`]+/i },
+];
+// Single matcher built from the pieces above.
+const RISKY_CMD_RE = composeOr(RISKY_CMD_PARTS);
// Case-insensitive match for text that touches secrets: sourcing a `.env` file
// (`source ….env` or `. ….env`), `.env.secrets|local|prod|production` names,
// `export` of a variable ending in a key/token/secret/password suffix, the
// wrangler/doppler/vault tool names, and the gh/aws/gcloud/kubectl credential
// setup commands.
const SECRET_CONTENT_RE = /(?:\bsource\s+[^\n]*\.env\b|(?:^|[;&|]|\s)\.\s+[^\n]*\.env\b|\.env\.(?:secrets|local|prod|production)\b|\bexport\s+[A-Z0-9_]*(?:_API_KEY|_TOKEN|_SECRET|_PASSWORD|API_KEY|SECRET_KEY|ACCESS_KEY|PRIVATE_KEY)\b|\b(?:wrangler|doppler|vault)\b|\bgh\s+auth\b|\baws\s+configure\b|\bgcloud\s+auth\b|\bkubectl\s+config\s+set-credentials\b)/i;
// Case-insensitive match for explicit permission changes: a SQL-style
// `grant select|insert|update|delete|all`, `setfacl`, or `chmod` followed by a
// 3- or 4-digit octal mode.
const ACCESS_CONTROL_CONTENT_RE = /\b(?:grant\s+(?:select|insert|update|delete|all)\b|setfacl|chmod\s+[0-7]{3,4}\b)/i;
// Case-insensitive whole-word match for "rbac" or "access control" (also
// "access-control" / "access_control" / "accesscontrol").
// NOTE(review): "WEAK" presumably marks this as a lower-confidence signal than
// ACCESS_CONTROL_CONTENT_RE — the consumer is not visible here; confirm.
const ACCESS_CONTROL_WEAK_RE = /\b(?:rbac|access[-_]?control)\b/i;
test/treetrace.test.js +63 -0
@@ -22,6 +22,8 @@ import {
renderMemoryMarkdown,
isRiskyCommand,
mentionsTestSkip,
+ SECURITY_INTENT_PARTS,
+ RISKY_CMD_PARTS,
} from '../src/analyze.js';
import { main, parseArgs, wrapMermaidDoc } from '../src/cli.js';
import { mungePath } from '../src/discover.js';
@@ -1346,6 +1348,67 @@ test('security report: test-disable APIs and phrasing are detected', () => {
}
});
+test('regex decomposition: every RISKY_CMD named piece fires on its command family', () => {
+  // One representative risky command per named piece, in declaration order.
+  const samples = new Map([
+    ['rm_rf_combined', 'rm -rf build'],
+    ['rm_r_then_f', 'rm -r -f build'],
+    ['rm_f_then_r', 'rm -f -r build'],
+    ['chmod_world_writable', 'chmod -R 777 dir'],
+    ['curl_pipe_shell', 'curl https://x | sudo bash'],
+    ['shell_process_substitution', 'bash <(curl https://x)'],
+    ['no_verify', 'git commit --no-verify'],
+    ['force', 'git push --force'],
+    ['drop_table', 'DROP TABLE users'],
+    ['drop_schema', 'drop schema public cascade'],
+    ['truncate', 'TRUNCATE users'],
+  ]);
+  const pieceByName = new Map();
+  for (const { name, re } of RISKY_CMD_PARTS) pieceByName.set(name, re);
+  // Every sample must name a real piece, and that piece alone must match it.
+  samples.forEach((cmd, name) => {
+    const pattern = pieceByName.get(name);
+    assert.ok(pattern, `unknown piece ${name}`);
+    assert.ok(pattern.test(cmd), `piece ${name} missed its command: ${cmd}`);
+  });
+  // A piece added without a sample (or vice versa) shows up as a count mismatch.
+  assert.equal(RISKY_CMD_PARTS.length, samples.size, 'piece count drifted');
+  // Rebuild the alternation the same way the module does and compare verdicts.
+  const alternation = RISKY_CMD_PARTS.map(({ re }) => `(?:${re.source})`).join('|');
+  const composed = new RegExp(alternation, 'i');
+  const risky = [...samples.values(), 'rm -fr /tmp', 'chmod 0777 f'];
+  risky.forEach((cmd) => {
+    assert.equal(composed.test(cmd), isRiskyCommand(cmd), `composed != isRiskyCommand for: ${cmd}`);
+  });
+  const harmless = ['rm file.txt', 'chmod 644 file', 'ls -la', 'curl https://x > out.txt', '--force-with-lease'];
+  harmless.forEach((benign) => {
+    assert.equal(composed.test(benign), isRiskyCommand(benign), `benign mismatch: ${benign}`);
+    assert.ok(!composed.test(benign), `benign over-flagged: ${benign}`);
+  });
+});
+
+test('regex decomposition: every SECURITY_INTENT named piece fires on its phrasing family', () => {
+  // One representative phrase per named piece, in declaration order.
+  const samples = new Map([
+    ['credential_lifecycle', 'please rotate the api key'],
+    ['pat_lifecycle', 'the pat was rotated yesterday'],
+    ['email_change', 'change the email to a public contact'],
+    ['do_not_expose', 'never expose the token'],
+    ['expose_us', 'this could expose us'],
+    ['leak_list', 'audit for leak anything'],
+    ['audit_repos', 'do a full audit of the repo'],
+    ['commit_history_audit', 'the commit history needs an audit'],
+    ['relicensing', 'relicense the project to MIT'],
+    ['disable_tests', 'skip the auth test'],
+    ['access_control_change', 'tighten the auth flow'],
+  ]);
+  const pieceByName = new Map();
+  for (const { name, re } of SECURITY_INTENT_PARTS) pieceByName.set(name, re);
+  // Every sample must name a real piece, and that piece alone must match it.
+  samples.forEach((phrase, name) => {
+    const pattern = pieceByName.get(name);
+    assert.ok(pattern, `unknown piece ${name}`);
+    assert.ok(pattern.test(phrase), `piece ${name} missed its phrase: ${phrase}`);
+  });
+  // A piece added without a sample (or vice versa) shows up as a count mismatch.
+  assert.equal(SECURITY_INTENT_PARTS.length, samples.size, 'piece count drifted');
+  // Rebuild the alternation the same way the module does.
+  const alternation = SECURITY_INTENT_PARTS.map(({ re }) => `(?:${re.source})`).join('|');
+  const composed = new RegExp(alternation, 'i');
+  [...samples.values()].forEach((phrase) => {
+    assert.ok(composed.test(phrase), `composed missed: ${phrase}`);
+  });
+  const harmless = ['a normal sentence about the weather', 'use the api carefully', 'email me later'];
+  harmless.forEach((benign) => {
+    assert.ok(!composed.test(benign), `benign security phrasing over-flagged: ${benign}`);
+  });
+});
+
test('cli: value-taking options reject a missing value or a flag-shaped value', () => {
for (const args of [['--dir'], ['--out', '--redact-auto'], ['--report-file', '--quiet'], ['--from'], ['--since']]) {
assert.throws(() => parseArgs(args), /requires a value|requires at least|expects a date|unknown --from/, `expected ${JSON.stringify(args)} to throw`);