| @@ -56,6 +56,9 @@ Options: | ||
| --security print a security-focused report and write hallucinations.json | ||
| --mcp start a read-only MCP server over stdio (same as: treetrace mcp) | ||
| --redact-auto redact every detected secret without prompting | ||
| + | --keep-git-shas keep git object hashes (40/64-hex in a git context) instead of | |
| + | redacting them as generic hex tokens; opt-in, still fail-closed | |
| + | for any value that also matches a named secret rule | |
| --since <YYYY-MM-DD> only include sessions active on/after this date | ||
| (timestamped sessions only; plain transcripts are excluded) | ||
| --quiet suppress progress output | ||
| @@ -263,9 +266,10 @@ export async function loadRedactedTree(opts, projectDir, projectName, log = () = | ||
| } | ||
| const interactive = !forceAuto && process.stdin.isTTY && process.stderr.isTTY && !opts.redactAuto; | ||
| - | const { decisions, asked, autoRedacted, overriddenKeeps } = await resolveFindings(findings, priorDecisions, { | |
| + | const { decisions, asked, autoRedacted, overriddenKeeps, autoKeptGitShas } = await resolveFindings(findings, priorDecisions, { | |
| interactive, | ||
| autoRedact: forceAuto || opts.redactAuto, | ||
| + | keepGitShas: opts.keepGitShas, | |
| }); | ||
| if (overriddenKeeps) { | ||
| log( | ||
| @@ -281,6 +285,9 @@ export async function loadRedactedTree(opts, projectDir, projectName, log = () = | ||
| ) | ||
| ); | ||
| } | ||
| + | if (autoKeptGitShas) { | |
| + | log(c.dim(`kept ${plural(autoKeptGitShas, 'git object hash')} as non-secret (--keep-git-shas)`)); | |
| + | } | |
| for (const node of tree.nodes) { | ||
| const before = node.text; | ||
| @@ -510,6 +517,7 @@ export function parseArgs(argv) { | ||
| mcp: false, | ||
| titlesOnly: false, | ||
| redactAuto: false, | ||
| + | keepGitShas: false, | |
| quiet: false, | ||
| help: false, | ||
| version: false, | ||
| @@ -552,6 +560,7 @@ export function parseArgs(argv) { | ||
| case 'mcp': case '--mcp': opts.mcp = true; break; | ||
| case '--titles-only': opts.titlesOnly = true; break; | ||
| case '--redact-auto': opts.redactAuto = true; break; | ||
| + | case '--keep-git-shas': opts.keepGitShas = true; break; | |
| case '--quiet': opts.quiet = true; break; | ||
| case '--help': case '-h': opts.help = true; break; | ||
| case '--version': case '-v': opts.version = true; break; |
| @@ -85,6 +85,19 @@ function findOversizedRuns(text) { | ||
| return runs; | ||
| } | ||
// Hex lengths accepted as git object ids: 40 and 64 characters.
const GIT_SHA_LENGTHS = new Set([40, 64]);

// A git-related keyword that sits on the SAME line as the hash and is separated
// from it by at least one of: space, tab, ':', '.', '/', '-'.
// Newlines are deliberately not separators, so a keyword that merely ends the
// previous line cannot vouch for a hex value on the next one. Requiring at
// least one separator also stops the end of the sliced prefix from acting as a
// word boundary (which would accept `commit<hex>` with nothing in between).
const GIT_CONTEXT_RE =
  /\b(?:commit|tree|parent|object|merge|ref|refs|origin|HEAD|tag|blob|cherry|rebase|bisect|stash)[ \t:./-]+$/i;

/**
 * Decide whether a hex match looks like a git object hash in a git context.
 *
 * A match qualifies only when it is exactly 40 or 64 hex characters AND either
 *   - it is preceded, on the same line and within the previous 48 characters,
 *     by a git keyword followed by one or more separator characters, or
 *   - it starts a line (or the text) and is immediately followed by a single
 *     space.
 *
 * @param {string} match - The matched text.
 * @param {string} text - The text the match was found in.
 * @param {number} index - Offset of `match` within `text`.
 * @returns {boolean} `true` when the match is a git-hash candidate; `false`
 *   otherwise, including for non-string `match` or `text`.
 */
export function isGitShaCandidate(match, text, index) {
  if (typeof match !== 'string' || typeof text !== 'string') return false;
  if (!GIT_SHA_LENGTHS.has(match.length)) return false;
  if (!/^[0-9a-fA-F]+$/.test(match)) return false;

  // Only the 48 characters directly before the match are inspected.
  const before = text.slice(Math.max(0, index - 48), index);
  if (GIT_CONTEXT_RE.test(before)) return true;

  // Fallback: hash at the very start of a line, followed by one space.
  const atLineStart = index === 0 || text[index - 1] === '\n';
  return atLineStart && text[index + match.length] === ' ';
}
| + | ||
| export function scanText(text) { | ||
| const oversized = text.length > MAX_TOKEN_LEN ? findOversizedRuns(text) : []; | ||
| let scanInput = text; | ||
| @@ -110,12 +123,14 @@ export function scanText(text) { | ||
| rule.re.lastIndex = 0; | ||
| let m; | ||
| while ((m = rule.re.exec(scanInput)) !== null) { | ||
| - | findings.push({ | |
| + | const finding = { | |
| ruleId: rule.id, | ||
| severity: rule.severity, | ||
| match: m[0], | ||
| index: m.index, | ||
| - | }); | |
| + | }; | |
| + | if (rule.id === 'hex-token') finding.gitShaCandidate = isGitShaCandidate(m[0], scanInput, m.index); | |
| + | findings.push(finding); | |
| if (m.index === rule.re.lastIndex) rule.re.lastIndex++; | ||
| } | ||
| } | ||
| @@ -173,7 +188,7 @@ export function maskFor(finding) { | ||
| return `[REDACTED:${finding.ruleId}]`; | ||
| } | ||
| - | export async function resolveFindings(findings, priorDecisions, { interactive, autoRedact }) { | |
| + | export async function resolveFindings(findings, priorDecisions, { interactive, autoRedact, keepGitShas = false } = {}) { | |
| const decisions = { ...priorDecisions }; | ||
| const unique = new Map(); | ||
| for (const f of findings) { | ||
| @@ -182,12 +197,25 @@ export async function resolveFindings(findings, priorDecisions, { interactive, a | ||
| unique.get(h).count++; | ||
| } | ||
| + | let autoKeptGitShas = 0; | |
| + | if (keepGitShas) { | |
| + | const highHashes = new Set(); | |
| + | for (const f of findings) if (f.severity === 'high') highHashes.add(sha256(f.match)); | |
| + | for (const [h, { finding }] of unique) { | |
| + | if (finding.gitShaCandidate && !decisions[h] && !highHashes.has(h)) { | |
| + | decisions[h] = { action: 'keep', ruleId: 'git-commit-sha' }; | |
| + | autoKeptGitShas++; | |
| + | } | |
| + | } | |
| + | } | |
| + | ||
| const autoMode = !interactive || autoRedact; | ||
| let overriddenKeeps = 0; | ||
| if (autoMode) { | ||
| for (const [h, { finding }] of unique) { | ||
| const prior = decisions[h]; | ||
| if (prior && prior.action === 'keep' && (finding.severity === 'high' || finding.severity === 'medium')) { | ||
| + | if (keepGitShas && finding.gitShaCandidate) continue; | |
| delete decisions[h]; | ||
| overriddenKeeps++; | ||
| } | ||
| @@ -195,13 +223,13 @@ export async function resolveFindings(findings, priorDecisions, { interactive, a | ||
| } | ||
| const unresolved = [...unique.entries()].filter(([h]) => !decisions[h]); | ||
| - | if (!unresolved.length) return { decisions, asked: 0, overriddenKeeps }; | |
| + | if (!unresolved.length) return { decisions, asked: 0, overriddenKeeps, autoKeptGitShas }; | |
| if (autoMode) { | ||
| for (const [h, { finding }] of unresolved) { | ||
| decisions[h] = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId }; | ||
| } | ||
| - | return { decisions, asked: 0, autoRedacted: unresolved.length, overriddenKeeps }; | |
| + | return { decisions, asked: 0, autoRedacted: unresolved.length, overriddenKeeps, autoKeptGitShas }; | |
| } | ||
| const rl = createInterface({ input: process.stdin, output: process.stderr }); | ||
| @@ -236,7 +264,7 @@ export async function resolveFindings(findings, priorDecisions, { interactive, a | ||
| } | ||
| } | ||
| rl.close(); | ||
| - | return { decisions, asked: unresolved.length }; | |
| + | return { decisions, asked: unresolved.length, autoKeptGitShas }; | |
| } | ||
| export function applyDecisions(text, findings, decisions) { |
| @@ -8,7 +8,7 @@ import { dirname, join } from 'node:path'; | ||
| import { parseSessionFile, parsePlainTranscript, classifySpecialUserText } from '../src/parse.js'; | ||
| import { classifyPrompts } from '../src/extract.js'; | ||
| import { buildTree } from '../src/tree.js'; | ||
| - | import { scanText, applyDecisions, shadowScan, maskFor, resolveFindings } from '../src/redact.js'; | |
| + | import { scanText, applyDecisions, shadowScan, maskFor, resolveFindings, isGitShaCandidate } from '../src/redact.js'; | |
| import { renderMarkdown, promptPack } from '../src/render-md.js'; | ||
| import { renderMermaid, isSummaryByDefault, SUMMARY_NODE_THRESHOLD } from '../src/render-mermaid.js'; | ||
| import { renderJson } from '../src/render-json.js'; | ||
| @@ -201,6 +201,41 @@ test('redaction: uuids and long lowercase identifiers are not flagged as high-en | ||
| } | ||
| }); | ||
// Table-driven check of isGitShaCandidate: each row supplies the matched hex,
// the surrounding text, the match offset, and whether a candidate is expected.
test('redaction: git object hashes are classified as candidates only in a git context', () => {
  const sha1 = '0123456789abcdef0123456789abcdef01234567';
  const sha256hex = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef';
  const hex32 = '0123456789abcdef0123456789abcdef';

  const cases = [
    { hex: sha1, text: `commit ${sha1}`, at: 7, expected: true, message: 'commit <sha1> should be a candidate' },
    { hex: sha256hex, text: `git tree ${sha256hex}`, at: 9, expected: true, message: 'git tree <sha256> should be a candidate' },
    { hex: sha1, text: `${sha1} fix the parser\n`, at: 0, expected: true, message: 'oneline sha should be a candidate' },
    { hex: sha1, text: `token=${sha1} end`, at: 6, expected: false, message: 'token= context is not git' },
    { hex: sha256hex, text: `session_hex=${sha256hex}`, at: 12, expected: false, message: 'session_hex= context is not git' },
    { hex: hex32, text: `commit ${hex32}`, at: 7, expected: false, message: '32-hex is not a git object id' },
  ];

  for (const { hex, text, at, expected, message } of cases) {
    const verdict = isGitShaCandidate(hex, text, at);
    assert.ok(expected ? verdict : !verdict, message);
  }
});
| + | ||
// End-to-end check of the keepGitShas option: a hash after "commit " is kept,
// while a same-shaped hex value after "session_hex=" is still redacted.
test('redaction: --keep-git-shas keeps git hashes but stays fail-closed for other hex', async () => {
  const gitSha = '0123456789abcdef0123456789abcdef01234567';
  const sessionSecret = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef';
  const transcript = `commit ${gitSha}\nmy key is session_hex=${sessionSecret} ok`;

  const findings = scanText(transcript);
  const findingFor = (value) => findings.find((f) => f.match === value);
  const gitFinding = findingFor(gitSha);
  const secretFinding = findingFor(sessionSecret);
  assert.ok(gitFinding && gitFinding.gitShaCandidate, 'git sha must be flagged as a candidate');
  assert.ok(secretFinding && !secretFinding.gitShaCandidate, 'session_hex secret must NOT be a git candidate');

  // Non-interactive auto-redact, with the opt-in enabled.
  const autoOptions = { interactive: false, autoRedact: true };
  const optedIn = await resolveFindings(findings, {}, { ...autoOptions, keepGitShas: true });
  const decisions = optedIn.decisions;
  const gitKey = sha256(gitSha);
  const secretKey = sha256(sessionSecret);
  assert.equal(decisions[gitKey].action, 'keep', 'git object hash should be kept');
  assert.equal(decisions[gitKey].ruleId, 'git-commit-sha', 'kept under git-commit-sha rule');
  assert.equal(decisions[secretKey].action, 'redact', 'non-git hex must still be redacted');

  // Same findings without the opt-in: the git hash is redacted as well.
  const byDefault = await resolveFindings(findings, {}, { ...autoOptions });
  assert.equal(byDefault.decisions[gitKey].action, 'redact', 'default must redact git sha too (fail-closed)');

  // Apply the opted-in decisions and confirm what survives in the output.
  const cleaned = applyDecisions(transcript, findings, decisions);
  assert.ok(cleaned.includes(gitSha), 'kept git sha should survive in output');
  assert.ok(!cleaned.includes(sessionSecret), 'non-git secret must be redacted');
  assert.equal(shadowScan(cleaned, decisions).length, 0, 'shadow scan must be clean after keep + redact');
});
| + | ||
| test('redaction: end-to-end hex secret leaves no raw hex in any artifact', async () => { | ||
| const lower = '6881f8290266f4cc939959917f893a2a88787eb24bbcb6b9c37594c72bf448c3'; | ||
| const upper = lower.toUpperCase(); |