Zion Boggan
repos/TreeTrace/src/redact.js
zionboggan.com ↗
317 lines · javascript
History for this file →
1
import { createInterface } from 'node:readline/promises';
2
import { sha256, shannonEntropy, truncate, c } from './util.js';
3
 
4
/**
 * Regex detection rules. Each entry has an `id` (reported as the finding's
 * `ruleId`), a `severity` ('high' | 'medium' | 'soft') and a global regex `re`.
 * Every regex carries the `g` flag because callers iterate with `exec()` and
 * reset `lastIndex` themselves.
 */
export const RULES = [
  // --- high severity: provider-specific credential formats ---
  { id: 'private-key-block', severity: 'high', re: /-----BEGIN [A-Z ]*PRIVATE KEY( BLOCK)?-----[\s\S]*?(-----END [A-Z ]*PRIVATE KEY( BLOCK)?-----|$)/g },
  { id: 'aws-access-key', severity: 'high', re: /\b(AKIA|ASIA)[0-9A-Z]{16}\b/g },
  { id: 'github-token', severity: 'high', re: /\b(ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}\b/g },
  { id: 'github-fine-grained', severity: 'high', re: /\bgithub_pat_[A-Za-z0-9_]{22,}\b/g },
  { id: 'gitlab-token', severity: 'high', re: /\bglpat-[0-9a-zA-Z_-]{20,}\b/g },
  { id: 'anthropic-key', severity: 'high', re: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/g },
  // The (?!ant-) lookahead keeps this rule from also claiming `sk-ant-` keys.
  { id: 'openai-key', severity: 'high', re: /\bsk-(?!ant-)[A-Za-z0-9_-]{20,}\b/g },
  { id: 'slack-token', severity: 'high', re: /\bxox[baprs]-[0-9A-Za-z-]{10,}\b/g },
  { id: 'stripe-live-key', severity: 'high', re: /\b[sr]k_live_[0-9a-zA-Z]{10,}\b/g },
  { id: 'npm-token', severity: 'high', re: /\bnpm_[A-Za-z0-9]{36}\b/g },
  { id: 'tailscale-key', severity: 'high', re: /\btskey-[a-zA-Z0-9-]{10,}\b/g },
  { id: 'google-api-key', severity: 'high', re: /\bAIza[0-9A-Za-z_-]{35}\b/g },
  { id: 'sendgrid-key', severity: 'high', re: /\bSG\.[A-Za-z0-9_-]{16,32}\.[A-Za-z0-9_-]{16,64}\b/g },
  { id: 'twilio-key', severity: 'high', re: /\bSK[0-9a-fA-F]{32}\b/g },
  { id: 'telegram-bot-token', severity: 'high', re: /\b\d{8,10}:AA[A-Za-z0-9_-]{32,33}\b/g },
  { id: 'discord-webhook', severity: 'high', re: /https:\/\/(?:ptb\.|canary\.)?discord(?:app)?\.com\/api\/webhooks\/\d+\/[A-Za-z0-9_-]+/g },
  { id: 'jwt', severity: 'high', re: /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{5,}\b/g },

  // --- medium severity: generic shapes that are usually, but not always, secrets ---
  // NOTE(review): a pure-hex run of 513-4096 chars cannot satisfy both \b anchors
  // here, and the entropy pass in scanText skips pure-hex tokens, so such runs
  // appear to go unreported — confirm whether that gap is intentional.
  { id: 'hex-token', severity: 'medium', re: /\b[0-9a-fA-F]{32,512}\b/g },
  { id: 'wireguard-key', severity: 'medium', re: /\b(PrivateKey|PresharedKey)\s*=\s*[A-Za-z0-9+/]{42,44}=?/g },
  { id: 'url-basic-auth', severity: 'medium', re: /\b[a-z][a-z0-9+.-]{0,30}:\/\/[^/\s:@'"`]{2,256}:[^/\s@'"`]{2,256}@[^\s'"`]{1,512}/gi },
  { id: 'bearer-header', severity: 'medium', re: /\bBearer\s+[A-Za-z0-9._+/=-]{20,}\b/gi },
  // key [:=] value, where the value is a quoted string of 4-512 items or an
  // unquoted run of 6-512 chars; the lookahead skips placeholders/templating.
  { id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.){4,512}"|'(?:[^'\\]|\\.){4,512}'|`(?:[^`\\]|\\.){4,512}`|[^\s'"`,;){}]{6,512})/gi },
  // Same key list, but the value must be a quoted string containing at least
  // one backslash escape; this variant has no length bound on the value.
  { id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.)*?\\.(?:[^"\\]|\\.)*?"|'(?:[^'\\]|\\.)*?\\.(?:[^'\\]|\\.)*?'|`(?:[^`\\]|\\.)*?\\.(?:[^`\\]|\\.)*?`)/gi },

  // --- soft severity: personal/identifying data rather than credentials ---
  // The exempted domains must be the WHOLE domain: the inner lookahead rejects
  // the exemption when more domain characters follow, so addresses such as
  // alice@example.company.com are still reported.
  { id: 'email', severity: 'soft', re: /\b[A-Za-z0-9._%+-]+@(?!(?:users\.noreply\.github\.com|example\.(?:com|org))(?![A-Za-z0-9-]|\.[A-Za-z0-9]))[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g },
  { id: 'ipv4', severity: 'soft', re: /\b(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\b(?!\.\d)/g },
  { id: 'home-dir-username', severity: 'soft', re: /(?:\/(?:home|Users)\/|C:\\Users\\)([A-Za-z][A-Za-z0-9._-]{2,30})\b/g },
];
35
 
36
// Upper bound on the length of a token-like run handled by the regex passes.
// Declared first because ENTROPY_CANDIDATE_RE is derived from it.
const MAX_TOKEN_LEN = 4096;

// Single definition of "token character", shared by the per-character test and
// the entropy candidate pattern so the two cannot drift apart.
const TOKEN_CHAR_CLASS = 'A-Za-z0-9+/_=-';

// Matches strings made only of hexadecimal digits.
const HEX_RE = /^[0-9a-fA-F]+$/;

// Word-bounded runs of 32..MAX_TOKEN_LEN token characters (global: used with exec()).
const ENTROPY_CANDIDATE_RE = new RegExp(`\\b[${TOKEN_CHAR_CLASS}]{32,${MAX_TOKEN_LEN}}\\b`, 'g');

// Tests a single character; deliberately NOT global so .test() stays stateless.
const TOKEN_CHAR_RE = new RegExp(`[${TOKEN_CHAR_CLASS}]`);

// Digits followed by any mix of dots, digits and dashes.
const VERSION_LIKE_RE = /^\d+[.\d-]*$/;

// Whitespace plus zero-width characters (U+200B..U+200D, U+FEFF).
const JOIN_SEPARATOR_RE = /[\s\u200B-\u200D\uFEFF]/;

// Ids of the rules that are selected from RULES to build LOOSE_RULES.
const JOINED_SCAN_RULE_IDS = new Set([
  'aws-access-key',
  'github-token',
  'github-fine-grained',
  'gitlab-token',
  'anthropic-key',
  'openai-key',
  'slack-token',
  'stripe-live-key',
  'npm-token',
  'tailscale-key',
  'google-api-key',
  'sendgrid-key',
  'twilio-key',
  'telegram-bot-token',
  'jwt',
]);
59
 
60
// Relaxed copies of the rules listed in JOINED_SCAN_RULE_IDS, used when
// scanning text whose separators have been stripped. Each copy drops a leading
// and trailing `\b` from the pattern source and rewrites open-ended `{n,}`
// quantifiers to `{n,128}`; only the `g` flag is carried over.
const LOOSE_RULES = RULES.flatMap((rule) => {
  if (!JOINED_SCAN_RULE_IDS.has(rule.id)) return [];

  const looseSource = rule.re.source
    .replace(/^\\b/, '')
    .replace(/\\b$/, '')
    .replace(/\{(\d+),\}/g, '{$1,128}');

  return [{ id: rule.id, severity: rule.severity, re: new RegExp(looseSource, 'g') }];
});
68
 
69
/**
 * Locate maximal runs of token characters that are longer than MAX_TOKEN_LEN.
 *
 * @param {string} text - Text to walk character by character.
 * @returns {Array<[number, number]>} `[start, end)` index pairs, in order of appearance.
 */
function findOversizedRuns(text) {
  const oversized = [];
  let pos = 0;
  while (pos < text.length) {
    if (!TOKEN_CHAR_RE.test(text[pos])) {
      pos += 1;
      continue;
    }
    // Consume the whole run, then decide whether it is too long.
    const runStart = pos;
    while (pos < text.length && TOKEN_CHAR_RE.test(text[pos])) pos += 1;
    if (pos - runStart > MAX_TOKEN_LEN) oversized.push([runStart, pos]);
  }
  return oversized;
}
83
 
84
// Only hex strings of exactly these lengths are considered.
const GIT_SHA_LENGTHS = new Set([40, 64]);

/**
 * Heuristic: does this hex match look like a git object id in context?
 *
 * True when the match is 40 or 64 hex characters AND either
 *  - one of a fixed list of git-related words ends within the 48 characters
 *    before it (separated only by whitespace or `: . / -`), or
 *  - it starts a line (or the text) and is immediately followed by a space.
 *
 * @param {string} match - The matched text.
 * @param {string} text - The text the match was found in.
 * @param {number} index - Offset of `match` within `text`.
 * @returns {boolean}
 */
export function isGitShaCandidate(match, text, index) {
  const hasShaShape =
    Boolean(match) && GIT_SHA_LENGTHS.has(match.length) && /^[0-9a-fA-F]+$/.test(match);
  if (!hasShaShape) return false;

  const windowStart = Math.max(0, index - 48);
  const preceding = text.slice(windowStart, index);
  const followsGitKeyword =
    /\b(?:commit|tree|parent|object|merge|ref|refs|origin|HEAD|tag|blob|cherry|rebase|bisect|stash)\b[\s:./-]*$/i.test(preceding);
  if (followsGitKeyword) return true;

  const startsLine = index === 0 || text[index - 1] === '\n';
  if (!startsLine) return false;
  return text[index + match.length] === ' ';
}
96
 
97
/**
 * Scan text and return every finding, in this order: oversized token runs,
 * regex rule matches (in RULES order), high-entropy tokens, then provider
 * tokens that only appear once separators are stripped.
 *
 * Each finding is `{ ruleId, severity, match, index }`; `hex-token` findings
 * also carry a boolean `gitShaCandidate`.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<object>} Findings; `index` is an offset into `text`.
 */
export function scanText(text) {
  // Runs longer than MAX_TOKEN_LEN are reported wholesale and blanked out with
  // newlines (same length, so offsets stay valid) before the regex passes.
  const oversizedRuns = text.length > MAX_TOKEN_LEN ? findOversizedRuns(text) : [];
  let scanInput = text;
  if (oversizedRuns.length) {
    const buffer = text.split('');
    for (const [from, to] of oversizedRuns) buffer.fill('\n', from, to);
    scanInput = buffer.join('');
  }

  const findings = oversizedRuns.map(([from, to]) => ({
    ruleId: 'oversized-token',
    severity: 'medium',
    match: text.slice(from, to),
    index: from,
  }));

  // Pass 1: explicit regex rules.
  for (const { id, severity, re } of RULES) {
    re.lastIndex = 0;
    for (let hit = re.exec(scanInput); hit !== null; hit = re.exec(scanInput)) {
      const finding = { ruleId: id, severity, match: hit[0], index: hit.index };
      if (id === 'hex-token') {
        finding.gitShaCandidate = isGitShaCandidate(hit[0], scanInput, hit.index);
      }
      findings.push(finding);
      // Step past zero-length matches so exec() cannot loop forever.
      if (hit.index === re.lastIndex) re.lastIndex += 1;
    }
  }

  // Pass 2: generic high-entropy tokens that do not start inside a span
  // already reported above (spans are snapshotted before this pass).
  const knownSpans = findings.map((f) => [f.index, f.index + f.match.length]);
  const startsInKnownSpan = (pos) => knownSpans.some(([from, to]) => pos >= from && pos < to);
  const charClasses = [/[A-Z]/, /[a-z]/, /[0-9]/];

  ENTROPY_CANDIDATE_RE.lastIndex = 0;
  for (
    let hit = ENTROPY_CANDIDATE_RE.exec(scanInput);
    hit !== null;
    hit = ENTROPY_CANDIDATE_RE.exec(scanInput)
  ) {
    const [token] = hit;
    if (HEX_RE.test(token) || VERSION_LIKE_RE.test(token)) continue;
    // Require at least two of: uppercase, lowercase, digit.
    const classCount = charClasses.filter((cls) => cls.test(token)).length;
    if (classCount < 2) continue;
    if (shannonEntropy(token) < 4.4) continue;
    if (startsInKnownSpan(hit.index)) continue;
    findings.push({
      ruleId: 'high-entropy-token',
      severity: 'medium',
      match: token,
      index: hit.index,
    });
  }

  // Pass 3: provider tokens broken up by whitespace / zero-width characters.
  const joinedFindings = scanJoinedProviderTokens(scanInput, findings, text);
  findings.push(...joinedFindings);
  return findings;
}
151
 
152
/**
 * Find provider tokens that only match once separator characters
 * (JOIN_SEPARATOR_RE) are removed from the text.
 *
 * @param {string} scanInput - Text to strip and scan.
 * @param {Array<object>} existing - Findings already reported; a joined match
 *   whose start falls inside one of their spans is dropped.
 * @param {string} [original=scanInput] - Text the reported `match` is sliced from.
 * @returns {Array<object>} New findings whose `index` refers to the unstripped text.
 */
function scanJoinedProviderTokens(scanInput, existing, original = scanInput) {
  // Build the stripped text plus a map from stripped offset -> original offset.
  const kept = [];
  const indexMap = [];
  for (let pos = 0; pos < scanInput.length; pos += 1) {
    const ch = scanInput[pos];
    if (!JOIN_SEPARATOR_RE.test(ch)) {
      kept.push(ch);
      indexMap.push(pos);
    }
  }
  // Nothing was stripped, so the regular passes have already seen this text.
  if (kept.length === scanInput.length) return [];

  const joined = kept.join('');
  const coveredSpans = existing.map((f) => [f.index, f.index + f.match.length]);
  const isCovered = (pos) => coveredSpans.some(([from, to]) => pos >= from && pos < to);

  const results = [];
  for (const { id, severity, re } of LOOSE_RULES) {
    re.lastIndex = 0;
    for (let hit = re.exec(joined); hit !== null; hit = re.exec(joined)) {
      const matchLength = hit[0].length;
      if (matchLength <= 256) {
        const start = indexMap[hit.index];
        const end = indexMap[hit.index + matchLength - 1] + 1;
        const slice = original.slice(start, end);
        // Only report matches that actually contained a separator; unbroken
        // tokens are the job of the regular rules.
        if (JOIN_SEPARATOR_RE.test(slice) && !isCovered(start)) {
          results.push({ ruleId: id, severity, match: slice, index: start });
        }
      }
      // Step past zero-length matches so exec() cannot loop forever.
      if (hit.index === re.lastIndex) re.lastIndex += 1;
    }
  }
  return results;
}
182
 
183
/**
 * Build the default replacement text for a finding.
 *
 * @param {{ruleId: string}} finding
 * @returns {string} `[REDACTED:<ruleId>]`
 */
export function maskFor(finding) {
  const { ruleId } = finding;
  return '[REDACTED:'.concat(ruleId, ']');
}
186
 
187
/**
 * Decide, for every distinct matched string, whether it is kept or redacted.
 *
 * Decisions are keyed by `sha256(match)`. `priorDecisions` is copied, never
 * mutated. In auto mode (`!interactive || autoRedact`) prior "keep" decisions
 * for high/medium findings are discarded and everything unresolved is
 * redacted with the default mask. Otherwise the user is prompted on
 * stdin/stderr once per distinct match.
 *
 * @param {Array<object>} findings - Findings as produced by scanText.
 * @param {object} [priorDecisions] - Earlier decisions, keyed by match hash.
 * @param {object} [options]
 * @param {boolean} [options.interactive] - Prompt the user for unresolved findings.
 * @param {boolean} [options.autoRedact] - Force auto mode even when interactive.
 * @param {boolean} [options.keepGitShas=false] - Auto-keep findings flagged
 *   `gitShaCandidate`, unless the same string also matched a high-severity rule.
 * @returns {Promise<object>} `{ decisions, asked, overriddenKeeps, autoKeptGitShas }`,
 *   plus `autoRedacted` when auto mode redacted something.
 */
export async function resolveFindings(findings, priorDecisions, { interactive, autoRedact, keepGitShas = false } = {}) {
  const decisions = { ...priorDecisions };

  // Hash every match exactly once: group identical matches and remember which
  // hashes were seen at high severity.
  const unique = new Map();
  const highHashes = new Set();
  for (const f of findings) {
    const h = sha256(f.match);
    if (f.severity === 'high') highHashes.add(h);
    const entry = unique.get(h);
    if (entry) entry.count++;
    else unique.set(h, { finding: f, count: 1 });
  }

  let autoKeptGitShas = 0;
  if (keepGitShas) {
    for (const [h, { finding }] of unique) {
      if (finding.gitShaCandidate && !decisions[h] && !highHashes.has(h)) {
        decisions[h] = { action: 'keep', ruleId: 'git-commit-sha' };
        autoKeptGitShas++;
      }
    }
  }

  const autoMode = !interactive || autoRedact;
  let overriddenKeeps = 0;
  if (autoMode) {
    // Without a human in the loop, an earlier "keep" on a high/medium finding
    // is not honoured (git-sha keeps excepted when keepGitShas is on).
    for (const [h, { finding }] of unique) {
      const prior = decisions[h];
      if (prior && prior.action === 'keep' && (finding.severity === 'high' || finding.severity === 'medium')) {
        if (keepGitShas && finding.gitShaCandidate) continue;
        delete decisions[h];
        overriddenKeeps++;
      }
    }
  }

  const unresolved = [...unique.entries()].filter(([h]) => !decisions[h]);
  if (!unresolved.length) return { decisions, asked: 0, overriddenKeeps, autoKeptGitShas };

  if (autoMode) {
    for (const [h, { finding }] of unresolved) {
      decisions[h] = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId };
    }
    return { decisions, asked: 0, autoRedacted: unresolved.length, overriddenKeeps, autoKeptGitShas };
  }

  const rl = createInterface({ input: process.stdin, output: process.stderr });
  try {
    process.stderr.write(
      `\n${c.bold(`${unresolved.length} potential secret${unresolved.length === 1 ? '' : 's'} found`)}. Nothing is exported until each is resolved.\n\n`
    );
    let i = 0;
    for (const [h, { finding, count }] of unresolved) {
      i++;
      const sev =
        finding.severity === 'high' ? c.red(finding.severity)
        : finding.severity === 'medium' ? c.yellow(finding.severity)
        : c.gray(finding.severity);
      process.stderr.write(
        `${c.dim(`[${i}/${unresolved.length}]`)} ${sev} ${c.bold(finding.ruleId)} ×${count}\n    ${c.cyan(truncate(finding.match, 72))}\n`
      );
      // Re-prompt until the answer is recognised; an empty answer means redact.
      let answer;
      for (;;) {
        answer = (await rl.question(`    ${c.bold('[r]')}edact  ${c.bold('[k]')}eep  ${c.bold('[e]')}dit replacement › `))
          .trim()
          .toLowerCase();
        if (['r', 'k', 'e', 'redact', 'keep', 'edit', ''].includes(answer)) break;
      }
      if (answer === 'k' || answer === 'keep') {
        decisions[h] = { action: 'keep', ruleId: finding.ruleId };
      } else if (answer === 'e' || answer === 'edit') {
        const replacement = (await rl.question('    replacement text › ')).trim() || maskFor(finding);
        decisions[h] = { action: 'redact', replacement, ruleId: finding.ruleId };
      } else {
        decisions[h] = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId };
      }
    }
  } finally {
    // Always release stdin, even when a prompt rejects part-way through.
    rl.close();
  }
  // overriddenKeeps is always 0 here (auto mode is off); it is included so
  // every return path has the same shape.
  return { decisions, asked: unresolved.length, overriddenKeeps, autoKeptGitShas };
}
265
 
266
/**
 * Replace every occurrence of each finding whose decision is "redact".
 *
 * @param {string} text - Text to rewrite.
 * @param {Array<object>} findings - Findings whose `match` strings may be replaced.
 * @param {object} decisions - Decisions keyed by `sha256(match)`.
 * @returns {string} Text with all redactions applied.
 */
export function applyDecisions(text, findings, decisions) {
  const toRedact = new Map();
  for (const f of findings) {
    // An empty match would make split('') below break the text into single
    // characters and insert the replacement between every one of them.
    if (!f.match) continue;
    const d = decisions[sha256(f.match)];
    if (d && d.action === 'redact') {
      toRedact.set(f.match, d.replacement || maskFor(f));
    }
  }

  // Longest first, so a short match that is a substring of a longer one
  // cannot split the longer one before it is replaced.
  const longestFirst = [...toRedact.entries()].sort((a, b) => b[0].length - a[0].length);

  let out = text;
  for (const [original, replacement] of longestFirst) {
    // split/join replaces literally: no regex escaping and no `$` patterns.
    out = out.split(original).join(replacement);
  }
  return out;
}
283
 
284
/**
 * Re-scan already-rendered text and return the findings that still count as
 * leaks: anything that is not soft severity, not explicitly kept, and not
 * itself starting with a redaction mask.
 *
 * @param {string} renderedText - Output text to verify.
 * @param {object} decisions - Decisions keyed by `sha256(match)`.
 * @returns {Array<object>} Remaining leak findings.
 */
export function shadowScan(renderedText, decisions) {
  return scanText(renderedText).filter((f) => {
    if (f.severity === 'soft') return false;
    const decision = decisions[sha256(f.match)];
    if (decision && decision.action === 'keep') return false;
    return !f.match.startsWith('[REDACTED:');
  });
}
295
 
296
/**
 * Last line of defence: redact anything shadowScan still reports, then verify.
 *
 * Adds a default "redact" decision to `decisions` (mutating it) for every leak
 * that has no decision yet.
 *
 * @param {string} text - Rendered text to check.
 * @param {object} decisions - Decisions keyed by `sha256(match)`; may be extended.
 * @returns {string} `text` unchanged when clean, otherwise the patched text.
 * @throws {Error} When leaks are still detected after patching.
 */
export function patchResiduals(text, decisions) {
  const leaks = shadowScan(text, decisions);
  if (leaks.length === 0) return text;

  for (const leak of leaks) {
    const key = sha256(leak.match);
    if (decisions[key]) continue;
    decisions[key] = { action: 'redact', replacement: maskFor(leak), ruleId: leak.ruleId };
  }

  const patched = applyDecisions(text, leaks, decisions);

  // Scan once more; refuse to hand back text that still trips the scanner.
  const residual = shadowScan(patched, decisions);
  if (residual.length === 0) return patched;

  const ruleIds = [...new Set(residual.map((l) => l.ruleId))].join(', ');
  throw new Error(
    `patchResiduals: ${residual.length} leak(s) remain after auto-redaction (${ruleIds}). Refusing to write.`
  );
}