Zion Boggan
repos/Oversight/oversight_core/decoy.py
zionboggan.com ↗
217 lines · python
History for this file →
1
"""
2
oversight_core.decoy
3
===================
4
 
5
LLM-powered decoy document generator.
6
 
7
Generates N plausible-looking decoy files that sit alongside real sensitive
8
content. Every decoy is sealed for a "trap" recipient whose beacons all fire
9
when accessed. Any open of a decoy is a high-confidence signal of intrusion -
10
no legitimate user should touch them, because the decoys have filenames
11
engineered to be interesting to an attacker browsing.
12
 
13
This is the Thinkst canary pattern applied at scale with LLM-generated
14
realism. Recent research (SPADE 2025, HoneyGPT) shows this is an open area
15
with no strong commercial shipment.
16
 
17
Backend options (pick via `backend` arg or OVERSIGHT_DECOY_BACKEND env):
  - "ollama"   - POST to a local Ollama server (recommended)
  - "openai"   - OpenAI-compatible API (for testing); not implemented in
                 this module, so selecting it currently yields static output
  - "static"   - hardcoded templates (works offline; lowest quality)
21
 
22
Override the Ollama endpoint and model with the ``OLLAMA_URL`` and
23
``OVERSIGHT_DECOY_MODEL`` environment variables. Defaults target a
24
loopback Ollama install.
25
"""
26
 
27
from __future__ import annotations
28
 
29
import json
30
import os
31
import random
32
from dataclasses import dataclass
33
from typing import Optional
34
 
35
import httpx
36
 
37
 
38
# Defaults for the Ollama backend. Both values are read from the environment
# once, at import time, and fall back to a loopback install and a small model.
DEFAULT_OLLAMA = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434")
DEFAULT_MODEL = os.getenv("OVERSIGHT_DECOY_MODEL", "llama3.1:8b")
40
 
41
 
42
# Pool of bait filenames used when the caller does not supply its own list;
# generate_decoy_set() draws a random sample from it. Each name contains at
# least one keyword that _topic_from_filename() recognizes.
DEFAULT_DECOY_NAMES = [
    "Q4-board-deck-FINAL-v3.docx",
    "acquisition-targets-2026.xlsx",
    "legal-hold-privileged.pdf",
    "compensation-bands-confidential.xlsx",
    "incident-response-playbook-internal.docx",
    "vendor-contracts-summary.pdf",
    "cto-1on1-notes.docx",
    "layoff-planning-tier1.xlsx",
    "customer-churn-risk-2026.xlsx",
    "M&A-pipeline-confidential.pptx",
    "security-audit-findings-Q3.pdf",
    "api-keys-rotation-plan.txt",
    "lawsuit-draft-settlement.docx",
    "executive-bonus-structure.xlsx",
    "strategic-partnership-nda-drafts.pdf",
]
59
 
60
 
61
DECOY_SYSTEM_PROMPT = """You are a corporate document generator for a security
62
research system. You produce plausible-looking but entirely fictional business
63
documents that will be used as decoys in an intrusion-detection system. All
64
names, numbers, and claims must be invented - never use real company names,
65
real people, or real data. The goal is realism of form, not content.
66
 
67
Rules:
68
- All dollar figures are fake.
69
- All people are fictional (use generic names like "A. Smith", "J. Chen").
70
- All company names are fake (use "Acme Industries", "Meridian Partners").
71
- Avoid dates in the near past (the document should look "current" as of 2026).
72
- Tone: dry, corporate, slightly bureaucratic. No irony.
73
- Length: 250-600 words for text documents.
74
"""
75
 
76
 
77
@dataclass
class DecoyRequest:
    """A request to generate one decoy.

    Carries the inputs the generator backends in this module read when
    producing a single document body.
    """
    # Filename the decoy is generated for; interpolated into the LLM prompt
    # and into the header line of the static template.
    filename: str
    # Short description of the subject the document should cover.
    topic_hint: str
    # Optional organizational context; when truthy it is appended to the LLM
    # prompt, otherwise it is left out.
    context: Optional[str] = None
83
 
84
 
85
def _prompt_for(req: DecoyRequest) -> str:
    """Build the user prompt asking the model to write the body for ``req``.

    The prompt names the target filename and the topic hint. When
    ``req.context`` is truthy, an "Organizational context" line is appended
    directly after the topic sentence; otherwise nothing is added.
    """
    context_line = ""
    if req.context:
        context_line = f"\nOrganizational context: {req.context}"

    fragments = [
        "Produce a realistic but entirely fictional document that would ",
        f"plausibly be saved as the filename '{req.filename}'. The topic is: ",
        f"{req.topic_hint}.{context_line}\n\n",
        "Write the full document body. No preamble, no meta-commentary. ",
        "Begin the document directly.",
    ]
    return "".join(fragments)
94
 
95
 
96
# Ordered (keywords, topic) rules. The first rule with any keyword occurring
# as a substring of the lowercased filename wins, so the order is significant.
_TOPIC_RULES = (
    (("board", "deck"), "quarterly board meeting update"),
    (
        ("acquisition", "m&a", "pipeline"),
        "shortlist of acquisition targets with preliminary valuations",
    ),
    (("legal", "lawsuit"), "legal memo with privileged work-product notation"),
    (("comp", "bonus", "bands"), "executive compensation band summary"),
    (("incident", "playbook"), "internal incident response playbook"),
    (("audit", "findings"), "internal security audit findings summary"),
    (("api", "key"), "API key rotation plan with endpoint references"),
    (("layoff",), "workforce reduction planning notes"),
    (("churn",), "customer churn risk analysis"),
    (
        ("partnership", "nda"),
        "strategic partnership NDA draft negotiation notes",
    ),
    (("1on1", "notes"), "executive one-on-one meeting notes"),
    (("vendor", "contract"), "vendor contract summary with renewal dates"),
)


def _topic_from_filename(name: str) -> str:
    """Guess a topic hint from a filename using substring keyword rules.

    Matching is case-insensitive and purely substring-based. Rules are tried
    in declaration order and the first hit is returned; a filename matching
    no rule yields the generic "internal business memo".
    """
    lowered = name.lower()
    for keywords, topic in _TOPIC_RULES:
        if any(keyword in lowered for keyword in keywords):
            return topic
    return "internal business memo"
124
 
125
 
126
 
127
def _generate_ollama(
    req: DecoyRequest,
    ollama_url: str = DEFAULT_OLLAMA,
    model: str = DEFAULT_MODEL,
    timeout: float = 120.0,
) -> str:
    """Ask an Ollama server to write the decoy body for ``req``.

    Sends one non-streaming POST to ``<ollama_url>/api/generate`` carrying
    the prompt built by ``_prompt_for`` and ``DECOY_SYSTEM_PROMPT`` as the
    system message, then returns the ``"response"`` field of the JSON reply.

    Args:
        req: The decoy to generate.
        ollama_url: Base URL of the server; trailing slashes are stripped.
        model: Model name placed in the request payload.
        timeout: Timeout passed through to ``httpx.post``.

    Errors from the HTTP call, from ``raise_for_status()``, or from a reply
    lacking a ``"response"`` key propagate to the caller.
    """
    endpoint = f"{ollama_url.rstrip('/')}/api/generate"
    sampling = {"temperature": 0.8, "top_p": 0.9, "num_predict": 800}
    payload = {
        "model": model,
        "prompt": _prompt_for(req),
        "system": DECOY_SYSTEM_PROMPT,
        "stream": False,
        "options": sampling,
    }

    reply = httpx.post(endpoint, json=payload, timeout=timeout)
    reply.raise_for_status()
    return reply.json()["response"]
147
 
148
 
149
def _generate_static(req: DecoyRequest) -> str:
    """Render the offline template for ``req``; for testing, not production.

    The output is a fixed header (title, summary, key points, background
    heading) followed by 30 numbered filler paragraphs, joined with
    newlines. Only ``req.filename`` and ``req.topic_hint`` are used.
    """
    topic = req.topic_hint
    header = [
        f"INTERNAL - {req.filename}",
        f"Topic: {topic}",
        "",
        "Summary",
        "-------",
        f"This document covers the {topic}. It is distributed to a",
        "limited group and should not be shared externally. Figures cited below",
        "are preliminary and subject to revision.",
        "",
        "Key points",
        "----------",
        "- Reviewed by: A. Smith, J. Chen",
        "- Next review: Q3 2026",
        "- Distribution: executive leadership only",
        "- Classification: CONFIDENTIAL - RESTRICTED",
        "",
        "Background",
        "----------",
    ]
    filler = [
        f"Paragraph {number}: standard corporate filler content for the "
        f"{topic} topic, written to give plausible body to a "
        f"decoy document."
        for number in range(1, 31)
    ]
    return "\n".join(header + filler)
178
 
179
 
180
def generate_decoy(
    req: DecoyRequest,
    backend: Optional[str] = None,
    ollama_url: str = DEFAULT_OLLAMA,
    model: str = DEFAULT_MODEL,
) -> str:
    """Generate a single decoy document body and return its text.

    Args:
        req: The decoy to generate.
        backend: Backend name. When falsy, the ``OVERSIGHT_DECOY_BACKEND``
            environment variable is consulted, defaulting to ``"ollama"``.
        ollama_url: Base URL forwarded to the Ollama backend.
        model: Model name forwarded to the Ollama backend.

    Returns:
        The document text. This function always returns text. If the Ollama
        backend raises, or if ``backend`` is neither ``"ollama"`` nor
        ``"static"``, a notice is printed and the static template is used.
    """
    backend = backend or os.environ.get("OVERSIGHT_DECOY_BACKEND", "ollama")

    if backend == "ollama":
        try:
            return _generate_ollama(req, ollama_url=ollama_url, model=model)
        except Exception as e:
            # Deliberate best-effort: decoy generation must not fail just
            # because the LLM server is unreachable or replies oddly.
            print(f"[decoy] backend '{backend}' failed ({e}); falling back to static")
    elif backend != "static":
        # An unrecognized name used to yield static output without any hint
        # that the requested backend was ignored; make the fallback visible.
        print(f"[decoy] unknown backend '{backend}'; falling back to static")

    return _generate_static(req)
196
 
197
 
198
def generate_decoy_set(
    n: int = 5,
    filenames: Optional[list[str]] = None,
    context: Optional[str] = None,
    backend: Optional[str] = None,
) -> list[tuple[str, str]]:
    """Generate up to ``n`` decoys and return them as (filename, body) tuples.

    Args:
        n: Maximum number of decoys to produce. Must be non-negative.
        filenames: Names to use, taken in order and truncated to ``n``. When
            omitted or empty, names are sampled at random without
            replacement from ``DEFAULT_DECOY_NAMES``.
        context: Optional organizational context placed on every request.
        backend: Backend name forwarded to ``generate_decoy``.

    Returns:
        A list of ``(filename, body)`` tuples. It holds fewer than ``n``
        entries when fewer names are available (a short ``filenames`` list,
        or ``n`` larger than the default pool).

    Raises:
        ValueError: If ``n`` is negative. The default-name path already
            raised this via ``random.sample``; with explicit ``filenames``
            a negative ``n`` used to slice ``names[:n]`` and silently drop
            trailing names instead.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    names = filenames or random.sample(
        DEFAULT_DECOY_NAMES, min(n, len(DEFAULT_DECOY_NAMES))
    )
    out = []
    for name in names[:n]:
        req = DecoyRequest(
            filename=name,
            topic_hint=_topic_from_filename(name),
            context=context,
        )
        out.append((name, generate_decoy(req, backend=backend)))
    return out