├── .githooks └── pre-commit ├── .github └── workflows │ ├── lint.yml │ ├── smoke.yml │ └── unit.yml ├── .gitignore ├── LICENSE ├── README.md ├── pyproject.toml ├── reddacted ├── __init__.py ├── api │ ├── __init__.py │ ├── api.py │ ├── list_models.py │ ├── reddit.py │ └── scraper.py ├── cli.py ├── cli_config.py ├── llm_detector.py ├── pii_detector.py ├── results.py ├── sentiment.py ├── styles.py ├── textual_cli.py ├── textual_ui.py ├── ui │ ├── __init__.py │ ├── comment_actions.py │ └── details_screen.py ├── utils │ ├── __init__.py │ ├── analysis.py │ ├── base.py │ ├── log_handler.py │ ├── panels.py │ ├── progress.py │ ├── report.py │ └── tables.py └── version.py ├── release.sh ├── requirements-dev.txt ├── requirements.txt └── tests ├── __init__.py ├── templates └── reddit │ ├── article.json │ └── user.json ├── test_llm.py ├── test_scraper.py └── test_sentiment.py /.githooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Remove trailing whitespace from all staged files 3 | 4 | # Get list of staged files 5 | staged_files=$(git diff --cached --name-only --diff-filter=ACMR | sed 's| |\\ |g') 6 | 7 | # Remove trailing whitespace for each file 8 | for file in $staged_files; do 9 | # Only process text files 10 | if file --mime-encoding "$file" | grep -qE '(us-ascii|utf-8)'; then 11 | # Portable whitespace removal using awk 12 | awk '{ sub(/[ \t]+$/, ""); print }' "$file" > "$file.tmp" && 13 | mv "$file.tmp" "$file" 14 | fi 15 | done 16 | 17 | # Re-add modified files to staging 18 | git add -u 19 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: "Lint" 2 | 3 | on: 4 | push: 5 | branches: [ main, master ] 6 | pull_request: 7 | branches: [ main, master ] 8 | merge_group: 9 | 10 | jobs: 11 | lint: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python 3.12 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: "3.12" 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install flake8 pytest 23 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 24 | - name: Lint with flake8 25 | run: | 26 | # stop the build if there are Python syntax errors or undefined names 27 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 28 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 29 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 30 | -------------------------------------------------------------------------------- /.github/workflows/smoke.yml: -------------------------------------------------------------------------------- 1 | name: "Smoke Tests" 2 | 3 | on: 4 | push: 5 | branches: [ main, master ] 6 | pull_request: 7 | branches: [ main, master ] 8 | merge_group: 9 | 10 | jobs: 11 | smoke-tests: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python 3.12 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.12" 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install uv 23 | pip install -e ".[dev]" 24 | - name: Run tests 25 | run: | 26 | pytest tests 27 | -------------------------------------------------------------------------------- /.github/workflows/unit.yml: -------------------------------------------------------------------------------- 1 | name: "Unit Tests" 2 | 3 | on: 4 | push: 5 | branches: [ main, master ] 6 | pull_request: 7 | branches: [ main, master ] 8 | merge_group: 9 | 10 | jobs: 11 | unit-tests: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.11", "3.12"] 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | cache: 'pip' 24 | 25 | - name: Install package and dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install -e ".[dev]" 29 | pip install pytest-asyncio 30 | 31 | - name: Run tests 32 | run: | 33 | pytest tests/ -v 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Virtual Environment 24 | .env 25 | .venv 26 | env/ 27 | venv/ 28 | ENV/ 29 | 30 | # IDE 31 | .idea/ 32 | .vscode/ 33 | *.swp 34 | *.swo 35 | 36 | # Testing 37 | .coverage 38 | htmlcov/ 39 | .pytest_cache/ 40 | .mypy_cache/ 41 | .tox/ 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | .hypothesis/ 46 | 47 | # Project specific 48 | *.log 49 | output/ 50 | .DS_Store 51 | .aider* 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Taylor Wilsdon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🛡️ reddacted 2 | 3 |
4 | 5 | ### AI-Powered Reddit Privacy Suite 6 | 7 | [![Privacy Shield](https://img.shields.io/badge/Privacy-100%25_Client--Side_Processing-success)](https://github.com/taylorwilsdon) 8 | [![AI Analysis](https://img.shields.io/badge/AI-PII_Detection-blueviolet)](https://github.com/taylorwilsdon/reddacted) 9 | ![GitHub License](https://img.shields.io/github/license/taylorwilsdon/reddacted) 10 | ![PyPI - Version](https://img.shields.io/pypi/v/reddacted) 11 | [![PyPI Downloads](https://static.pepy.tech/badge/reddacted)](https://pepy.tech/projects/reddacted) 12 | 13 |

Local LLM powered, highly performant privacy analysis leveraging AI, sentiment analysis & PII detection
to provide insights into your true privacy with bulk remediation

14 | 15 |

For aging engineers who want to protect their future political careers 🏛️

16 | 17 |
18 | 19 |
20 | reddacted demo 21 |
22 | 23 |
24 | 25 | 26 |
27 | 28 | ## ✨ Key Features 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 |
🛡️
PII Detection
Analyze the content of comments to identify anything that might reveal PII that you may not want correlated with your anonymous username
🤫
Sentiment Analysis
Understand the emotional tone of your Reddit history, combined with upvote/downvote counts & privacy risks to choose which posts to reddact
🔒
Zero-Trust Architecture
Client-side execution only - no data leaves your machine unless you choose to use a hosted API. Fully compatible with any OpenAI-compatible endpoint

Self-Host Ready
Use any model via Ollama, llama.cpp, vLLM or other platform capable of exposing an OpenAI-compatible endpoint. LiteLLM works just dandy.
📊
Smart Cleanup
Preserve valuable contributions while removing risky content - clean up your online footprint without blowing away everything
52 | 53 | ## 🔐 Can I trust this with my data? 54 | 55 |
56 |

You don't have to - read the code for yourself; Reddit is the only thing it calls

57 |
58 | 59 | ```bash 60 | reddacted user yourusername --local-llm "http://localhost:11434" 61 | ``` 62 | 63 | - ✅ Client-side execution only, no tracking or external calls 64 | - ✅ Session-based authentication if you choose - it is optional unless you want to delete 65 | - ✅ Keep your nonsense comments with lots of upvotes and good vibes without unintentionally doxing yourself 66 | 67 | ```bash 68 | reddacted user taylorwilsdon --limit 3 69 | ``` 70 | 71 | ## 📋 Table of Contents 72 | 73 | - [Key Features](#-key-features) 74 | - [Can I trust this with my data?](#-can-i-trust-this-with-my-data) 75 | - [Installation](#-installation) 76 | - [Usage](#-usage) 77 | - [Available Commands](#available-commands) 78 | - [Common Arguments](#common-arguments) 79 | - [LLM Configuration](#llm-configuration) 80 | - [How accurate is the PII detection?](#-how-accurate-is-the-pii-detection-really) 81 | - [FAQ](#-faq) 82 | - [Troubleshooting](#-troubleshooting) 83 | - [Authentication](#-authentication) 84 | - [Advanced Usage](#-advanced-usage) 85 | - [Development](#-development) 86 | - [Testing](#-testing) 87 | - [Common Exceptions](#-common-exceptions) 88 | - [Support & Community](#-support--community) 89 | 90 | ## 📥 Installation 91 | 92 | ```bash 93 | # Install from brew (recommended) 94 | brew install taylorwilsdon/tap/reddacted 95 | 96 | # Install from PyPI (recommended) 97 | pip install reddacted 98 | 99 | # Or install from source 100 | git clone https://github.com/taylorwilsdon/reddacted.git 101 | cd reddacted 102 | pip install -e ".[dev]" # Installs with development dependencies 103 | ``` 104 | 105 | ## 🚀 Usage 106 | 107 | ```bash 108 | # Most basic possible quick start - this will walk you through selecting your LLM in the command line 109 | reddacted user spez 110 | 111 | # Analyze a user's recent comments with local LLM specified 112 | reddacted user spez \ 113 | --limit 5 \ 114 | --local-llm "http://localhost:11434" \ 115 | --model "qwen2.5:3b" \ 116 | --sort new 117 | 118 | # Analyze controversial comments with OpenAI 119 | export OPENAI_API_KEY="your-api-key" 120 | reddacted user spez \ 121 | --sort controversial \ 122 | --time month \ 123 | --model "gpt-4" \ 124 | --limit 10 \ 125 | --pii-only 126 | 127 | # Analyze a specific subreddit post with PII filter disabled 128 | reddacted listing r/privacy abc123 \ 129 | --local-llm "http://localhost:11434" \ 130 | --model "qwen2.5:3b" \ 131 | --disable-pii \ 132 | --sort new 133 | 134 | # Search for specific content (requires auth) 135 | reddacted user spez \ 136 | --enable-auth \ 137 | --text-match "python" \ 138 | --skip-text "deleted" \ 139 | --sort top \ 140 | --time all 141 | 142 | # Bulk comment management 143 | reddacted delete abc123,def456 --batch-size 5 # Delete comments 144 | reddacted update abc123,def456 # Replace with standard redaction message 145 | reddacted update abc123,def456 --use-random-string # Replace with random UUID 146 | ``` 147 | 148 | ### Available Commands 149 | 150 | | Command | Description | 151 | |---------|-------------| 152 | | `user` | Analyze a user's comment history | 153 | | `listing` | Analyze a specific post and its comments | 154 | | `delete` | Delete comments by their IDs | 155 | | `update` | Replace comment content with r/reddacted | 156 | 157 | ### Common Arguments 158 | 159 | | Argument | Description | 160 | |----------|-------------| 161 | | `--limit N` | Maximum comments to analyze (default: 100, 0 for unlimited) | 162 | | `--sort` | Sort method: hot, new, controversial, top (default: new) | 163 | | `--time` | 
Time filter: all, day, hour, month, week, year (default: all) | 164 | | `--output-file` | Save detailed analysis to a file | 165 | | `--enable-auth` | Enable Reddit API authentication | 166 | | `--disable-pii` | Skip PII detection | 167 | | `--pii-only` | Show only comments containing PII | 168 | | `--text-match` | Search for comments containing specific text | 169 | | `--skip-text` | Skip comments containing specific text pattern | 170 | | `--batch-size` | Comments per batch for delete/update (default: 10) | 171 | | `--use-random-string` | Use random UUID instead of standard message when updating comments | 172 | 173 | ### LLM Configuration 174 | 175 | | Argument | Description | 176 | |----------|-------------| 177 | | `--local-llm URL` | Local LLM endpoint (OpenAI compatible) | 178 | | `--openai-key KEY` | OpenAI API key | 179 | | `--openai-base URL` | Custom OpenAI API base URL | 180 | | `--model NAME` | Model to use (default: gpt-4 for OpenAI) | 181 | 182 |
183 | Note: For cloud-based analysis using OpenAI, you can either use the --openai-key flag or set the environment variable: 184 | 185 | ```bash 186 | export OPENAI_API_KEY="your-api-key" 187 | ``` 188 |
189 | 190 | ## ❓ How accurate is the PII detection, really? 191 | 192 | Surprisingly good. Good enough that I run it against my own stuff in delete mode. It's basically a defense-in-depth approach combining these methods: 193 | 194 |
195 |
196 |

📊 AI Detection

197 |

Doesn't need a crazy smart model - don't waste your money on r1 or o1.

198 | 203 |
204 | 205 |
206 |

🔍 Pattern Matching

207 |

50+ regex rules for common PII formats do a first-pass sweep for the obvious stuff

208 |
209 | 210 |
211 |

🧠 Context Analysis

212 |

Are you coming off as a dick? Perhaps that factors into your decision to clean up. Who could say? Mine are all smiley faces.
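The tone signal comes from the project's `sentiment.py` module (NLTK is a dependency). As a rough illustration of the kind of polarity scoring involved - a minimal sketch assuming NLTK's bundled VADER analyzer, which may not match the exact implementation:

```python
# Illustrative only - assumes NLTK's VADER analyzer, not necessarily what sentiment.py does.
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nltk.download("vader_lexicon", quiet=True)  # one-time lexicon download
sia = SentimentIntensityAnalyzer()

score = sia.polarity_scores("This is the worst take I've ever read.")
# score is a dict with 'neg', 'neu', 'pos' and an overall 'compound' value
print("comes off negative" if score["compound"] < 0 else "good vibes")
```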

213 |
214 |
215 | 216 | ## 💡 FAQ 217 | 218 |
219 | Q: How does the AI handle false positives? 220 |

Adjust the confidence threshold (default 0.7) to match your risk tolerance. You're building a repo from source off some random dude's GitHub - don't run this and blindly delete a bunch of stuff; you're a smart person. Review your results, and if it's doing something crazy, please tell me.
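For reference, the LLM is asked to return a structured verdict (see `llm_detector.py`), and the risk score is simply its reported confidence whenever PII is flagged. A rough sketch of the thresholding applied on top - field names mirror the detector's prompt, the example values are invented:

```python
# Rough sketch only - field names mirror llm_detector.py; the values are made up.
analysis = {
    "has_pii": True,
    "confidence": 0.82,
    "details": [{"type": "location", "example": "my office in downtown Springfield"}],
    "reasoning": "Mentions a specific workplace location.",
    "risk_factors": ["workplace", "city"],
}

# Risk is the model's confidence only when PII was flagged, otherwise zero.
risk_score = float(analysis["confidence"]) if analysis["has_pii"] else 0.0

THRESHOLD = 0.7  # the default risk tolerance mentioned above
if risk_score >= THRESHOLD:
    print("flag this comment for review/redaction")
```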

221 |
222 | 223 |
224 | Q: What LLMs are supported? 225 |

Local: any model via Ollama, vLLM, or any other platform capable of exposing an OpenAI-compatible endpoint.
226 | Cloud: OpenAI-compatible endpoints

227 |
228 | 229 |
230 | Q: Is my data sent externally? 231 |

Only if you choose a hosted provider (cloud mode) - local analysis stays entirely on your machine.

232 |
233 | 234 | ## 🔧 Troubleshooting 235 | 236 | If you get "command not found" after installation: 237 | 238 | 1. Check Python scripts directory is in your PATH: 239 | 240 | ```bash 241 | # Typical Linux/Mac location 242 | export PATH="$HOME/.local/bin:$PATH" 243 | 244 | # Typical Windows location 245 | set PATH=%APPDATA%\Python\Python311\Scripts;%PATH% 246 | ``` 247 | 248 | 2. Verify installation location: 249 | 250 | ```bash 251 | pip show reddacted 252 | ``` 253 | 254 | ## 🔑 Authentication 255 | 256 | Before running any commands that require authentication, you'll need to set up your Reddit API credentials: 257 | 258 |
259 |
260 |

Step 1: Create a Reddit Account

261 |

If you don't have one, sign up at https://www.reddit.com/account/register/

262 |
263 | 264 |
265 |

Step 2: Create a Reddit App

266 | 274 |
275 | 276 |
277 |

Step 3: Get Your Credentials

278 |

After creating the app, note down:

279 | 283 |
284 | 285 |
286 |

Step 4: Set Environment Variables

287 | 288 | ```bash 289 | export REDDIT_USERNAME=your-reddit-username 290 | export REDDIT_PASSWORD=your-reddit-password 291 | export REDDIT_CLIENT_ID=your-client-id 292 | export REDDIT_CLIENT_SECRET=your-client-secret 293 | ``` 294 |
295 |
296 | 297 | These credentials are also automatically used if all environment variables are present, even without the `--enable-auth` flag. 298 | 299 | ## 🧙‍♂️ Advanced Usage 300 | 301 | ### Text Filtering 302 | 303 | You can filter comments using these arguments: 304 | 305 | | Argument | Description | 306 | |----------|-------------| 307 | | `--text-match "search phrase"` | Only analyze comments containing specific text (requires authentication) | 308 | | `--skip-text "skip phrase"` | Skip comments containing specific text pattern | 309 | 310 | For example: 311 | 312 | ```bash 313 | # Only analyze comments containing "python" 314 | reddacted user spez --text-match "python" 315 | 316 | # Skip comments containing "deleted" 317 | reddacted user spez --skip-text "deleted" 318 | 319 | # Combine both filters 320 | reddacted user spez --text-match "python" --skip-text "deleted" 321 | ``` 322 | 323 | ## 👨‍💻 Development 324 | 325 | This project uses [UV](https://github.com/astral-sh/uv) for building and publishing. Here's how to set up your development environment: 326 | 327 | 1. Create and activate a virtual environment: 328 | 329 | ```bash 330 | python -m venv venv 331 | source venv/bin/activate # On Windows: venv\Scripts\activate 332 | ``` 333 | 334 | 2. Install UV: 335 | 336 | ```bash 337 | pip install uv 338 | ``` 339 | 340 | 3. Install in development mode with test dependencies: 341 | 342 | ```bash 343 | pip install -e ".[dev]" 344 | ``` 345 | 346 | 4. Build the package: 347 | 348 | ```bash 349 | uv build --sdist --wheel 350 | ``` 351 | 352 | 5. Create a new release: 353 | 354 | ```bash 355 | ./release.sh 356 | ``` 357 | 358 | The release script will: 359 | - Build the package with UV 360 | - Create and push a git tag 361 | - Create a GitHub release 362 | - Update the Homebrew formula 363 | - Publish to PyPI (optional) 364 | 365 | That's it! The package handles all other dependencies automatically, including NLTK data. 366 | 367 | ## 🧪 Testing 368 | 369 | Run the test suite: 370 | 371 | ```bash 372 | pytest tests 373 | ``` 374 | 375 | Want to contribute? Great! Feel free to: 376 | - Open an Issue 377 | - Submit a Pull Request 378 | 379 | ## ⚠️ Common Exceptions 380 | 381 |
382 |
383 |

too many requests

384 |

If you're unauthenticated, Reddit has relatively low rate limits for its API. Either authenticate against your account, or just wait a sec and try again.

385 |
386 | 387 |
388 |

the page you requested does not exist

389 |

Simply a 404, which means that the provided username does not point to a valid page.

390 |
391 |
392 | 393 | > **Pro Tip**: Always review changes before executing deletions! 394 | 395 | ## 🌐 Support & Community 396 | 397 |
398 |

Join our subreddit: r/reddacted

399 |
400 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=68.0.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.uv] 6 | # Configuration for UV package build 7 | package = true 8 | 9 | [project] 10 | name = "reddacted" 11 | version = "0.2.5" 12 | description = "Analyze Reddit comments for PII and other sensitive information using local or OpenAI API compatible LLMs and perform sentiment analysis, edit and remove comments." 13 | readme = "README.md" 14 | requires-python = ">=3.11" 15 | license = {file = "LICENSE"} 16 | authors = [ 17 | {name = "Taylor Wilsdon", email = "taylor@taylorwilsdon.com"} 18 | ] 19 | keywords = ["reddact", "reddacted", "reddit", "llm", "pii", "sentiment", "analysis", "nlp"] 20 | classifiers = [ 21 | "Development Status :: 3 - Alpha", 22 | "Intended Audience :: Science/Research", 23 | "License :: OSI Approved :: MIT License", 24 | "Programming Language :: Python :: 3", 25 | "Programming Language :: Python :: 3.11", 26 | "Environment :: Console", 27 | "Topic :: Text Processing :: Linguistic", 28 | "Topic :: Scientific/Engineering :: Artificial Intelligence" 29 | ] 30 | 31 | dependencies = [ 32 | "autopage>=0.5.2", 33 | "praw>=7.7.1", 34 | "nltk>=3.8.1", 35 | "requests>=2.31.0", 36 | "six>=1.16.0", 37 | "httpx==0.27.2", 38 | "openai>=1.3.7", 39 | "rich>=13.7.0", 40 | "textual==2.1.2", 41 | "urllib3==2.3.0" 42 | ] 43 | 44 | [project.urls] 45 | "Bug Tracker" = "https://github.com/taylorwilsdon/reddacted/issues" 46 | 47 | [project.entry-points."reddacted.commands"] 48 | listing = "reddacted.cli:Listing" 49 | user = "reddacted.cli:User" 50 | 51 | [project.optional-dependencies] 52 | dev = [ 53 | "pytest", 54 | "black", 55 | "mypy", 56 | "ruff", 57 | "uv", 58 | "tomli>=2.0.0" # Required for Homebrew formula generation 59 | ] 60 | 61 | [project.scripts] 62 | reddacted = "reddacted.cli:main" 63 | 64 | [tool.setuptools] 65 | packages = ["reddacted", "reddacted.ui", "reddacted.api", "reddacted.utils"] 66 | 67 | [tool.black] 68 | line-length = 100 69 | target-version = ['py311'] 70 | 71 | [tool.ruff] 72 | line-length = 100 73 | target-version = "py311" 74 | select = ["E", "F", "B", "I"] 75 | 76 | [tool.mypy] 77 | python_version = "3.11" 78 | warn_return_any = true 79 | warn_unused_configs = true 80 | disallow_untyped_defs = true 81 | 82 | [tool.pytest.ini_options] 83 | asyncio_mode = "auto" 84 | testpaths = ["tests"] 85 | python_files = "test_*.py" 86 | python_classes = "Test*" 87 | python_functions = "test_*" 88 | addopts = "-v --tb=short" 89 | markers = [ 90 | "asyncio: mark test as async/await test", 91 | ] 92 | asyncio_default_fixture_loop_scope = "function" 93 | -------------------------------------------------------------------------------- /reddacted/__init__.py: -------------------------------------------------------------------------------- 1 | """reddacted - AI-Powered Reddit Privacy Suite""" 2 | 3 | __version__ = "0.2.5" 4 | -------------------------------------------------------------------------------- /reddacted/api/__init__.py: -------------------------------------------------------------------------------- 1 | from .list_models import * 2 | -------------------------------------------------------------------------------- /reddacted/api/api.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import six 3 | 4 | 5 | 
@six.add_metaclass(abc.ABCMeta) 6 | class API(object): 7 | """Base API Interface 8 | 9 | The API is responsible for gathering data to perform a sentiment 10 | analysis on. 11 | """ 12 | 13 | @abc.abstractmethod 14 | def parse_listing(self, subreddit, article, limit=100, **kwargs): 15 | """Parses a Listing Reddit Object. 16 | 17 | Args: 18 | subreddit: Subreddit to parse 19 | article: Article ID to parse 20 | limit: Maximum number of comments to return (None for unlimited) 21 | """ 22 | pass 23 | 24 | @abc.abstractmethod 25 | def parse_user(self, username, limit=100, **kwargs): 26 | """Parses a User Reddit Object. 27 | 28 | Args: 29 | username: Username to parse 30 | limit: Maximum number of comments to return (None for unlimited) 31 | """ 32 | pass 33 | -------------------------------------------------------------------------------- /reddacted/api/list_models.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from typing import List, Optional 3 | 4 | class ModelFetchError(Exception): 5 | """Custom exception for errors during model fetching.""" 6 | pass 7 | 8 | def fetch_available_models(base_url: str, api_key: Optional[str] = None) -> List[str]: 9 | """ 10 | Fetches available LLM models from a given base URL (OpenAI compatible API). 11 | 12 | Args: 13 | base_url: The base URL of the LLM API (e.g., http://localhost:11434 or https://api.openai.com). 14 | api_key: Optional API key required for authentication (e.g., OpenAI). 15 | 16 | Returns: 17 | A list of available model IDs. 18 | 19 | Raises: 20 | ModelFetchError: If there's an error fetching or parsing the models. 21 | requests.exceptions.RequestException: If there's a connection error. 22 | """ 23 | models_url = f"{base_url.rstrip('/')}/v1/models" # Ensure correct endpoint path 24 | headers = {"User-Agent": "Reddacted-Config-UI"} 25 | if api_key: 26 | headers["Authorization"] = f"Bearer {api_key}" 27 | 28 | try: 29 | response = requests.get(models_url, headers=headers, timeout=10) # Added timeout 30 | response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx) 31 | 32 | models_data = response.json() 33 | available_models = [m.get("id") for m in models_data.get("data", []) if m.get("id")] 34 | 35 | if not available_models: 36 | raise ModelFetchError("No available models found in the API response.") 37 | 38 | return available_models 39 | 40 | except requests.exceptions.HTTPError as e: 41 | raise ModelFetchError(f"HTTP Error fetching models: {e.response.status_code} - {e.response.text}") from e 42 | except requests.exceptions.ConnectionError as e: 43 | raise ModelFetchError(f"Connection error fetching models from {models_url}: {e}") from e 44 | except requests.exceptions.Timeout as e: 45 | raise ModelFetchError(f"Timeout fetching models from {models_url}: {e}") from e 46 | except requests.exceptions.RequestException as e: 47 | raise ModelFetchError(f"Error fetching models from {models_url}: {e}") from e 48 | except (ValueError, KeyError) as e: # Handle potential JSON parsing or key errors 49 | raise ModelFetchError(f"Error parsing model response from {models_url}: {e}") from e -------------------------------------------------------------------------------- /reddacted/api/reddit.py: -------------------------------------------------------------------------------- 1 | from types import BuiltinMethodType 2 | import time 3 | import os 4 | from typing import List, Dict, Any, Optional # Added Optional 5 | from typing import List, Dict, Any 6 | import uuid # Added for 
random string generation 7 | import praw 8 | from reddacted.api import api 9 | from reddacted.utils.log_handler import get_logger, with_logging 10 | from reddacted.utils.log_handler import handle_exception 11 | 12 | logger = get_logger(__name__) 13 | 14 | 15 | class AuthenticationRequiredError(Exception): 16 | """Raised when authentication is required but not configured""" 17 | 18 | pass 19 | 20 | 21 | @with_logging(logger) 22 | class Reddit(api.API): 23 | """The Reddit Class obtains data to perform sentiment analysis on 24 | using the Reddit API. 25 | 26 | It allows an unauthenticated user to obtain data to analyze various 27 | reddit objects. 28 | """ 29 | 30 | def __init__(self, config: Optional[Dict[str, Any]] = None, use_random_string=False): 31 | """Initialize Reddit API client. Prioritizes credentials from config, 32 | then environment variables, falling back to read-only mode. 33 | 34 | Args: 35 | config: Optional dictionary containing configuration values (including credentials). 36 | use_random_string: Whether to use random UUIDs instead of standard message when updating comments. 37 | """ 38 | self.authenticated = False 39 | self.reddit = None 40 | self.use_random_string = use_random_string 41 | config = config or {} # Ensure config is a dict 42 | 43 | logger.debug_with_context(f"Initializing Reddit client. Config provided: {bool(config)}, Use random string: {use_random_string}") 44 | 45 | # --- Try credentials from config first --- 46 | username = config.get("reddit_username") 47 | password = config.get("reddit_password") 48 | client_id = config.get("reddit_client_id") 49 | client_secret = config.get("reddit_client_secret") 50 | 51 | # Check if enable_auth is explicitly True in config, otherwise don't use config creds 52 | auth_enabled_in_config = config.get("enable_auth", False) 53 | 54 | if auth_enabled_in_config and all([username, password, client_id, client_secret]): 55 | logger.info_with_context("Attempting authentication using credentials from configuration (auth enabled).") 56 | try: 57 | self.reddit = praw.Reddit( 58 | client_id=client_id, 59 | client_secret=client_secret, 60 | password=password, 61 | user_agent=f"reddacted u/{username}", 62 | username=username, 63 | check_for_async=False, 64 | ) 65 | logger.info_with_context("Successfully authenticated with Reddit API using configuration.") 66 | logger.debug_with_context(f"Granted scopes (config auth): {self.reddit.auth.scopes()}") # Log scopes 67 | self.authenticated = True 68 | return # Exit if successful 69 | except Exception as e: 70 | logger.warning_with_context(f"Authentication with config credentials failed: {e}. Falling back...") 71 | # Continue to try environment variables 72 | elif not auth_enabled_in_config and any([username, password, client_id, client_secret]): 73 | logger.info_with_context("Credentials found in config, but 'enable_auth' is false. 
Skipping config auth attempt.") 74 | 75 | 76 | # --- Fallback to environment variables --- 77 | logger.debug_with_context("Checking environment variables for Reddit credentials.") 78 | env_username = os.environ.get("REDDIT_USERNAME") 79 | env_password = os.environ.get("REDDIT_PASSWORD") 80 | env_client_id = os.environ.get("REDDIT_CLIENT_ID") 81 | env_client_secret = os.environ.get("REDDIT_CLIENT_SECRET") 82 | 83 | if all([env_username, env_password, env_client_id, env_client_secret]): 84 | # Only use env vars if config auth wasn't explicitly enabled and successful 85 | if not (auth_enabled_in_config and self.authenticated): 86 | logger.info_with_context("Attempting authentication using credentials from environment variables.") 87 | try: 88 | self.reddit = praw.Reddit( 89 | client_id=env_client_id, 90 | client_secret=env_client_secret, 91 | password=env_password, 92 | user_agent=f"reddacted u/{env_username}", 93 | username=env_username, 94 | check_for_async=False, 95 | ) 96 | logger.info_with_context("Successfully authenticated with Reddit API using environment variables.") 97 | logger.debug_with_context(f"Granted scopes (env auth): {self.reddit.auth.scopes()}") # Log scopes 98 | self.authenticated = True 99 | return # Exit if successful 100 | except Exception as e: 101 | logger.warning_with_context(f"Authentication with environment variable credentials failed: {e}. Falling back...") 102 | # Continue to try read-only 103 | else: 104 | logger.debug_with_context("Skipping environment variable auth attempt as config auth was enabled and successful.") 105 | 106 | # --- Fallback to read-only mode --- 107 | if not self.authenticated: # Only attempt read-only if not already authenticated 108 | missing_sources = [] 109 | if not auth_enabled_in_config or not all([username, password, client_id, client_secret]): 110 | missing_sources.append("configuration") 111 | if not all([env_username, env_password, env_client_id, env_client_secret]): 112 | missing_sources.append("environment variables") 113 | 114 | logger.warning_with_context( 115 | f"Reddit API authentication credentials not found or incomplete in { ' or '.join(missing_sources) }. " 116 | "Falling back to read-only mode. Some features like comment deletion/update will be unavailable." 117 | ) 118 | try: 119 | # Use client_id/secret from config OR env vars if available for read-only 120 | read_only_client_id = config.get("reddit_client_id") or env_client_id 121 | read_only_client_secret = config.get("reddit_client_secret") or env_client_secret 122 | 123 | if read_only_client_id and read_only_client_secret: 124 | logger.debug_with_context("Attempting read-only initialization with client_id/secret.") 125 | self.reddit = praw.Reddit( 126 | client_id=read_only_client_id, 127 | client_secret=read_only_client_secret, 128 | user_agent="reddacted:read_only_client_v3" # Updated user agent slightly 129 | ) 130 | logger.info_with_context("Successfully initialized read-only Reddit client (with client ID/secret).") 131 | elif read_only_client_id: 132 | logger.debug_with_context("Attempting read-only initialization with client_id only.") 133 | self.reddit = praw.Reddit( 134 | client_id=read_only_client_id, 135 | user_agent="reddacted:read_only_client_v3" 136 | ) 137 | logger.info_with_context("Successfully initialized read-only Reddit client (with client ID only).") 138 | else: 139 | # PRAW requires at least client_id for read-only access usually. 140 | # If neither config nor env vars provide it, initialization will likely fail here. 
141 | logger.error_with_context("Cannot initialize read-only Reddit client: Missing 'client_id' in both config and environment variables.") 142 | # Optionally, raise an error or let the PRAW error propagate 143 | # raise ValueError("Missing required client_id for Reddit API access.") 144 | # For now, let PRAW handle the potential error if it occurs without client_id 145 | self.reddit = praw.Reddit(user_agent="reddacted:read_only_client_v3") # This line might fail 146 | logger.info_with_context("Attempted read-only Reddit client initialization (without client ID/secret - may fail).") 147 | 148 | 149 | except Exception as e: 150 | # Log the specific PRAW error if initialization fails 151 | logger.error_with_context(f"Failed to initialize read-only client: {str(e)}") 152 | # self.reddit remains None 153 | 154 | @with_logging(logger) 155 | def parse_listing(self, subreddit, article, limit=100, **kwargs): 156 | """Parses a listing and extracts the comments from it. 157 | 158 | :param subreddit: a subreddit 159 | :param article: an article associated with the subreddit 160 | :param limit: maximum number of comments to return (None for unlimited) 161 | :return: a list of comments from an article 162 | """ 163 | if self.reddit is None: 164 | logger.error_with_context("Reddit client initialization failed - cannot fetch comments") 165 | return [] 166 | 167 | mode = "authenticated" if self.authenticated else "read-only" 168 | logger.info_with_context(f"Fetching comments for article '{article}' in {mode} mode") 169 | logger.debug_with_context( 170 | f"Parsing listing for subreddit={subreddit}, article={article}, limit={limit}" 171 | ) 172 | submission = self.reddit.submission(id=article) 173 | logger.debug_with_context(f"Retrieved submission: title='{submission.title}'") 174 | logger.debug_with_context("Expanding 'more comments' links") 175 | submission.comments.replace_more(limit=None) 176 | comments = [] 177 | 178 | for comment in submission.comments.list(): 179 | comment_data = { 180 | "text": comment.body.rstrip(), 181 | "upvotes": comment.ups, 182 | "downvotes": comment.downs, 183 | "permalink": comment.permalink, 184 | "id": comment.id, 185 | } 186 | logger.debug_with_context( 187 | f"Processing comment: ups={comment.ups}, downs={comment.downs}, text_preview='{comment.body[:50]}...'" 188 | ) 189 | comments.append(comment_data) 190 | 191 | return comments[:limit] if limit else comments 192 | 193 | def _process_comments( 194 | self, 195 | comment_ids: list[str], 196 | action: str, 197 | batch_size: int = 10, 198 | update_content: str = None, # Added parameter for update text 199 | ) -> dict[str, any]: 200 | """ 201 | Process comments in batches with rate limiting. 202 | 203 | :param comment_ids: List of comment IDs to process. 204 | :param action: Action to perform ('delete' or 'update'). 205 | :param batch_size: Number of comments to process per batch. 206 | :param update_content: The text to use when updating comments (only used if action='update'). 207 | :return: Dict with results and statistics. 
208 | """ 209 | logger.debug_with_context("Starting _process_comments") 210 | if not self.authenticated: 211 | raise AuthenticationRequiredError(f"Full authentication required for comment {action}") 212 | 213 | results = { 214 | "processed": 0, 215 | "success": 0, 216 | "failures": 0, 217 | "successful_ids": [], 218 | "failed_ids": [], 219 | "errors": [], 220 | } 221 | 222 | for i in range(0, len(comment_ids), batch_size): 223 | batch = comment_ids[i : i + batch_size] 224 | try: 225 | for comment_id in batch: 226 | try: 227 | comment = self.reddit.comment(id=comment_id) 228 | if action == "delete": 229 | logger.debug_with_context(f"Deleting comment ID {comment.id}") # Use comment_id for clarity 230 | comment.delete() 231 | results["successful_ids"].append(comment_id) 232 | results["success"] += 1 233 | elif action == "update": 234 | logger.debug_with_context(f"Updating comment ID {comment.id} with content: '{update_content[:50]}...'") # Use comment_id 235 | if update_content is None: 236 | # Should not happen if called via update_comments, but provides a fallback. 237 | logger.warning_with_context(f"No update_content provided for comment {comment_id}, skipping edit.") 238 | else: 239 | comment.edit(update_content) 240 | results["successful_ids"].append(comment_id) 241 | results["success"] += 1 242 | except Exception as e: 243 | results["failures"] += 1 244 | results["failed_ids"].append(comment_id) 245 | results["errors"].append({"comment_id": comment_id, "error": str(e)}) 246 | # Respect Reddit's API rate limit (1 req/sec) 247 | time.sleep(1.1) 248 | 249 | results["processed"] += len(batch) 250 | except praw.exceptions.APIException as e: 251 | handle_exception(e, "Reddit API Rate Limit Exceeded") 252 | time.sleep(60) # Wait 1 minute before retrying 253 | continue 254 | 255 | return results 256 | 257 | def delete_comments(self, comment_ids: list[str], batch_size: int = 10) -> dict[str, any]: 258 | """ 259 | Delete comments in batches with rate limiting 260 | :param comment_ids: List of comment IDs to delete 261 | :param batch_size: Number of comments to process per batch 262 | :return: Dict with results and statistics 263 | """ 264 | return self._process_comments(comment_ids, "delete", batch_size) 265 | 266 | def update_comments( 267 | self, 268 | comment_ids: list[str], 269 | batch_size: int = 10, 270 | use_random_string: bool = None, # Can be explicitly provided or use instance default 271 | ) -> dict[str, any]: 272 | """ 273 | Update comments in batches with rate limiting. 274 | 275 | Replaces content either with a standard redaction message or a random UUID. 276 | 277 | :param comment_ids: List of comment IDs to update. 278 | :param batch_size: Number of comments to process per batch. 279 | :param use_random_string: If True, replace content with a random UUID; otherwise, use the standard message. 280 | If None, uses the value set during Reddit instance initialization. 281 | :return: Dict with results and statistics. 282 | """ 283 | # Use instance default if not explicitly provided 284 | if use_random_string is None: 285 | use_random_string = self.use_random_string 286 | 287 | if use_random_string: 288 | content_to_write = str(uuid.uuid4()) 289 | logger.info_with_context(f"Updating comments with random UUIDs. 
Example: {content_to_write}") 290 | else: 291 | content_to_write = "This comment has been reddacted to preserve online privacy - see r/reddacted for more info" 292 | logger.info_with_context("Updating comments with standard redaction message.") 293 | 294 | return self._process_comments( 295 | comment_ids, "update", batch_size, update_content=content_to_write 296 | ) 297 | 298 | @with_logging(logger) 299 | def search_comments( 300 | self, query: str, subreddit: str = None, limit: int = 100 301 | ) -> List[Dict[str, Any]]: 302 | """Search for comments containing specific text. 303 | 304 | Args: 305 | query: Text to search for 306 | subreddit: Optional subreddit to limit search to 307 | limit: Maximum number of results to return 308 | 309 | Returns: 310 | List of comment dictionaries 311 | 312 | Raises: 313 | AuthenticationRequiredError: If not authenticated 314 | """ 315 | if not self.authenticated: 316 | raise AuthenticationRequiredError("Authentication required for comment search") 317 | 318 | logger.debug_with_context(f"Searching for '{query}' in {subreddit or 'all'}") 319 | 320 | try: 321 | comments = [] 322 | search_params = {"q": query, "limit": limit, "type": "comment"} 323 | if subreddit: 324 | results = self.reddit.subreddit(subreddit).search(**search_params) 325 | else: 326 | results = self.reddit.subreddit("all").search(**search_params) 327 | 328 | for result in results: 329 | if isinstance(result, praw.models.Comment): 330 | comments.append( 331 | { 332 | "text": result.body.rstrip(), 333 | "upvotes": result.ups, 334 | "downvotes": result.downs, 335 | "permalink": result.permalink, 336 | "id": result.id, 337 | } 338 | ) 339 | if len(comments) >= limit: 340 | break 341 | 342 | return comments 343 | except Exception as e: 344 | handle_exception(e, f"Failed to search for '{query}'", debug=True) 345 | return [] 346 | 347 | @with_logging(logger) 348 | def parse_user(self, username, limit=100, sort="new", time_filter="all", **kwargs): 349 | """Parses a listing and extracts the comments from it. 
350 | 351 | :param username: a user 352 | :param limit: maximum number of comments to return (None for unlimited) 353 | :param sort: Sort method ('hot', 'new', 'controversial', 'top') 354 | :param time_filter: Time filter for 'top' ('all', 'day', 'hour', 'month', 'week', 'year') 355 | :return: a list of comments from a user 356 | :raises: prawcore.exceptions.NotFound if user doesn't exist 357 | :raises: prawcore.exceptions.Forbidden if user is private/banned 358 | """ 359 | if self.reddit is None: 360 | logger.error_with_context("Reddit client initialization failed - cannot fetch comments") 361 | return [] 362 | 363 | mode = "authenticated" if self.authenticated else "read-only" 364 | logger.info_with_context(f"Fetching comments for user '{username}' in {mode} mode") 365 | logger.debug_with_context(f"Using sort method: {sort}") 366 | try: 367 | redditor = self.reddit.redditor(username) 368 | comments = [] 369 | 370 | # Get the appropriate comment listing based on sort 371 | if sort == "hot": 372 | comment_listing = redditor.comments.hot(limit=limit) 373 | elif sort == "new": 374 | comment_listing = redditor.comments.new(limit=limit) 375 | elif sort == "controversial": 376 | comment_listing = redditor.comments.controversial( 377 | limit=limit, time_filter=time_filter 378 | ) 379 | elif sort == "top": 380 | comment_listing = redditor.comments.top(limit=limit, time_filter=time_filter) 381 | else: 382 | comment_listing = redditor.comments.new(limit=limit) # default to new 383 | 384 | for comment in comment_listing: 385 | comment_data = { 386 | "text": comment.body.rstrip(), 387 | "upvotes": comment.ups, 388 | "downvotes": comment.downs, 389 | "permalink": comment.permalink, 390 | "id": comment.id, 391 | } 392 | 393 | # If text matching is enabled, only include matching comments 394 | if "text_match" in kwargs: 395 | logger.debug_with_context( 396 | f"Text match enabled: searching for '{kwargs['text_match']}' in comment {comment_data['id']}" 397 | ) 398 | if kwargs["text_match"].lower() in comment_data["text"].lower(): 399 | logger.debug_with_context(f"Match found in comment {comment_data['id']}") 400 | comments.append(comment_data) 401 | else: 402 | logger.debug_with_context(f"No match found in comment {comment_data['id']}") 403 | else: 404 | logger.debug_with_context( 405 | f"No text match filter, including comment {comment_data['id']}" 406 | ) 407 | comments.append(comment_data) 408 | 409 | if len(comments) >= limit: 410 | break 411 | 412 | return comments 413 | except Exception as e: 414 | handle_exception(e, f"Failed to fetch comments for user '{username}'", debug=True) 415 | return [] 416 | -------------------------------------------------------------------------------- /reddacted/api/scraper.py: -------------------------------------------------------------------------------- 1 | from types import BuiltinMethodType 2 | import requests 3 | from reddacted.api import api 4 | from reddacted.utils.log_handler import get_logger, with_logging 5 | from reddacted.utils.log_handler import handle_exception 6 | 7 | logger = get_logger(__name__) 8 | 9 | 10 | class Scraper(api.API): 11 | """The Reddit Class obtains data to perform sentiment analysis by 12 | scraping the Reddit json endpoint. 13 | 14 | It allows an unauthenticated user to obtain data to analyze various 15 | reddit objects. 
16 | """ 17 | 18 | def __init__(self): 19 | """Initialize Scraper""" 20 | pass 21 | 22 | @with_logging(logger) 23 | def parse_listing(self, subreddit, article, limit=100, **kwargs): 24 | """Parses a listing and extracts the comments from it. 25 | 26 | :param subreddit: a subreddit 27 | :param article: an article associated with the subreddit 28 | :return: a list of comments from an article. 29 | """ 30 | logger.debug_with_context( 31 | f"Parsing listing for subreddit={subreddit}, article={article}, limit={limit}" 32 | ) 33 | url = f"https://www.reddit.com/r/{subreddit}/{article}.json?limit={limit}" 34 | headers = kwargs.get("headers") 35 | logger.debug_with_context(f"Request URL: {url}") 36 | logger.debug_with_context(f"Request headers: {headers}") 37 | try: 38 | response = requests.get(url, headers=headers) 39 | logger.debug_with_context(f"Response status code: {response.status_code}") 40 | except Exception as e: 41 | handle_exception(e, "Error obtaining article information", debug=True) 42 | return [] 43 | 44 | comments = [] 45 | json_resp = response.json() 46 | logger.debug_with_context(f"Retrieved {len(json_resp)} top-level JSON objects") 47 | 48 | for top in range(0, len(json_resp)): 49 | logger.debug_with_context(f"Processing top-level object {top+1}/{len(json_resp)}") 50 | if json_resp[top]["data"]["children"]: 51 | children = json_resp[top]["data"]["children"] 52 | for child in range(0, len(children)): 53 | data = children[child]["data"] 54 | if "body" in data: 55 | # remove empty spaces and weird reddit strings 56 | comment_text = data["body"].rstrip() 57 | comment_text = " ".join(comment_text.split()) 58 | comment_text = comment_text.replace("&#x200B;", "") 59 | if comment_text != "": 60 | comment_data = { 61 | "text": comment_text, 62 | "upvotes": data["ups"], 63 | "downvotes": data["downs"], 64 | "permalink": data["permalink"], 65 | "id": data["id"], 66 | } 67 | logger.debug_with_context( 68 | f"Added comment: ups={data['ups']}, downs={data['downs']}, text_preview='{comment_text[:50]}...'" 69 | ) 70 | comments.append(comment_data) 71 | 72 | logger.debug_with_context(f"Returning {len(comments)} processed comments") 73 | return comments 74 | 75 | @with_logging(logger) 76 | def parse_user(self, username, limit=100, sort="new", time_filter="all", **kwargs): 77 | """Parses a listing and extracts the comments from it. 78 | 79 | :param username: a user 80 | :param limit: maximum number of comments to return 81 | :param sort: Sort method ('hot', 'new', 'controversial', 'top') 82 | :param time_filter: Time filter for 'top' ('all', 'day', 'hour', 'month', 'week', 'year') 83 | :return: a list of comments from a user. 
84 | """ 85 | url = f"https://www.reddit.com/user/{username}.json?limit={limit}&sort={sort}" 86 | if sort in ["top", "controversial"]: 87 | url += f"&t={time_filter}" 88 | logger.debug_with_context(f"Completed scraping for user {username}") 89 | headers = kwargs.get("headers") 90 | try: 91 | response = requests.get(url, headers=headers) 92 | except Exception as e: 93 | handle_exception(e, "Error obtaining user information", debug=True) 94 | return [] 95 | 96 | comments = [] 97 | json_resp = response.json() 98 | 99 | if json_resp["data"]["children"]: 100 | children = json_resp["data"]["children"] 101 | for child in range(0, len(children)): 102 | data = children[child]["data"] 103 | if "body" in data: 104 | # remove empty spaces and weird reddit strings 105 | comment_text = data["body"].rstrip() 106 | comment_text = " ".join(comment_text.split()) 107 | comment_text = comment_text.replace("&#x200B;", "") 108 | if comment_text != "": 109 | comments.append( 110 | { 111 | "text": comment_text, 112 | "upvotes": data["ups"], 113 | "downvotes": data["downs"], 114 | "permalink": data["permalink"], 115 | "id": data["id"], 116 | } 117 | ) 118 | logger.debug_with_context(f"Reddact is scraping {url}...") 119 | return comments 120 | -------------------------------------------------------------------------------- /reddacted/cli_config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import os.path 4 | from typing import Optional, Dict, Any, TYPE_CHECKING, Tuple, List 5 | 6 | from textual.widgets import Input, Label, Checkbox # Needed for validate_inputs 7 | 8 | # Forward reference for type hinting ConfigApp to avoid circular import 9 | if TYPE_CHECKING: 10 | from reddacted.textual_cli import ConfigApp 11 | 12 | # --- Constants --- 13 | VALID_SORT_OPTIONS = ["hot", "new", "controversial", "top"] 14 | VALID_TIME_OPTIONS = ["all", "day", "hour", "month", "week", "year"] 15 | URL_REGEX = r"^(http|https)://[^\s/$.?#].[^\s]*$" 16 | CONFIG_FILE = "config.json" 17 | 18 | # Environment Variable Keys (Example - adjust if needed) 19 | ENV_VARS_MAP = { 20 | "REDDIT_USERNAME": "reddit_username", 21 | "REDDIT_PASSWORD": "reddit_password", 22 | "REDDIT_CLIENT_ID": "reddit_client_id", 23 | "REDDIT_CLIENT_SECRET": "reddit_client_secret", 24 | "OPENAI_API_KEY": "openai_key", 25 | } 26 | 27 | # --- Configuration File I/O --- 28 | 29 | def load_config_from_file(filepath: str) -> Tuple[Dict[str, Any], Optional[str]]: 30 | """ 31 | Loads configuration from a JSON file. 32 | 33 | Args: 34 | filepath: The path to the configuration file. 35 | 36 | Returns: 37 | A tuple containing: 38 | - A dictionary with the loaded configuration values (empty if file not found or error). 39 | - An optional notification message (string) for success or error. 40 | """ 41 | config_values = {} 42 | notification = None 43 | if os.path.exists(filepath): 44 | try: 45 | with open(filepath, "r") as f: 46 | config_values = json.load(f) 47 | notification = f"Configuration loaded from '{filepath}'." 48 | except json.JSONDecodeError: 49 | notification = f"Error decoding '{filepath}'. Using defaults." 50 | config_values = {} 51 | except Exception as e: 52 | notification = f"Error loading config file '{filepath}': {e}" 53 | config_values = {} 54 | else: 55 | notification = f"No configuration file found at '{filepath}'. Using defaults." 
56 | return config_values, notification 57 | 58 | def save_config_to_file(filepath: str, config_data: Dict[str, Any]) -> Optional[str]: 59 | """ 60 | Saves the configuration dictionary to a JSON file. 61 | 62 | Args: 63 | filepath: The path to the configuration file. 64 | config_data: The dictionary containing configuration values to save. 65 | 66 | Returns: 67 | An optional notification message (string) for success or error. 68 | """ 69 | notification = None 70 | try: 71 | with open(filepath, "w") as f: 72 | json.dump(config_data, f, indent=4) # Write with indentation 73 | notification = f"Configuration saved successfully to '{filepath}'." 74 | except IOError as e: 75 | notification = f"Error saving configuration to '{filepath}': {e}" 76 | except Exception as e: # Catch other potential errors 77 | notification = f"An unexpected error occurred during save to '{filepath}': {e}" 78 | return notification 79 | 80 | # --- Configuration Merging & Processing --- 81 | 82 | def merge_configs(file_config: Dict[str, Any], initial_config: Dict[str, Any]) -> Dict[str, Any]: 83 | """ 84 | Merges configuration from file and initial values (CLI/env). 85 | Initial values take precedence. Handles boolean type conversions. 86 | 87 | Args: 88 | file_config: Configuration loaded from the file. 89 | initial_config: Configuration provided via CLI arguments or environment variables. 90 | 91 | Returns: 92 | The final merged configuration dictionary. 93 | """ 94 | merged_config = file_config.copy() # Start with file config 95 | 96 | # Process and merge initial_config, giving it precedence 97 | processed_initial_config = {} 98 | boolean_keys = ["enable_auth", "pii_only", "use_openai_api", "write_to_file"] # Keys expected to be boolean 99 | for key, value in initial_config.items(): 100 | if isinstance(value, str): 101 | if value.lower() in ('true', '1', 'yes'): 102 | processed_initial_config[key] = True 103 | elif value.lower() in ('false', '0', 'no'): 104 | processed_initial_config[key] = False 105 | else: 106 | processed_initial_config[key] = value # Keep as string if not boolean-like 107 | elif isinstance(value, int) and key in boolean_keys: 108 | processed_initial_config[key] = bool(value) # Convert int to bool for specific keys 109 | else: 110 | processed_initial_config[key] = value # Keep other types as is 111 | 112 | merged_config.update(processed_initial_config) # Update with processed initial values 113 | return merged_config 114 | 115 | 116 | # --- Input Validation --- 117 | 118 | def validate_inputs(app: 'ConfigApp') -> Tuple[bool, List[str]]: 119 | """ 120 | Validate all visible and required inputs in the ConfigApp. 121 | 122 | Args: 123 | app: The instance of the ConfigApp. 124 | 125 | Returns: 126 | A tuple containing: 127 | - A boolean indicating if all validations passed. 128 | - A list of validation failure messages. 
129 | """ 130 | is_valid = True 131 | summary_messages = [] 132 | 133 | # Validate standard Input widgets with validators 134 | for input_widget in app.query(Input): 135 | if input_widget.display and not input_widget.disabled: 136 | # Special case: Skip validation for batch_size if it's empty 137 | if input_widget.id == "batch_size" and not input_widget.value.strip(): 138 | input_widget.remove_class("-invalid") # Ensure it's not marked invalid if empty 139 | input_widget.add_class("-valid") 140 | continue # Skip the rest of the validation for this input 141 | 142 | # Clear previous invalid state 143 | input_widget.remove_class("-invalid") 144 | input_widget.add_class("-valid") # Assume valid initially 145 | 146 | if input_widget.validators: 147 | validation_result = input_widget.validate(input_widget.value) 148 | if validation_result is not None and not validation_result.is_valid: 149 | is_valid = False 150 | # Find label via DOM traversal 151 | label_text = input_widget.id # Default to ID 152 | try: 153 | container = input_widget.parent 154 | if container: 155 | label_widget = container.query(Label).first() 156 | if label_widget: 157 | label_text = str(label_widget.renderable).strip().rstrip(':') # Use renderable text, clean up 158 | except Exception: 159 | pass # Keep default ID if traversal fails 160 | summary_messages.extend([f"{label_text}: {desc}" for desc in validation_result.failure_descriptions]) 161 | input_widget.remove_class("-valid") 162 | input_widget.add_class("-invalid") 163 | 164 | # Specific check for output_file if write_to_file is checked 165 | write_cb = app.query_one("#write_to_file_checkbox", Checkbox) 166 | output_input = app.query_one("#output_file", Input) 167 | if write_cb.value and not output_input.value.strip(): 168 | is_valid = False 169 | summary_messages.append("Output File Path: Cannot be empty when 'Write to File' is checked.") 170 | output_input.remove_class("-valid") 171 | output_input.add_class("-invalid") 172 | elif write_cb.value: # If checked and not empty, ensure it's marked valid (if not already invalid by validator) 173 | if "-invalid" not in output_input.classes: 174 | output_input.remove_class("-invalid") 175 | output_input.add_class("-valid") 176 | 177 | 178 | # Specific check for Reddit auth fields if enable_auth is checked 179 | auth_cb = app.query_one("#enable_auth", Checkbox) 180 | if auth_cb.value: 181 | auth_fields = ["reddit_username", "reddit_password", "reddit_client_id", "reddit_client_secret"] 182 | for field_id in auth_fields: 183 | auth_input = app.query_one(f"#{field_id}", Input) 184 | if not auth_input.value.strip(): 185 | is_valid = False 186 | # Find label via DOM traversal 187 | label_text = field_id # Default to ID 188 | try: 189 | container = auth_input.parent 190 | if container: 191 | label_widget = container.query(Label).first() 192 | if label_widget: 193 | label_text = str(label_widget.renderable).strip().rstrip(':') # Use renderable text, clean up 194 | except Exception: 195 | pass # Keep default ID if traversal fails 196 | summary_messages.append(f"{label_text}: Cannot be empty when 'Enable Auth' is checked.") 197 | auth_input.remove_class("-valid") 198 | auth_input.add_class("-invalid") 199 | else: # If not empty, ensure it's marked valid (if not already invalid by validator) 200 | if "-invalid" not in auth_input.classes: 201 | auth_input.remove_class("-invalid") 202 | auth_input.add_class("-valid") 203 | 204 | return is_valid, summary_messages 
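A quick illustration of how these helpers fit together outside the Textual UI - a minimal sketch, assuming the package is importable as `reddacted.cli_config`; the override values shown are hypothetical:

```python
# Minimal sketch - not part of the repo. Assumes `reddacted` is installed/importable.
from reddacted.cli_config import (
    CONFIG_FILE,
    load_config_from_file,
    merge_configs,
    save_config_to_file,
)

# Hypothetical overrides that would normally come from CLI flags or env vars.
cli_overrides = {"enable_auth": "true", "limit": 50}

file_config, note = load_config_from_file(CONFIG_FILE)
print(note)  # e.g. "No configuration file found at 'config.json'. Using defaults."

# CLI/env values take precedence; boolean-like strings ("true"/"false") are normalized.
merged = merge_configs(file_config, cli_overrides)

print(save_config_to_file(CONFIG_FILE, merged))  # success or error message
```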
-------------------------------------------------------------------------------- /reddacted/llm_detector.py: -------------------------------------------------------------------------------- 1 | import json 2 | import asyncio 3 | from typing import Tuple, Dict, Any, List, Optional 4 | import openai 5 | from reddacted.utils.log_handler import get_logger, with_logging 6 | from reddacted.utils.log_handler import handle_exception 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | @with_logging(logger) 12 | class LLMDetector: 13 | """Uses LLM to detect potential PII and personal information in text, 14 | and can suggest sarcastic replacements.""" 15 | 16 | DEFAULT_PROMPT = """ 17 | Analyze the following text for any information that could potentially identify the author or reveal personal details about them. 18 | Consider both explicit PII (like names, addresses) and implicit personal information (like specific life events, locations, relationships). 19 | 20 | YOU MUST Respond in JSON format with these fields. DO NOT CHANGE FIELD NAMES, THEY ARE VERY IMPORTANT. 21 | - has_pii: boolean 22 | - confidence: float (0-1) 23 | - details: list of findings with type and example from the comment text 24 | - reasoning: detailed explanation of why this content might identify the author 25 | - risk_factors: list of specific elements that contribute to the risk score 26 | 27 | Text to analyze: {text} 28 | """ 29 | 30 | REPLACEMENT_PROMPT_TEMPLATE = """ 31 | You are a creative writing assistant specializing in sarcastic and nonsensical rewrites. 32 | Your task is to rewrite the following text, replacing any identified personal information with humorous, absurd, or sarcastic placeholders. Maintain the original structure and tone as much as possible, but ensure all sensitive details are obscured. 33 | 34 | Original Text: 35 | "{original_text}" 36 | 37 | Identified Personal Information Details: 38 | {pii_details} 39 | 40 | Rewrite the text, replacing the identified information with sarcastic/nonsensical content. 41 | ONLY output the rewritten text. Do not include explanations, apologies, or any text other than the rewritten version. 42 | """ 43 | 44 | def __init__( 45 | self, api_key: str, api_base: str = None, model: str = "gpt-3.5-turbo", headers: dict = None 46 | ): 47 | self.model = model 48 | self.client_config = { 49 | "api_key": api_key, 50 | } 51 | if headers: 52 | self.client_config["default_headers"] = headers 53 | if api_base: 54 | self.client_config["base_url"] = api_base 55 | 56 | async def analyze_batch(self, texts: List[str]) -> List[Tuple[float, Dict[str, Any]]]: 57 | """ 58 | Analyze a batch of texts using LLM for potential personal information. 59 | Returns list of tuples (risk_score, details). 
60 | """ 61 | batch_size = 10 62 | results = [] 63 | try: 64 | client = openai.AsyncOpenAI(**self.client_config) 65 | except openai.AuthenticationError as e: 66 | error_msg = str(e) 67 | if "Incorrect API key provided" in error_msg: 68 | # Extract the redacted key if present 69 | key_preview = ( 70 | error_msg.split("provided: ")[1].split(".")[0] 71 | if "provided: " in error_msg 72 | else "UNKNOWN" 73 | ) 74 | raise ValueError(f"Invalid API key (provided: {key_preview})") from e 75 | raise ValueError("Authentication failed - please check your API key") from e 76 | except openai.APIError as e: 77 | raise ConnectionError(f"API error: {e.message}") from e 78 | 79 | try: 80 | for i in range(0, len(texts), batch_size): 81 | batch = texts[i : i + batch_size] 82 | tasks = [] 83 | for text in batch: 84 | task = client.chat.completions.create( 85 | model=self.model, 86 | messages=[ 87 | {"role": "system", "content": "/no-think You are a privacy analysis assistant."}, 88 | {"role": "user", "content": self.DEFAULT_PROMPT.format(text=text)}, 89 | ], 90 | temperature=0.1, 91 | ) 92 | logger.debug_with_context(f"Using API base: {client.base_url}") 93 | logger.debug_with_context(f"Using model: {self.model}") 94 | tasks.append(task) 95 | 96 | logger.info_with_context(f"Awaiting {len(tasks)} LLM analysis tasks...") 97 | batch_responses = await asyncio.gather(*tasks) 98 | logger.info_with_context("LLM analysis tasks completed.") 99 | 100 | for response in batch_responses: 101 | try: 102 | raw_response = response.choices[0].message.content.strip() 103 | logger.debug_with_context(f"\n🤖 Raw LLM Response:\n{raw_response}\n") 104 | # Strip the reasoning block emitted by qwen3-series thinking models 105 | if "</think>" in raw_response: 106 | raw_response = raw_response.split("</think>")[1] 107 | try: 108 | # First attempt a direct parse; the LLM sometimes messes up the formatting 109 | analysis = json.loads(raw_response) 110 | except json.JSONDecodeError: 111 | # If that fails, try to extract JSON from markdown blocks 112 | if "```json" in raw_response: 113 | logger.debug_with_context(f"\nAttempting to extract json from markdown in {raw_response}\n") 114 | json_content = ( 115 | raw_response.split("```json")[1].split("```")[0].strip() 116 | ) 117 | analysis = json.loads(json_content) 118 | else: 119 | logger.debug_with_context(f"Failed to extract json from markdown in {raw_response}\n") 120 | raise 121 | 122 | # Calculate risk score based on confidence and PII presence 123 | confidence = float(analysis.get("confidence", 0.0)) 124 | has_pii = analysis.get("has_pii", False) 125 | 126 | logger.debug_with_context(f"Parsed confidence: {confidence}") 127 | logger.debug_with_context(f"Parsed has_pii: {has_pii}") 128 | 129 | if has_pii: 130 | risk_score = confidence 131 | else: 132 | risk_score = 0.0 133 | analysis = { 134 | "has_pii": False, 135 | "confidence": 0.0, 136 | "details": [], 137 | "risk_factors": [], 138 | "reasoning": "No PII detected", 139 | } 140 | 141 | results.append((risk_score, analysis)) 142 | except Exception as e: 143 | logger.warning_with_context(f"Failed to parse LLM analysis response: {e}") 144 | results.append((0.0, {"error": f"LLM response parsing failed: {e}"})) 145 | return results 146 | 147 | except Exception as e: 148 | logger.error_with_context("AI analysis failed") 149 | logger.error_with_context(f"Batch LLM analysis failed: {str(e)}") 150 | error_msg = str(e) 151 | if isinstance(e, ValueError) and "Invalid API key" in error_msg: 152 | # Format a user-friendly error message 153 | return [ 154 | ( 155 | 0.0, 156 | { 157 | "error":
"Authentication Failed", 158 | "details": error_msg, 159 | "help": "Please check your OpenAI API key configuration", 160 | }, 161 | ) 162 | ] * len(texts) 163 | return [ 164 | ( 165 | 0.0, 166 | { 167 | "error": "LLM Analysis Failed", 168 | "details": error_msg, 169 | "help": "Please try again or contact support if the issue persists", 170 | }, 171 | ) 172 | ] * len(texts) 173 | 174 | async def analyze_text(self, text: str) -> Tuple[float, Dict[str, Any]]: 175 | """ 176 | Analyze a single text using LLM for potential personal information. 177 | Returns tuple of (risk_score, details). 178 | """ 179 | try: 180 | results = await self.analyze_batch([text]) 181 | return results[0] 182 | except Exception as e: 183 | logger.error_with_context(f"LLM analysis failed: {str(e)}") 184 | return 0.0, {"error": str(e)} 185 | 186 | async def suggest_replacement(self, text: str, analysis: Dict[str, Any]) -> Optional[str]: 187 | """ 188 | Suggests a sarcastic/nonsensical replacement for the text, obscuring PII. 189 | 190 | Args: 191 | text: The original text. 192 | analysis: The analysis result dictionary from analyze_text/analyze_batch. 193 | 194 | Returns: 195 | The suggested replacement text, or None if no PII was found or an error occurred. 196 | """ 197 | if not analysis or not analysis.get("has_pii"): 198 | logger.info_with_context("No PII found, skipping replacement suggestion.") 199 | return None 200 | 201 | pii_details_list = analysis.get("details", []) 202 | if not pii_details_list: 203 | logger.warning_with_context("has_pii is True, but no details found. Skipping replacement.") 204 | return None 205 | 206 | # Format PII details for the prompt 207 | pii_details_str = "\n".join([f"- Type: {item.get('type', 'N/A')}, Example: {item.get('example', 'N/A')}" for item in pii_details_list]) 208 | 209 | prompt = self.REPLACEMENT_PROMPT_TEMPLATE.format( 210 | original_text=text, 211 | pii_details=pii_details_str 212 | ) 213 | 214 | try: 215 | # Create a client instance for this specific call 216 | client = openai.AsyncOpenAI(**self.client_config) 217 | logger.debug_with_context("Requesting replacement suggestion from LLM.") 218 | logger.debug_with_context(f"Replacement Prompt:\n{prompt}") 219 | 220 | response = await client.chat.completions.create( 221 | model=self.model, 222 | messages=[ 223 | {"role": "system", "content": "You are a creative writing assistant specializing in sarcastic and nonsensical rewrites."}, 224 | {"role": "user", "content": prompt}, 225 | ], 226 | temperature=0.7, # Slightly higher temp for creativity 227 | ) 228 | 229 | replacement_text = response.choices[0].message.content.strip() 230 | logger.debug_with_context(f"Raw replacement suggestion:\n{replacement_text}") 231 | 232 | # Basic check to ensure it's not empty or just whitespace 233 | if not replacement_text: 234 | logger.warning_with_context("LLM returned an empty replacement suggestion.") 235 | return None 236 | 237 | return replacement_text 238 | 239 | except openai.AuthenticationError as e: 240 | error_msg = str(e) 241 | key_preview = "UNKNOWN" 242 | if "Incorrect API key provided" in error_msg and "provided: " in error_msg: 243 | key_preview = error_msg.split("provided: ")[1].split(".")[0] 244 | logger.error_with_context(f"Authentication failed for replacement suggestion (key: {key_preview}): {e}") 245 | # Propagate a clear error message or handle as needed downstream 246 | # For now, returning None as the function signature suggests optional return 247 | return None 248 | except openai.APIError as e: 249 | 
logger.error_with_context(f"API error during replacement suggestion: {e.message}") 250 | return None 251 | except Exception as e: 252 | logger.error_with_context(f"Unexpected error during replacement suggestion: {str(e)}") 253 | return None 254 | -------------------------------------------------------------------------------- /reddacted/pii_detector.py: -------------------------------------------------------------------------------- 1 | import re 2 | from dataclasses import dataclass 3 | from typing import List, Tuple 4 | from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn 5 | from reddacted.utils.log_handler import get_logger, with_logging 6 | from reddacted.utils.log_handler import handle_exception 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | @dataclass 12 | class PIIMatch: 13 | """Represents a PII match found in text""" 14 | 15 | type: str 16 | value: str 17 | confidence: float 18 | 19 | 20 | class PIIDetector: 21 | """Detects various types of personally identifiable information in text""" 22 | 23 | # Common PII patterns 24 | PATTERNS = { 25 | "email": (r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", 0.95), 26 | "phone": (r"\b(\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b", 0.85), 27 | "ssn": (r"\b\d{3}-\d{2}-\d{4}\b", 0.97), 28 | "credit_card": (r"\b(?:\d{4}[- ]?){3}\d{4}\b", 0.95), 29 | "address": ( 30 | r"\b\d{2,5}\s+(?:[A-Za-z]+\s)+(?:street|st|avenue|ave|road|rd|boulevard|blvd|lane|ln|drive|dr)\.?\b", 31 | 0.65, 32 | ), 33 | "name_pattern": (r"\b(?:Mr\.|Mrs\.|Ms\.|Dr\.)\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", 0.7), 34 | } 35 | 36 | # Keywords that might indicate PII context 37 | CONTEXT_KEYWORDS = [ 38 | "name is", 39 | "live at", 40 | "address", 41 | "reach me", 42 | "phone", 43 | "email", 44 | "contact", 45 | "call me", 46 | "ssn", 47 | "social security", 48 | "credit card", 49 | "driver license", 50 | ] 51 | 52 | COMMON_FALSE_POSITIVES = [ 53 | r"\b\d+ (llm|ai|gpu|cpu|ram|mb|gb|ghz|mhz|api)\b", 54 | r"\b\d+ (times|years|days|hours|minutes|seconds)\b", 55 | r"\b\d+(?:st|nd|rd|th)\b", 56 | r"\b\d+[km]?b?\b", 57 | ] 58 | 59 | def __init__(self): 60 | self.compiled_patterns = { 61 | name: re.compile(pattern, re.IGNORECASE) for name, (pattern, _) in self.PATTERNS.items() 62 | } 63 | self.false_positive_patterns = [ 64 | re.compile(pattern, re.IGNORECASE) for pattern in self.COMMON_FALSE_POSITIVES 65 | ] 66 | 67 | @with_logging(logger) 68 | def analyze_text(self, text: str) -> List[PIIMatch]: 69 | """ 70 | Analyze text for potential PII. 71 | Returns a list of PIIMatch objects for each PII instance found. 
72 | """ 73 | matches = [] 74 | 75 | # First check for false positives 76 | if any(fp.search(text) for fp in self.false_positive_patterns): 77 | return [] 78 | 79 | # Validate matches against known false positive contexts 80 | for pii_type, (_, confidence) in self.PATTERNS.items(): 81 | pattern = self.compiled_patterns[pii_type] 82 | for match in pattern.finditer(text): 83 | full_match = match.group(0) 84 | 85 | # Additional validation per type 86 | if pii_type == "phone" and len(full_match.replace("-", "").replace(" ", "")) < 10: 87 | continue 88 | 89 | if pii_type == "address" and not any(c.isalpha() for c in full_match.split()[-2]): 90 | continue 91 | 92 | matches.append(PIIMatch(pii_type, full_match, confidence)) 93 | 94 | # Contextual confidence boost with cap 95 | context_boost = ( 96 | 0.15 97 | if any( 98 | re.search(rf"\b{re.escape(kw)}\b", text, re.IGNORECASE) 99 | for kw in self.CONTEXT_KEYWORDS 100 | ) 101 | else 0.0 102 | ) 103 | 104 | for match in matches: 105 | match.confidence = min(1.0, match.confidence + context_boost) 106 | 107 | return matches 108 | 109 | @with_logging(logger) 110 | def get_pii_risk_score(self, text: str, progress=None) -> Tuple[float, List[PIIMatch]]: 111 | """ 112 | Calculate overall PII risk score for a text and return matches. 113 | Returns a tuple of (risk_score, matches). 114 | """ 115 | matches = self.analyze_text(text) 116 | if not matches: 117 | return 0.0, [] 118 | 119 | # Weighted average with type weights 120 | type_weights = { 121 | "ssn": 1.2, 122 | "credit_card": 1.2, 123 | "email": 1.0, 124 | "phone": 0.9, 125 | "address": 0.7, 126 | "name_pattern": 0.6, 127 | } 128 | 129 | total_weight = sum(type_weights.get(match.type, 1.0) for match in matches) 130 | weighted_sum = sum( 131 | match.confidence * type_weights.get(match.type, 1.0) for match in matches 132 | ) 133 | 134 | return min(1.0, weighted_sum / total_weight), matches 135 | -------------------------------------------------------------------------------- /reddacted/results.py: -------------------------------------------------------------------------------- 1 | from reddacted.api.reddit import Reddit 2 | #!/usr/bin/env python 3 | # -*- coding: utf-8 -*- 4 | 5 | from typing import List, Dict, Any, Optional 6 | 7 | from rich.columns import Columns 8 | from rich.console import Group 9 | from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn 10 | 11 | from reddacted.utils.log_handler import get_logger, with_logging 12 | from reddacted.utils.report import ( 13 | generate_analysis_report, 14 | should_show_result, 15 | ) 16 | from reddacted.utils.tables import TableFormatter 17 | from reddacted.utils.panels import PanelFormatter 18 | from reddacted.utils.analysis import AnalysisResult 19 | from reddacted.textual_ui import show_results 20 | 21 | 22 | logger = get_logger(__name__) 23 | 24 | 25 | class ResultsFormatter(TableFormatter, PanelFormatter): 26 | """Handles formatting and display of analysis results.""" 27 | 28 | def __init__(self): 29 | TableFormatter.__init__(self) 30 | PanelFormatter.__init__(self) 31 | # Use module-level logger instance 32 | self.total_pii_comments = 0 33 | self.total_llm_pii_comments = 0 34 | self.use_random_string = False # Default to False 35 | 36 | @with_logging(logger) 37 | def create_progress(self) -> Progress: 38 | """Creates a unified progress context manager.""" 39 | if not hasattr(self, "_progress"): 40 | self._progress = Progress( 41 | SpinnerColumn(spinner_name="dots"), 42 | TextColumn("[bold blue]{task.description}"), 43 | 
TimeElapsedColumn(), 44 | transient=True, 45 | ) 46 | return self._progress 47 | 48 | @with_logging(logger) 49 | def generate_output_file( 50 | self, 51 | filename: str, 52 | comments: List[Dict[str, Any]], 53 | url: str, 54 | results: List[AnalysisResult], 55 | overall_score: float, 56 | overall_sentiment: str, 57 | ) -> None: 58 | """Outputs a file containing a detailed sentiment and PII analysis per comment.""" 59 | progress = self.create_progress() 60 | with progress: 61 | progress_task = progress.add_task( 62 | "📝 Generating analysis report...", total=len(comments) 63 | ) 64 | try: 65 | stats = generate_analysis_report( 66 | filename=filename, 67 | comments=comments, 68 | url=url, 69 | results=results, 70 | overall_score=overall_score, 71 | overall_sentiment=overall_sentiment, 72 | pii_only=getattr(self, "pii_only", False), 73 | ) 74 | self.total_pii_comments = stats["total_pii_comments"] 75 | self.total_llm_pii_comments = stats["total_llm_pii_comments"] 76 | self._print_completion_message(filename, comments, results, progress) 77 | except Exception as e: 78 | logger.exception("Failed to generate output file: %s", e) # Use module logger 79 | raise 80 | 81 | @with_logging(logger) 82 | def print_config(self, config: Dict[str, Any]) -> None: 83 | """Prints the active configuration using the provided config dictionary.""" 84 | progress = self.create_progress() 85 | 86 | # Extract values needed for panels from the config dict 87 | auth_enabled = config.get("enable_auth", False) 88 | pii_enabled = True # Assuming PII is always enabled for now 89 | pii_only = config.get("pii_only", False) 90 | limit_val = config.get("limit", 20) 91 | limit = None if limit_val == 0 else limit_val 92 | sort = config.get("sort", "new") 93 | use_random_string = config.get("use_random_string", False) # Get from config 94 | 95 | # Construct llm_config dict for the features panel if applicable 96 | llm_config = None 97 | if config.get("model"): 98 | llm_config = { 99 | "api_key": config.get("openai_key") if config.get("use_openai_api") else "sk-not-needed", 100 | "api_base": config.get("local_llm") if not config.get("use_openai_api") else "https://api.openai.com/v1", 101 | "model": config.get("model"), 102 | } 103 | # Adjust api_base for local LLM if needed (redundant with Sentiment.__init__ but safe) 104 | if not config.get("use_openai_api") and llm_config.get("api_base"): 105 | base_url = llm_config["api_base"].rstrip('/') 106 | if not base_url.endswith('/v1'): 107 | llm_config["api_base"] = f"{base_url}/v1" 108 | elif config.get("openai_key") or config.get("local_llm"): 109 | llm_config = { # Handle case where URL/key provided but no model 110 | "api_key": config.get("openai_key") if config.get("use_openai_api") else "sk-not-needed", 111 | "api_base": config.get("local_llm") if not config.get("use_openai_api") else "https://api.openai.com/v1", 112 | "model": None, 113 | } 114 | if not config.get("use_openai_api") and llm_config.get("api_base"): 115 | base_url = llm_config["api_base"].rstrip('/') 116 | if not base_url.endswith('/v1'): 117 | llm_config["api_base"] = f"{base_url}/v1" 118 | 119 | 120 | with progress: 121 | progress.console.print("\n[bold cyan]Active Configuration[/]") 122 | features_panel = self.create_features_panel( 123 | auth_enabled, pii_enabled, llm_config, pii_only, limit, sort, 124 | use_random_string=use_random_string # Use value from config 125 | ) 126 | panels = [features_panel] 127 | # Pass the full config to create_auth_panel 128 | auth_panel = self.create_auth_panel(config) 129 | 
panels.append(auth_panel) 130 | progress.console.print(Columns(panels)) 131 | 132 | @with_logging(logger) 133 | def print_comments( 134 | self, 135 | comments: List[Dict[str, Any]], 136 | url: str, 137 | results: List[AnalysisResult], 138 | overall_score: float, 139 | overall_sentiment: str, 140 | reddit_api: 'Reddit', # Added reddit_api 141 | ) -> None: 142 | """Prints out analysis of user comments using Textual UI.""" 143 | filtered_results = [ 144 | r for r in results if should_show_result(r, getattr(self, "pii_only", False)) 145 | ] 146 | if not filtered_results and getattr(self, "pii_only", False): 147 | logger.info_with_context("No comments with high PII risk found.") # Use module logger with context 148 | print("No comments with high PII risk found.") 149 | return 150 | 151 | # Show interactive results view 152 | show_results( 153 | url=url, 154 | comments=comments, 155 | results=filtered_results, 156 | overall_score=overall_score, 157 | overall_sentiment=overall_sentiment, 158 | reddit_api=reddit_api, # Pass reddit_api 159 | use_random_string=getattr(self, "use_random_string", False), 160 | ) 161 | 162 | def _print_completion_message( 163 | self, 164 | filename: str, 165 | comments: List[Dict[str, Any]], 166 | results: List[AnalysisResult], 167 | progress: Progress, 168 | ) -> None: 169 | """Prints completion message with file info and action panel.""" 170 | high_risk_comments = [ 171 | r 172 | for r in results 173 | if r.pii_risk_score > 0.5 or (r.llm_findings and r.llm_findings.get("has_pii", False)) 174 | ] 175 | comment_ids = [r.comment_id for r in high_risk_comments] 176 | completion_panel = self.create_completion_panel( 177 | filename, len(comments), self.total_pii_comments, self.total_llm_pii_comments 178 | ) 179 | if comment_ids: 180 | actions_panel = self.create_action_panel(results, use_random_string=getattr(self, "use_random_string", False)) 181 | progress.console.print(Group(completion_panel, actions_panel)) 182 | else: 183 | progress.console.print(completion_panel) 184 | -------------------------------------------------------------------------------- /reddacted/sentiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Standard library 5 | import asyncio 6 | import re 7 | from os import environ 8 | from typing import List, Dict, Any, Optional, Tuple 9 | 10 | # Third-party 11 | import nltk 12 | from nltk.sentiment.vader import SentimentIntensityAnalyzer 13 | from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn 14 | from rich.console import Console 15 | 16 | # Local 17 | from reddacted.utils.log_handler import get_logger, with_logging 18 | 19 | # Initialize rich console 20 | console = Console() 21 | from reddacted.utils.log_handler import handle_exception 22 | from reddacted.api.scraper import Scraper 23 | from reddacted.api.reddit import Reddit 24 | from reddacted.pii_detector import PIIDetector 25 | from reddacted.llm_detector import LLMDetector 26 | from reddacted.results import ResultsFormatter, AnalysisResult 27 | 28 | logger = get_logger(__name__) 29 | 30 | _COMMENT_ANALYSIS_HEADERS = {"User-agent": "reddacted"} 31 | 32 | 33 | # Sentiment constants 34 | HAPPY_SENTIMENT = "😁" 35 | SAD_SENTIMENT = "😕" 36 | NEUTRAL_SENTIMENT = "😐" 37 | 38 | 39 | class Sentiment: 40 | """Performs the LLM PII & sentiment analysis on a given set of Reddit Objects.""" 41 | 42 | def __init__(self, config: Dict[str, Any]): 43 | """Initialize Sentiment Analysis 
using a configuration dictionary. 44 | 45 | Args: 46 | config (Dict[str, Any]): Dictionary containing all configuration settings. 47 | """ 48 | logger.debug_with_context(f"Initializing Sentiment Analyzer with config: {config}") 49 | 50 | # Extract values from config, providing defaults 51 | self.auth_enabled = config.get("enable_auth", False) 52 | self.pii_enabled = True # Assuming PII is always enabled for now, adjust if needed 53 | self.llm_config = None # Will be constructed below if needed 54 | self.pii_only = config.get("pii_only", False) 55 | self.sort = config.get("sort", "new") 56 | # Default limit to 20 if missing, map 0 to None (unlimited) 57 | limit_val = config.get("limit", 20) 58 | self.limit = None if limit_val == 0 else limit_val 59 | self.skip_text = config.get("skip_text") 60 | self.use_random_string = config.get("use_random_string", False) 61 | 62 | # Download required NLTK data if not already present 63 | try: 64 | nltk.data.find("sentiment/vader_lexicon") 65 | except LookupError: 66 | logger.debug("Downloading required NLTK data...") 67 | nltk.download("vader_lexicon", quiet=True) 68 | 69 | # Initialize necessary variables 70 | self.llm_detector = None # Initialize llm_detector early 71 | # Initialize batch processing attributes 72 | self._llm_batch = [] 73 | self._llm_batch_indices = [] 74 | self._pending_results = [] 75 | 76 | try: 77 | self.api = Scraper() # Default to Scraper 78 | self.score = 0 79 | self.sentiment = NEUTRAL_SENTIMENT 80 | self.headers = _COMMENT_ANALYSIS_HEADERS 81 | self.pii_detector = PIIDetector() if self.pii_enabled else None 82 | logger.debug_with_context( 83 | "Initialized base attributes with configuration: " 84 | f"auth_enabled={self.auth_enabled}, " 85 | f"pii_enabled={self.pii_enabled}, " 86 | f"pii_only={self.pii_only}, " 87 | f"sort={self.sort}, " 88 | f"limit={self.limit}" 89 | ) 90 | 91 | logger.debug_with_context("Base sentiment analyzer initialized") 92 | except Exception as e: 93 | handle_exception(e, "Failed to initialize base Sentiment analyzer") 94 | logger.error_with_context("Failed to initialize base Sentiment analyzer") 95 | raise 96 | 97 | # Construct LLM config dictionary if applicable 98 | if config.get("model"): 99 | # If model is specified but no LLM URL, default to local 100 | if not config.get("local_llm") and not config.get("openai_key"): 101 | config["local_llm"] = "http://localhost:11434" # Modify config directly or use a local var 102 | logger.warning_with_context("No LLM URL specified, defaulting to local") 103 | 104 | self.llm_config = { 105 | "api_key": config.get("openai_key") if config.get("use_openai_api") else "sk-not-needed", 106 | "api_base": config.get("local_llm") if not config.get("use_openai_api") else "https://api.openai.com/v1", 107 | "model": config.get("model"), 108 | } 109 | # Adjust api_base for local LLM if needed 110 | if not config.get("use_openai_api") and self.llm_config["api_base"]: 111 | base_url = self.llm_config["api_base"].rstrip('/') 112 | if not base_url.endswith('/v1'): 113 | self.llm_config["api_base"] = f"{base_url}/v1" 114 | elif config.get("openai_key") or config.get("local_llm"): 115 | # Handle case where URL/key is provided but no model selected yet 116 | self.llm_config = { 117 | "api_key": config.get("openai_key") if config.get("use_openai_api") else "sk-not-needed", 118 | "api_base": config.get("local_llm") if not config.get("use_openai_api") else "https://api.openai.com/v1", 119 | "model": None, # Explicitly set model to None 120 | } 121 | if not 
config.get("use_openai_api") and self.llm_config["api_base"]: 122 | base_url = self.llm_config["api_base"].rstrip('/') 123 | if not base_url.endswith('/v1'): 124 | self.llm_config["api_base"] = f"{base_url}/v1" 125 | logger.warning_with_context("LLM URL/Key provided, but no model selected. LLM analysis may be limited.") 126 | 127 | 128 | # Initialize LLM detector if config was constructed 129 | if self.llm_config: 130 | logger.debug_with_context(f"Constructed LLM Config: {self.llm_config}") 131 | try: 132 | api_key = self.llm_config.get("api_key") 133 | api_base = self.llm_config.get("api_base") 134 | model = self.llm_config.get("model") # Already extracted 135 | 136 | logger.debug_with_context(f"LLM Config - API Base: {api_base}, Model: {model}") 137 | # Initialize LLM detector if we have sufficient configuration 138 | if not model: 139 | logger.warning_with_context("No model specified in config - LLM analysis disabled") 140 | self.llm_detector = None 141 | elif not api_base: 142 | logger.error_with_context("Missing API base URL - required for both local and OpenAI") 143 | self.llm_detector = None 144 | elif api_base == "https://api.openai.com/v1" and not api_key: 145 | logger.error_with_context("Missing API key - required for OpenAI API") 146 | self.llm_detector = None 147 | else: 148 | self.llm_detector = LLMDetector( 149 | api_key=api_key, 150 | api_base=api_base, 151 | model=model, 152 | ) 153 | logger.info_with_context("LLM Detector initialized") 154 | except Exception as e: 155 | logger.error_with_context(f"Failed to initialize LLM Detector: {str(e)}") 156 | self.llm_detector = None 157 | else: 158 | logger.info_with_context("No LLM config provided or model missing, LLM analysis disabled.") 159 | 160 | # Initialize Reddit API if auth is enabled, passing the config 161 | if self.auth_enabled: 162 | logger.debug_with_context("Authentication enabled, initializing Reddit API with config") 163 | # Pass the full config dictionary and the specific use_random_string preference 164 | self.api = Reddit(config=config, use_random_string=self.use_random_string) 165 | logger.debug_with_context("Reddit API initialized") 166 | else: 167 | logger.debug_with_context("Authentication not enabled, using default Scraper API") 168 | # self.api remains the Scraper instance initialized earlier 169 | 170 | self.formatter = ResultsFormatter() 171 | self.formatter.pii_only = self.pii_only 172 | self.formatter.use_random_string = self.use_random_string # Use instance variable 173 | # Pass the entire config dictionary to print_config 174 | self.formatter.print_config(config) 175 | 176 | @with_logging(logger) 177 | async def _analyze(self, comments): 178 | """Analyzes comments for both sentiment and PII content. 179 | :param comments: comments to perform analysis on. 
180 | :return: tuple of (sentiment_score, list of AnalysisResult objects) 181 | """ 182 | logger.debug_with_context("Starting _analyze function") 183 | sentiment_analyzer = SentimentIntensityAnalyzer() 184 | final_score = 0 185 | results: List[AnalysisResult] = [] # Final results list 186 | _llm_batch: List[str] = [] # Batch of comments for LLM 187 | _llm_result_indices: List[int] = [] # Indices in 'results' corresponding to _llm_batch items 188 | 189 | cleanup_regex = re.compile("<.*?>") 190 | total_comments = len(comments) 191 | progress = Progress( 192 | SpinnerColumn(spinner_name="dots"), 193 | TextColumn("[bold blue]{task.description}"), 194 | TimeElapsedColumn(), 195 | transient=True, 196 | ) 197 | with progress: 198 | main_task = progress.add_task( 199 | f"Received {total_comments} comments, processing...", total=total_comments 200 | ) 201 | pii_task = progress.add_task("🔍 PII Analysis", visible=False, total=1) 202 | llm_task = progress.add_task("🤖 LLM Analysis", visible=False, total=1) 203 | for i, comment_data in enumerate(comments, 1): 204 | try: 205 | clean_comment = re.sub(cleanup_regex, "", str(comment_data["text"])) 206 | 207 | # Skip already reddacted comments 208 | if self.skip_text and self.skip_text in clean_comment: 209 | logger.debug_with_context(f"Skipping already reddacted comment {i}") 210 | progress.update(main_task, advance=1) 211 | continue 212 | progress.update( 213 | main_task, 214 | description=f"[bold blue]💭 Processing comment[/] [cyan]{i}[/]/[cyan]{total_comments}[/]", 215 | ) 216 | # Sentiment analysis 217 | all_scores = sentiment_analyzer.polarity_scores(clean_comment) 218 | score = all_scores["compound"] 219 | final_score += score 220 | # PII analysis 221 | pii_risk_score, pii_matches = 0.0, [] 222 | if self.pii_enabled: 223 | progress.update(pii_task, visible=True) 224 | progress.update(pii_task, description=f"🔍 Scanning comment {i} for PII") 225 | pii_risk_score, pii_matches = self.pii_detector.get_pii_risk_score( 226 | clean_comment 227 | ) 228 | progress.update(pii_task, visible=False) 229 | 230 | # Create the initial result object 231 | result = AnalysisResult( 232 | comment_id=comment_data["id"], 233 | sentiment_score=score, 234 | sentiment_emoji=self._get_sentiment(score), 235 | pii_risk_score=pii_risk_score, 236 | pii_matches=pii_matches, 237 | text=clean_comment, 238 | upvotes=comment_data["upvotes"], 239 | downvotes=comment_data["downvotes"], 240 | permalink=comment_data["permalink"], 241 | llm_risk_score=0.0, # Placeholder 242 | llm_findings=None, # Placeholder 243 | ) 244 | results.append(result) # Add initial result to final list 245 | 246 | # If LLM is enabled, add to batch for later processing 247 | if self.llm_detector: 248 | _llm_batch.append(clean_comment) 249 | _llm_result_indices.append(len(results) - 1) # Store index of the result we just added 250 | logger.debug_with_context(f"Added comment {i} to LLM batch (size: {len(_llm_batch)})") 251 | 252 | # Process batch if full 253 | if len(_llm_batch) >= 10: 254 | batch_size = len(_llm_batch) 255 | try: 256 | progress.update(llm_task, visible=True) 257 | progress.update(llm_task, description=f"[bold blue]🤖 Processing LLM batch[/] ([cyan]{batch_size}[/] items)") 258 | batch_llm_results = await self.llm_detector.analyze_batch(_llm_batch) 259 | logger.debug_with_context(f"Successfully processed LLM batch of {batch_size} items") 260 | 261 | # Update results in place 262 | for result_idx, (llm_risk_score, findings) in zip(_llm_result_indices, batch_llm_results): 263 | 
results[result_idx].llm_risk_score = llm_risk_score 264 | results[result_idx].llm_findings = findings 265 | if findings and findings.get("has_pii"): 266 | results[result_idx].pii_risk_score = max(results[result_idx].pii_risk_score, llm_risk_score) 267 | logger.debug_with_context(f"Updated {batch_size} results with LLM data") 268 | 269 | except Exception as e: 270 | logger.error_with_context(f"Failed to process LLM batch: {str(e)}") 271 | finally: 272 | progress.update(llm_task, description=f"[bold green]✅ LLM batch complete[/] ([cyan]{batch_size}[/] items analyzed)", visible=False) 273 | # Clear batch lists for next batch 274 | _llm_batch = [] 275 | _llm_result_indices = [] 276 | else: 277 | logger.warning_with_context(f"Skipping LLM analysis for comment {i} - detector not initialized") 278 | 279 | progress.update(main_task, advance=1) 280 | except Exception as e: 281 | logger.error_with_context(f"Error processing comment {i}: {e}") 282 | # Ensure progress advances even on error 283 | progress.update(main_task, advance=1) 284 | continue 285 | 286 | # --- Process any remaining items in the LLM batch after the loop --- 287 | if self.llm_detector and _llm_batch: 288 | batch_size = len(_llm_batch) 289 | try: 290 | progress.update(llm_task, visible=True) 291 | progress.update(llm_task, description=f"[bold blue]🤖 Processing final LLM batch[/] ([cyan]{batch_size}[/] items)") 292 | batch_llm_results = await self.llm_detector.analyze_batch(_llm_batch) 293 | logger.debug_with_context(f"Successfully processed final LLM batch of {batch_size} items") 294 | 295 | # Update results in place 296 | for result_idx, (llm_risk_score, findings) in zip(_llm_result_indices, batch_llm_results): 297 | results[result_idx].llm_risk_score = llm_risk_score 298 | results[result_idx].llm_findings = findings 299 | if findings and findings.get("has_pii"): 300 | results[result_idx].pii_risk_score = max(results[result_idx].pii_risk_score, llm_risk_score) 301 | logger.debug_with_context(f"Updated {batch_size} results with final LLM data") 302 | 303 | except Exception as e: 304 | logger.error_with_context(f"Failed to process final LLM batch: {str(e)}") 305 | finally: 306 | progress.update(llm_task, description=f"[bold green]✅ Final LLM batch complete[/] ([cyan]{batch_size}[/] items analyzed)", visible=False) 307 | # No need to clear batch lists here as they are local to the function call 308 | 309 | # --- Calculate final score and return --- 310 | try: 311 | # Use len(results) which accurately reflects processed comments 312 | num_processed = len(results) 313 | if num_processed == 0: 314 | logger.warning("No comments were successfully processed.") 315 | return 0.0, [] 316 | # Calculate score based on processed comments' sentiment scores 317 | final_score = sum(r.sentiment_score for r in results) # Recalculate final_score based on actual results 318 | rounded_final = round(final_score / num_processed, 4) # Use num_processed 319 | logger.debug_with_context(f"Final sentiment score calculated: {rounded_final}") 320 | return rounded_final, results 321 | except ZeroDivisionError: # Should be caught by num_processed check, but keep for safety 322 | logger.error_with_context("Division by zero error during final score calculation.") 323 | return 0.0, [] 324 | 325 | @with_logging(logger) 326 | def _get_sentiment(self, score): 327 | """Obtains the sentiment using a sentiment score. 328 | :param score: the sentiment score. 329 | :return: sentiment from score. 
330 | """ 331 | logger.debug_with_context(f"Calculating sentiment for score {score}") 332 | if score == 0: 333 | return NEUTRAL_SENTIMENT 334 | elif score > 0: 335 | return HAPPY_SENTIMENT 336 | else: 337 | return SAD_SENTIMENT 338 | 339 | @with_logging(logger) 340 | def _get_comments(self, source_type: str, identifier: str, **kwargs) -> List[Dict[str, Any]]: 341 | """Unified comment fetching method""" 342 | logger.debug_with_context( 343 | f"Fetching comments for {source_type} '{identifier}' with kwargs: {kwargs}" 344 | ) 345 | 346 | # Get the appropriate fetch method 347 | fetch_method = {"user": self.api.parse_user, "listing": self.api.parse_listing}[source_type] 348 | 349 | # Handle text search if specified 350 | if text_match := kwargs.pop("text_match", None): 351 | if source_type == "user": 352 | # For users, we pass the text_match to parse_user 353 | return fetch_method( 354 | identifier, 355 | headers=self.headers, 356 | limit=self.limit, 357 | text_match=text_match, 358 | **kwargs, 359 | ) 360 | else: 361 | # For subreddits, use search_comments 362 | return self.api.search_comments( 363 | query=text_match, subreddit=kwargs.get("subreddit"), limit=self.limit 364 | ) 365 | 366 | # Default comment fetching 367 | if source_type == "listing": 368 | # Split subreddit/article for listing type 369 | subreddit = identifier.split("/")[0] 370 | article = identifier.split("/")[1] 371 | return fetch_method( 372 | subreddit, article, headers=self.headers, limit=self.limit, **kwargs 373 | ) 374 | else: 375 | return fetch_method(identifier, headers=self.headers, limit=self.limit, **kwargs) 376 | 377 | @with_logging(logger) 378 | def _run_analysis_flow( 379 | self, comments: List[Dict[str, Any]] 380 | ) -> Tuple[float, List[AnalysisResult]]: 381 | """Centralized analysis execution""" 382 | logger.debug_with_context("Starting analysis flow") 383 | logger.debug_with_context(f"Processing {len(comments)} comments") 384 | logger.debug_with_context(f"LLM Detector status: {'Initialized' if self.llm_detector else 'Not initialized'}") 385 | 386 | try: 387 | loop = asyncio.get_running_loop() 388 | logger.debug_with_context("Using existing event loop") 389 | # If we have a running loop, use it 390 | future = asyncio.ensure_future(self._analyze(comments), loop=loop) 391 | result = loop.run_until_complete(future) 392 | logger.info_with_context("Analysis completed") 393 | return result 394 | except RuntimeError: 395 | # No running event loop, create a new one 396 | logger.debug_with_context("No running loop found, creating new one") 397 | return asyncio.run(self._analyze(comments)) 398 | 399 | @with_logging(logger) 400 | def get_sentiment( 401 | self, source_type: str, identifier: str, output_file: Optional[str] = None, **kwargs 402 | ) -> None: 403 | """Unified sentiment analysis entry point""" 404 | logger.debug_with_context( 405 | f"get_sentiment called with source_type={source_type}, identifier={identifier}" 406 | ) 407 | comments = self._get_comments(source_type, identifier, **kwargs) 408 | self.score, self.results = self._run_analysis_flow(comments) 409 | self.sentiment = self._get_sentiment(self.score) 410 | if output_file: 411 | self.formatter.generate_output_file( 412 | output_file, comments, identifier, self.results, self.score, self.sentiment 413 | ) 414 | else: 415 | self.formatter.print_comments( 416 | comments, identifier, self.results, self.score, self.sentiment, self.api # Pass self.api 417 | ) 418 | -------------------------------------------------------------------------------- 
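For reference, the Sentiment class in sentiment.py above is driven entirely by a plain configuration dictionary whose keys match the config.get(...) reads in its __init__. A hedged sketch of programmatic use; the username, model name, and output path are placeholders:

from reddacted.sentiment import Sentiment

config = {
    "enable_auth": False,                    # stay on the unauthenticated Scraper backend
    "pii_only": True,                        # only surface comments flagged for PII risk
    "limit": 50,                             # 0 would be treated as "no limit"
    "sort": "new",
    "use_openai_api": False,
    "local_llm": "http://localhost:11434",   # "/v1" is appended automatically when missing
    "model": "qwen3:8b",                     # placeholder model name served by the local endpoint
}

analyzer = Sentiment(config)
# source_type is "user" or "listing" ("subreddit/article_id"); with output_file set,
# a report file is written instead of launching the Textual results UI
analyzer.get_sentiment("user", "example_username", output_file="analysis_report.md")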
/reddacted/styles.py: -------------------------------------------------------------------------------- 1 | """Centralized styling configuration for the application.""" 2 | 3 | from textual.color import Color 4 | 5 | # Textual CSS 6 | TEXTUAL_CSS = """ 7 | Screen { 8 | background: #0e333d; /* A custom dark blend derived from bg_0 (#103c48) */ 9 | } 10 | 11 | Header { 12 | dock: top; 13 | background: #184956; 14 | color: #cad8d9; 15 | height: 3; 16 | content-align: center middle; 17 | border-bottom: heavy #58a3ff; 18 | } 19 | 20 | Footer { 21 | dock: bottom; 22 | background: #184956; 23 | color: #cad8d9; 24 | height: 1; 25 | } 26 | 27 | StatsDisplay { 28 | height: auto; 29 | padding: 1; 30 | background: #103c48; 31 | border: heavy #4695f7; 32 | margin: 1; 33 | } 34 | 35 | DataTable { 36 | height: auto; 37 | margin: 1; 38 | border: heavy #4695f7; 39 | } 40 | 41 | .stats-text { 42 | text-align: center; 43 | } 44 | 45 | CommentActionScreen { 46 | align: center middle; 47 | } 48 | 49 | /* Details Screen Styles */ 50 | DetailsScreen { 51 | background: #0e333d; 52 | layout: vertical; 53 | } 54 | 55 | .details-title { 56 | text-align: center; 57 | background: #184956; 58 | color: #cad8d9; 59 | padding: 1; 60 | text-style: bold; 61 | border-bottom: solid #58a3ff; 62 | width: 100%; 63 | box-sizing: border-box; 64 | } 65 | 66 | .details-scroll { 67 | width: 100%; 68 | height: 1fr; 69 | padding: 1 2; 70 | } 71 | 72 | /* Header Information */ 73 | .header-info { 74 | background: #103c48; 75 | border: solid #4695f7; 76 | padding: 1; 77 | height: auto; 78 | text-align: center; 79 | } 80 | 81 | .details-id { 82 | color: #cad8d9; 83 | } 84 | 85 | .details-risk-high { 86 | color: #ef4444; 87 | text-style: bold; 88 | } 89 | 90 | .details-risk-medium { 91 | color: #f59e0b; 92 | text-style: bold; 93 | } 94 | 95 | .details-risk-low { 96 | color: #22c55e; 97 | text-style: bold; 98 | } 99 | 100 | .details-sentiment { 101 | color: #cad8d9; 102 | } 103 | 104 | .details-votes { 105 | color: #cad8d9; 106 | } 107 | 108 | .section-header { 109 | background: #184956; 110 | color: #cad8d9; 111 | padding: 0 1; 112 | margin-top: 1; 113 | text-style: bold; 114 | } 115 | 116 | .subsection-header { 117 | color: #cad8d9; 118 | text-style: italic; 119 | margin-top: 1; 120 | } 121 | 122 | .details-text { 123 | padding: 1; 124 | background: #103c48; 125 | border: solid #4695f7; 126 | } 127 | 128 | .pii-matches-container, .llm-findings-container, .llm-stats { 129 | padding: 1; 130 | background: #103c48; 131 | border: solid #4695f7; 132 | } 133 | 134 | .details-pii-item, .details-llm-item { 135 | padding-left: 1; 136 | color: #cad8d9; 137 | border: solid #4695f7; 138 | } 139 | 140 | .details-reasoning { 141 | padding: 1; 142 | background: #103c48; 143 | border: solid #4695f7; 144 | } 145 | 146 | .details-llm-risk { 147 | padding-left: 1; 148 | background: #103c48; 149 | color: #cad8d9; 150 | } 151 | 152 | .details-has-pii-yes { 153 | padding-left: 1; 154 | color: #ef4444; 155 | text-style: bold; 156 | } 157 | 158 | .details-has-pii-no { 159 | padding-left: 1; 160 | color: #22c55e; 161 | } 162 | 163 | .details-actions { 164 | dock: bottom; 165 | height: auto; 166 | width: 100%; 167 | padding: 1; 168 | background: #184956; 169 | border-top: solid #58a3ff; 170 | align: center middle; 171 | } 172 | 173 | .details-actions Button { 174 | margin: 0 1; 175 | min-width: 16; 176 | } 177 | """ 178 | 179 | # Color System 180 | COLORS = { 181 | "primary": Color.parse("#6366f1"), # Indigo 182 | "secondary": Color.parse("#a855f7"), # Purple 183 | 
"success": Color.parse("#22c55e"), # Green 184 | "warning": Color.parse("#f59e0b"), # Amber 185 | "error": Color.parse("#ef4444"), # Red 186 | "surface": Color.parse("#1e293b"), # Slate 187 | "background": Color.parse("#0f172a"), # Dark slate 188 | "text": Color.parse("#f8fafc"), # Light slate 189 | "muted": Color.parse("#64748b"), # Medium slate 190 | } 191 | 192 | # Typography 193 | TYPOGRAPHY = { 194 | "h1": ("24px", "bold"), 195 | "h2": ("20px", "bold"), 196 | "h3": ("16px", "bold"), 197 | "body": ("14px", "normal"), 198 | "small": ("12px", "normal"), 199 | } 200 | 201 | # Spacing Scale 202 | SPACING = {"xs": 4, "sm": 8, "md": 16, "lg": 24, "xl": 32} 203 | 204 | # Component-specific styles 205 | TABLE_STYLES = { 206 | "header_style": "bold magenta", 207 | "border": "rounded", 208 | "padding": (0, 1), 209 | "collapse_padding": True, 210 | } 211 | 212 | PANEL_STYLES = {"border_style": "blue", "padding": (1, 1)} 213 | 214 | 215 | # Risk level styles 216 | def get_risk_style(score: float) -> str: 217 | """Get appropriate color style based on risk score.""" 218 | if score > 0.5: 219 | return "error" 220 | elif score > 0.2: 221 | return "warning" 222 | return "success" 223 | 224 | 225 | # Status styles 226 | def get_status_style(enabled: bool) -> str: 227 | """Get appropriate color style based on status.""" 228 | return "success" if enabled else "error" 229 | -------------------------------------------------------------------------------- /reddacted/textual_ui.py: -------------------------------------------------------------------------------- 1 | from reddacted.api.reddit import Reddit 2 | from textual.app import App, ComposeResult 3 | from textual.containers import Container 4 | from textual.widgets import DataTable, Header, Footer, Static 5 | from textual.binding import Binding 6 | from textual import message 7 | 8 | from rich.text import Text 9 | from typing import List, Optional 10 | import uuid # Added for random UUID generation 11 | 12 | from reddacted.utils.analysis import AnalysisResult 13 | from reddacted.ui.comment_actions import CommentActionScreen 14 | from reddacted.ui.details_screen import DetailsScreen 15 | from reddacted.styles import TEXTUAL_CSS 16 | 17 | 18 | class ResultsSummary(DataTable): 19 | """An interactive data table showing analysis results.""" 20 | 21 | def __init__(self, results: List[AnalysisResult]): 22 | super().__init__() 23 | self.results = results 24 | 25 | def on_mount(self) -> None: 26 | """Set up the table when mounted.""" 27 | # Add columns 28 | self.add_columns("Risk", "Sentiment", "Comment Preview", "Votes", "ID") 29 | 30 | # Add rows from results 31 | for result in self.results: 32 | # Format risk score with color based on value 33 | risk_score = f"{result.pii_risk_score:.0%}" 34 | risk_style = ( 35 | "red" 36 | if result.pii_risk_score > 0.7 37 | else "yellow" if result.pii_risk_score > 0.4 else "green" 38 | ) 39 | risk_cell = Text(risk_score, style=risk_style) 40 | 41 | # Format sentiment with emoji 42 | sentiment = Text(f"{result.sentiment_emoji} {result.sentiment_score:.2f}") 43 | 44 | # Format comment preview with link 45 | preview = (result.text[:67] + "...") if len(result.text) > 70 else result.text 46 | preview_cell = Text(preview, style="link blue") 47 | 48 | # Format votes 49 | vote_style = ( 50 | "green" 51 | if result.upvotes > result.downvotes 52 | else "red" if result.downvotes > result.upvotes else "dim" 53 | ) 54 | vote_display = Text( 55 | ( 56 | f"⬆️ {result.upvotes:>3}" 57 | if result.upvotes > result.downvotes 58 | else f"⬇️ 
{result.downvotes:>3}" if result.downvotes > result.upvotes else "0" 59 | ), 60 | style=vote_style, 61 | ) 62 | 63 | self.add_row(risk_cell, sentiment, preview_cell, vote_display, result.comment_id) 64 | 65 | def on_data_table_row_selected(self) -> None: 66 | """Handle row selection by mouse click.""" 67 | # Trigger the view details action in the parent application 68 | if self.cursor_row is not None: 69 | self.app.action_view_details() 70 | 71 | def on_data_table_cell_selected(self) -> None: 72 | """Handle cell selection.""" 73 | if self.cursor_row is not None: 74 | self.app.action_view_details() 75 | 76 | 77 | class StatsDisplay(Static): 78 | """Displays overall statistics.""" 79 | 80 | def __init__(self, url: str, comment_count: int, overall_score: float, overall_sentiment: str): 81 | super().__init__() 82 | self.url = url 83 | self.comment_count = comment_count 84 | self.overall_score = overall_score 85 | self.overall_sentiment = overall_sentiment 86 | 87 | def compose(self) -> ComposeResult: 88 | stat1 = Static(f"📊 Analysis Results for: {self.url}") 89 | stat2 = Static(f"💬 Total Comments: {self.comment_count}") 90 | stat3 = Static(f"📈 Overall Score: {self.overall_score:.2f}") 91 | stat4 = Static(f"🎭 Overall Sentiment: {self.overall_sentiment}") 92 | 93 | # Add stats-text class to all stats 94 | for stat in [stat1, stat2, stat3, stat4]: 95 | stat.add_class("stats-text") 96 | yield stat 97 | 98 | 99 | class TextualResultsView(App): 100 | """Main Textual app for displaying analysis results.""" 101 | 102 | CSS = TEXTUAL_CSS 103 | title = "reddacted" 104 | 105 | BINDINGS = [ 106 | Binding("q", "quit", "Quit", show=True), 107 | Binding("enter", "view_details", "View Details", show=True), 108 | Binding("e", "edit_comment", "Edit Comment", show=True), 109 | Binding("d", "delete_comment", "Delete Comment", show=True), 110 | ] 111 | 112 | def on_mount(self) -> None: 113 | self.title = "reddacted" # This controls the main header title 114 | self.sub_title = "Analysis Results" # Optional: Controls the subtitle 115 | 116 | def _get_selected_comment_id(self) -> Optional[str]: 117 | """Get the comment ID of the currently selected row.""" 118 | table = self.query_one(ResultsSummary) 119 | if table.cursor_row is not None: 120 | return table.get_row_at(table.cursor_row)[-1] 121 | return None 122 | 123 | def action_edit_comment(self) -> None: 124 | """Handle editing the selected comment.""" 125 | if comment_id := self._get_selected_comment_id(): 126 | self.push_screen(CommentActionScreen(comment_id, "edit", self.reddit_api, self.use_random_string)) 127 | 128 | def action_delete_comment(self) -> None: 129 | """Handle deleting the selected comment.""" 130 | if comment_id := self._get_selected_comment_id(): 131 | self.push_screen(CommentActionScreen(comment_id, "delete", self.reddit_api, self.use_random_string)) 132 | 133 | def on_action_completed(self, event: message.Message) -> None: 134 | """Handle completion of comment actions.""" 135 | table = self.query_one(ResultsSummary) 136 | 137 | # Find the row index and result for the affected comment 138 | for i, r in enumerate(self.results): 139 | if r.comment_id == event.comment_id: 140 | if event.action == "delete": 141 | # Remove from table and results 142 | table.remove_row(i) 143 | self.results.pop(i) 144 | elif event.action == "edit": 145 | # Update the result text based on random string status 146 | if hasattr(event, "use_random_string") and event.use_random_string: 147 | r.text = f"[Random UUID: {uuid.uuid4()}]" 148 | # Update cell in table 149 | 
table.update_cell(i, 2, Text(r.text, style="link blue")) 150 | else: 151 | # Standard message 152 | r.text = "This comment has been reddacted to preserve online privacy - see r/reddacted for more info" 153 | # Update cell in table with shortened version for display 154 | table.update_cell(i, 2, Text("r/reddacted", style="link blue")) 155 | break 156 | 157 | def __init__( 158 | self, 159 | url: str, 160 | comments: List[dict], 161 | results: List[AnalysisResult], 162 | overall_score: float, 163 | overall_sentiment: str, 164 | reddit_api: 'Reddit', # Added reddit_api 165 | use_random_string: bool = False, 166 | ): 167 | super().__init__() 168 | self.url = url 169 | self.comments = comments 170 | self.results = results 171 | self.overall_score = overall_score 172 | self.overall_sentiment = overall_sentiment 173 | self.reddit_api = reddit_api # Store the api instance 174 | self.use_random_string = use_random_string 175 | 176 | def compose(self) -> ComposeResult: 177 | """Create child widgets for the app.""" 178 | yield Header(show_clock=True) 179 | yield Container( 180 | StatsDisplay(self.url, len(self.comments), self.overall_score, self.overall_sentiment), 181 | ResultsSummary(self.results), 182 | ) 183 | yield Footer() 184 | 185 | def action_view_details(self) -> None: 186 | """Handle viewing details of selected row.""" 187 | if comment_id := self._get_selected_comment_id(): 188 | result = next((r for r in self.results if r.comment_id == comment_id), None) 189 | if result: 190 | self.push_screen(DetailsScreen(result, self.reddit_api, self.use_random_string)) # Pass reddit_api 191 | else: 192 | self.notify(f"No result found for comment ID: {comment_id}") 193 | else: 194 | self.notify("No comment ID found") 195 | 196 | 197 | def show_results( 198 | url: str, 199 | comments: List[dict], 200 | results: List[AnalysisResult], 201 | overall_score: float, 202 | overall_sentiment: str, 203 | reddit_api: 'Reddit', # Added reddit_api 204 | use_random_string: bool = False, 205 | ) -> None: 206 | """Display results using the Textual UI.""" 207 | app = TextualResultsView( 208 | url=url, 209 | comments=comments, # Pass original comments list 210 | results=results, 211 | overall_score=overall_score, 212 | overall_sentiment=overall_sentiment, 213 | reddit_api=reddit_api, # Pass reddit_api 214 | use_random_string=use_random_string, 215 | ) 216 | app.run() 217 | -------------------------------------------------------------------------------- /reddacted/ui/__init__.py: -------------------------------------------------------------------------------- 1 | """UI components for the reddacted package.""" 2 | 3 | from .details_screen import * -------------------------------------------------------------------------------- /reddacted/ui/comment_actions.py: -------------------------------------------------------------------------------- 1 | from textual.app import ComposeResult 2 | from textual.screen import Screen 3 | from textual.containers import Center, Vertical 4 | from textual.widgets import Button, Static, Label 5 | from textual.binding import Binding 6 | from textual import message 7 | 8 | from reddacted.api.reddit import Reddit 9 | from reddacted.styles import TEXTUAL_CSS 10 | 11 | 12 | class CommentActionScreen(Screen): 13 | """Screen for confirming and executing comment actions.""" 14 | 15 | BINDINGS = [ 16 | Binding("escape", "cancel", "Cancel", show=True), 17 | ] 18 | 19 | def __init__(self, comment_id: str, action: str, reddit_api: Reddit, use_random_string: bool = False): 20 | """Initialize the action 
screen. 21 | 22 | Args: 23 | comment_id: The ID of the comment to act on 24 | action: Either 'edit' or 'delete' 25 | reddit_api: The authenticated Reddit API instance. 26 | use_random_string: Whether to use a random UUID instead of standard message 27 | """ 28 | super().__init__() 29 | self.comment_id = comment_id 30 | self.action = action 31 | self.use_random_string = use_random_string # Keep this for logic within the screen 32 | self.api = reddit_api # Use the passed authenticated instance 33 | 34 | def compose(self) -> ComposeResult: 35 | """Create child widgets.""" 36 | action_text = "edit" if self.action == "edit" else "delete" 37 | # Show Reddit API status 38 | api_status = "Initialized" if self.api is not None else "Not Initialized" 39 | 40 | # Show random string status 41 | random_status = "Using random UUID" if self.use_random_string else "Using standard message" 42 | 43 | with Vertical(): 44 | with Center(): 45 | yield Label(f"Are you sure you want to {action_text} comment {self.comment_id}?") 46 | yield Label(f"Reddit API: {api_status}", classes="header-info", markup=False) 47 | yield Label(f"{random_status}", classes="header-info", markup=False) 48 | yield Button("Confirm", variant="error", id="confirm") 49 | yield Button("Cancel", variant="primary", id="cancel") 50 | yield Static("", id="status") 51 | 52 | def on_button_pressed(self, event: Button.Pressed) -> None: 53 | """Handle button presses.""" 54 | if event.button.id == "cancel": 55 | self.app.pop_screen() 56 | elif event.button.id == "confirm": 57 | self._execute_action() 58 | 59 | def action_cancel(self) -> None: 60 | """Handle escape key.""" 61 | self.app.pop_screen() 62 | 63 | def _execute_action(self) -> None: 64 | """Execute the requested action.""" 65 | status = self.query_one("#status", Static) 66 | try: 67 | if self.action == "edit": 68 | result = self.api.update_comments( 69 | [self.comment_id], 70 | use_random_string=self.use_random_string 71 | ) 72 | action_text = "edited" 73 | else: # delete 74 | result = self.api.delete_comments([self.comment_id]) 75 | action_text = "deleted" 76 | 77 | if result["success"] > 0: 78 | # Notify parent to refresh 79 | self.app.post_message(self.ActionCompleted( 80 | self.comment_id, 81 | self.action, 82 | use_random_string=self.use_random_string 83 | )) 84 | 85 | # Close the screen after a short delay to show success 86 | def close_screen(): 87 | self.app.pop_screen() 88 | 89 | self.set_timer(0.5, close_screen) 90 | status.update(f"✅ Successfully {action_text} comment") 91 | else: 92 | status.update(f"❌ Failed to {self.action} comment") 93 | except Exception as e: 94 | status.update(f"❌ Error: {str(e)}") 95 | 96 | class ActionCompleted(message.Message): 97 | """Message sent when action is completed successfully.""" 98 | 99 | def __init__(self, comment_id: str, action: str, use_random_string: bool = False): 100 | self.comment_id = comment_id 101 | self.action = action 102 | self.use_random_string = use_random_string 103 | super().__init__() 104 | 105 | @property 106 | def message_type(self) -> str: 107 | return "action_completed" 108 | -------------------------------------------------------------------------------- /reddacted/ui/details_screen.py: -------------------------------------------------------------------------------- 1 | from reddacted.api.reddit import Reddit 2 | from textual.app import ComposeResult 3 | from textual.screen import Screen 4 | from textual.containers import Vertical, ScrollableContainer, Horizontal 5 | from textual.widgets import Static, Label, Markdown, 
Button 6 | from textual.binding import Binding 7 | from textual import message 8 | 9 | from typing import List, Dict, Any, Optional 10 | 11 | from reddacted.utils.report import format_llm_detail 12 | from reddacted.ui.comment_actions import CommentActionScreen 13 | 14 | 15 | class DetailsScreen(Screen): 16 | """Screen for displaying detailed PII analysis for a comment.""" 17 | 18 | BINDINGS = [ 19 | Binding("escape", "go_back", "Return to Results", show=True), 20 | Binding("b", "go_back", "Back", show=True), 21 | Binding("e", "edit_comment", "Edit Comment", show=True), 22 | Binding("d", "delete_comment", "Delete Comment", show=True), 23 | ] 24 | 25 | def __init__(self, result, reddit_api: 'Reddit', use_random_string=False): 26 | """Initialize the details screen. 27 | 28 | Args: 29 | result: The AnalysisResult object containing the comment data 30 | reddit_api: The authenticated Reddit API instance. 31 | use_random_string: Whether to use random UUIDs instead of standard message 32 | """ 33 | super().__init__() 34 | self.result = result 35 | self.reddit_api = reddit_api 36 | self.use_random_string = use_random_string 37 | 38 | def compose(self) -> ComposeResult: 39 | """Create child widgets for the screen.""" 40 | yield Label("Comment Details", classes="details-title") 41 | 42 | with ScrollableContainer(classes="details-scroll"): 43 | # Header information section 44 | with Vertical(classes="header-info"): 45 | # Result ID 46 | yield Static(f"ID: {self.result.comment_id}", classes="details-id") 47 | 48 | # Risk score with appropriate coloring 49 | risk_class = self._get_risk_class() 50 | yield Static( 51 | f"Risk Score: {self.result.pii_risk_score:.0%}", 52 | classes=f"details-risk-{risk_class}", 53 | ) 54 | 55 | yield Static( 56 | f"Sentiment: {self.result.sentiment_emoji} {self.result.sentiment_score:.2f}", 57 | classes="details-sentiment", 58 | ) 59 | yield Static( 60 | f"Votes: ⬆️ {self.result.upvotes} ⬇️ {self.result.downvotes}", 61 | classes="details-votes", 62 | ) 63 | 64 | # Comment text section 65 | yield Label("Comment Text", classes="section-header") 66 | yield Markdown(self.result.text, classes="details-text") 67 | 68 | # Pattern-based PII section 69 | if self.result.pii_matches: 70 | yield Label("Pattern-based PII Detected", classes="section-header") 71 | with Vertical(classes="pii-matches-container"): 72 | for pii in self.result.pii_matches: 73 | yield Static( 74 | f"• {pii.type} (confidence: {pii.confidence:.2f})", 75 | classes="details-pii-item", 76 | ) 77 | 78 | # LLM analysis section 79 | if self.result.llm_findings: 80 | yield Label("LLM Privacy Analysis", classes="section-header") 81 | yield Static( 82 | f"Risk Score: {self.result.llm_risk_score:.2f}", 83 | classes="details-llm-risk", 84 | ) 85 | findings = self.result.llm_findings 86 | has_pii = findings.get("has_pii", False) 87 | yield Static( 88 | f"PII Detected: {'Yes' if has_pii else 'No'}", 89 | classes=f"details-has-pii-{'yes' if has_pii else 'no'}", 90 | ) 91 | if isinstance(findings, dict): 92 | if details_raw := findings.get("details"): 93 | yield Label("Findings:", classes="subsection-header") 94 | # Handle case where details might be a string instead of a list 95 | details_list = [] 96 | if isinstance(details_raw, str): 97 | # Split string by newlines and remove empty lines 98 | details_list = [d.strip() for d in details_raw.split('\n') if d.strip()] 99 | elif isinstance(details_raw, list): 100 | details_list = details_raw # Assume it's the correct list format 101 | else: 102 | # Log or handle unexpected type 
if necessary 103 | self.app.notify(f"Unexpected type for LLM findings details: {type(details_raw)}", severity="warning", title="LLM Data Warning") 104 | 105 | for detail in details_list: 106 | formatted_detail = format_llm_detail(detail, self.app) 107 | yield Static( 108 | "• " + formatted_detail, 109 | classes="details-llm-item" 110 | ) 111 | if reasoning := findings.get("reasoning"): 112 | yield Label("Reasoning:", classes="subsection-header") 113 | yield Markdown(reasoning, classes="details-reasoning") 114 | 115 | # Action buttons at the bottom 116 | with Horizontal(classes="details-actions"): 117 | yield Button("Back", variant="default", id="back-btn") 118 | yield Button("Reddact Comment", variant="primary", id="edit-btn") 119 | yield Button("Delete Comment", variant="error", id="delete-btn") 120 | 121 | def _get_risk_class(self) -> str: 122 | """Get risk class based on PII risk score.""" 123 | if self.result.pii_risk_score > 0.7: 124 | return "high" 125 | elif self.result.pii_risk_score > 0.4: 126 | return "medium" 127 | else: 128 | return "low" 129 | 130 | def on_button_pressed(self, event: Button.Pressed) -> None: 131 | """Handle button press events.""" 132 | button_id = event.button.id 133 | if button_id == "back-btn": 134 | self.action_go_back() 135 | elif button_id == "edit-btn": 136 | self.action_edit_comment() 137 | elif button_id == "delete-btn": 138 | self.action_delete_comment() 139 | 140 | def on_comment_action_screen_action_completed(self, event: message.Message) -> None: 141 | """Handle action_completed events from CommentActionScreen.""" 142 | if event.action == "edit": 143 | # Include random string status in notification if available 144 | random_status = " with random UUID" if hasattr(event, "use_random_string") and event.use_random_string else "" 145 | self.app.notify(f"Comment {self.result.comment_id} successfully edited{random_status}") 146 | else: 147 | self.app.notify(f"Comment {self.result.comment_id} successfully deleted") 148 | 149 | # Return to main screen by popping twice (action screen + details screen) 150 | self.app.pop_screen() # Remove CommentActionScreen 151 | self.app.pop_screen() # Remove DetailsScreen 152 | 153 | 154 | def action_edit_comment(self) -> None: 155 | """Handle editing the current comment.""" 156 | self.app.push_screen(CommentActionScreen(self.result.comment_id, "edit", self.reddit_api, self.use_random_string)) # Pass reddit_api 157 | 158 | def action_delete_comment(self) -> None: 159 | """Handle deleting the current comment.""" 160 | self.app.push_screen(CommentActionScreen(self.result.comment_id, "delete", self.reddit_api, self.use_random_string)) # Pass reddit_api 161 | 162 | def action_go_back(self) -> None: 163 | """Return to the results screen.""" 164 | self.app.pop_screen() 165 | 166 | class DetailActionComplete(message.Message): 167 | """Message sent when returning to main screen.""" 168 | 169 | def __init__(self, comment_id: str, action: str = None): 170 | self.comment_id = comment_id 171 | self.action = action 172 | super().__init__() 173 | -------------------------------------------------------------------------------- /reddacted/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions and helpers for the reddacted package 3 | """ 4 | -------------------------------------------------------------------------------- /reddacted/utils/analysis.py: -------------------------------------------------------------------------------- 1 | from dataclasses import 
dataclass 2 | from typing import List, Dict, Any, Optional 3 | 4 | 5 | @dataclass 6 | class AnalysisResult: 7 | """Holds the results of both sentiment and PII analysis.""" 8 | 9 | comment_id: str 10 | sentiment_score: float 11 | sentiment_emoji: str 12 | pii_risk_score: float 13 | pii_matches: List[Any] 14 | permalink: str 15 | text: str 16 | upvotes: int = 0 17 | downvotes: int = 0 18 | llm_risk_score: float = 0.0 19 | llm_findings: Optional[Dict[str, Any]] = None 20 | -------------------------------------------------------------------------------- /reddacted/utils/base.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Any, Optional 2 | from rich.text import Text 3 | 4 | from reddacted.utils.log_handler import get_logger, with_logging 5 | 6 | 7 | class BaseFormatter: 8 | """Base class for formatters with shared utilities.""" 9 | 10 | def __init__(self): 11 | # Logger not currently used in this base class 12 | pass 13 | 14 | def _get_risk_style(self, score: float) -> str: 15 | """Determines text style based on risk score.""" 16 | if score > 0.5: 17 | return "red" 18 | elif score > 0.2: 19 | return "yellow" 20 | else: 21 | return "green" 22 | 23 | def _format_status( 24 | self, enabled: bool, true_text: str = "Enabled", false_text: str = "Disabled" 25 | ) -> Text: 26 | """Formats a status text based on a boolean value.""" 27 | return Text(true_text if enabled else false_text, style="green" if enabled else "red") 28 | -------------------------------------------------------------------------------- /reddacted/utils/log_handler.py: -------------------------------------------------------------------------------- 1 | # reddacted/utils/log_handler.py 2 | import logging 3 | import sys 4 | import inspect 5 | import traceback 6 | from functools import wraps 7 | from typing import Callable, Any, Optional, Union, Dict, TypeVar, Type 8 | 9 | from rich.console import Console 10 | from rich.panel import Panel 11 | from rich.text import Text 12 | from rich.traceback import Traceback 13 | 14 | # --- Type Definitions --- 15 | T = TypeVar("T") 16 | LoggerType = logging.Logger 17 | LogLevel = Union[int, str] 18 | 19 | # --- Globals --- 20 | console = Console() 21 | 22 | # === Logging Setup and Configuration === 23 | 24 | def setup_logging(initial_level: LogLevel = logging.INFO) -> None: 25 | """Configure root logger with file and console handlers.""" 26 | root_logger = logging.getLogger() 27 | # Set root to DEBUG to capture everything, handlers control output level 28 | root_logger.setLevel(logging.DEBUG) 29 | 30 | # Prevent duplicate handlers if called multiple times 31 | if root_logger.hasHandlers(): 32 | root_logger.handlers.clear() 33 | 34 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s:%(lineno)d - %(message)s') 35 | 36 | # File Handler (writes to reddacted.log in current directory) 37 | try: 38 | file_handler = logging.FileHandler('reddacted.log', mode='a') 39 | file_handler.setLevel(initial_level) # Set initial level 40 | file_handler.setFormatter(formatter) 41 | root_logger.addHandler(file_handler) 42 | except Exception as e: 43 | # Fallback or notify if file logging fails 44 | sys.stderr.write(f"Error setting up file logger: {e}\n") 45 | 46 | # Console Handler (stderr) 47 | console_handler = logging.StreamHandler(sys.stderr) 48 | # Console handler is INFO unless global level is DEBUG 49 | console_level = logging.INFO if initial_level != logging.DEBUG else logging.DEBUG 50 | 
console_handler.setLevel(console_level) 51 | console_handler.setFormatter(formatter) 52 | root_logger.addHandler(console_handler) 53 | 54 | # Set initial level for httpx (less noisy) 55 | logging.getLogger("httpx").setLevel(logging.WARNING if initial_level > logging.DEBUG else logging.DEBUG) 56 | 57 | 58 | def set_global_logging_level(level: LogLevel) -> None: 59 | """Set the global logging level for root logger and handlers. 60 | 61 | Args: 62 | level: The logging level to set globally. Can be an integer level or string name. 63 | 64 | Note: 65 | This affects all existing loggers in the hierarchy. 66 | Some third-party loggers may be set to specific levels for noise reduction. 67 | """ 68 | root_logger = logging.getLogger() 69 | root_logger.setLevel(level) # Set root level first 70 | 71 | # Adjust handler levels 72 | for handler in root_logger.handlers: 73 | if isinstance(handler, logging.FileHandler): 74 | handler.setLevel(level) # File handler matches global level 75 | elif isinstance(handler, logging.StreamHandler): 76 | # Set console handler level directly to the requested global level 77 | handler.setLevel(level) 78 | 79 | # Adjust specific noisy loggers 80 | httpx_level = logging.WARNING if level > logging.DEBUG else logging.DEBUG 81 | logging.getLogger("httpx").setLevel(httpx_level) 82 | 83 | 84 | # === Logger Retrieval and Contextual Logging === 85 | 86 | def get_logger(name: str) -> LoggerType: 87 | """Get or create a logger with consistent formatting and contextual logging methods. 88 | 89 | Args: 90 | name: The name of the logger, typically __name__ 91 | 92 | Returns: 93 | A Logger instance with additional contextual logging methods 94 | 95 | Example: 96 | >>> logger = get_logger(__name__) # Level is now controlled globally 97 | >>> logger.info_with_context("Starting process") 98 | """ 99 | logger = logging.getLogger(name) 100 | # Level is inherited from root logger and its handlers 101 | 102 | # Check if methods already exist to avoid adding them multiple times 103 | if not hasattr(logger, "debug_with_context"): 104 | def make_log_method(log_level: int) -> Callable[[str, Optional[Callable]], None]: 105 | def log_method(msg: str, func: Optional[Callable] = None) -> None: 106 | log_with_context(logger, log_level, msg, func) 107 | 108 | return log_method 109 | 110 | # Add typed convenience methods 111 | setattr(logger, "debug_with_context", make_log_method(logging.DEBUG)) 112 | setattr(logger, "info_with_context", make_log_method(logging.INFO)) 113 | setattr(logger, "warning_with_context", make_log_method(logging.WARNING)) 114 | setattr(logger, "error_with_context", make_log_method(logging.ERROR)) 115 | setattr(logger, "critical_with_context", make_log_method(logging.CRITICAL)) 116 | 117 | return logger 118 | 119 | 120 | def get_log_context(func: Callable[..., Any], frame: Optional[Any] = None) -> str: 121 | """Get standardized logging context with file, function, and line number. 122 | 123 | Args: 124 | func: The function from which the log was called 125 | frame: Optional stack frame, will get caller's frame if None 126 | 127 | Returns: 128 | A formatted string with module, function and line information 129 | """ 130 | if frame is None: 131 | frame = sys._getframe(2) # Get caller's frame 132 | return f"[{func.__module__}:{func.__name__}:{frame.f_lineno}]" 133 | 134 | 135 | def log_with_context( 136 | logger: LoggerType, level: int, msg: str, func: Optional[Callable[..., Any]] = None 137 | ) -> None: 138 | """Log a message with standardized context information. 
139 | 140 | Args: 141 | logger: The logger instance to use 142 | level: The logging level for this message 143 | msg: The message to log 144 | func: Optional function to use for context, defaults to caller 145 | 146 | Note: 147 | This function automatically adds context information including: 148 | - Module name 149 | - Function name 150 | - Line number 151 | """ 152 | frame = sys._getframe(1) 153 | if func is None: 154 | # Try to get caller's context if func is not provided 155 | try: 156 | frame = sys._getframe(2) # Go one level deeper to get the caller of log_with_context 157 | func_name = frame.f_code.co_name 158 | module_name = frame.f_globals.get("__name__", "unknown_module") 159 | context = f"[{module_name}:{func_name}:{frame.f_lineno}]" 160 | except (ValueError, AttributeError): 161 | context = "[unknown_context]" # Fallback if frame inspection fails 162 | else: 163 | context = get_log_context(func, frame) 164 | logger.log(level, f"{context} {msg}") 165 | 166 | 167 | # === Exception Handling and Formatting === 168 | 169 | def format_exception(exc: Exception, include_trace: bool = False) -> str: 170 | """Format exception with source location and clean message 171 | 172 | Args: 173 | exc: The exception to format 174 | include_trace: Whether to include full traceback 175 | 176 | Returns: 177 | Formatted error message with location and optional trace 178 | 179 | Raises: 180 | TypeError: If exc is not an Exception instance 181 | """ 182 | if not isinstance(exc, Exception): 183 | raise TypeError("exc must be an Exception instance") 184 | 185 | # Get the exception chain 186 | exc_chain = [] 187 | current = exc 188 | while current: 189 | exc_chain.append(current) 190 | # Prefer __cause__ for explicit chaining, fallback to __context__ 191 | current = getattr(current, '__cause__', None) or getattr(current, '__context__', None) 192 | # Avoid infinite loops with self-referential contexts 193 | if current in exc_chain: 194 | break 195 | 196 | 197 | # Get traceback information 198 | tb = getattr(exc, "__traceback__", None) or sys.exc_info()[2] 199 | module_name = "unknown_module" 200 | func_name = "unknown_function" 201 | line_no = 0 202 | 203 | if tb: 204 | # Find the deepest relevant frame in the traceback 205 | relevant_tb = tb 206 | while relevant_tb.tb_next: 207 | relevant_tb = relevant_tb.tb_next 208 | 209 | try: 210 | frame = relevant_tb.tb_frame 211 | func_name = frame.f_code.co_name 212 | line_no = relevant_tb.tb_lineno 213 | module = inspect.getmodule(frame) 214 | module_name = module.__name__ if module else "unknown_module" 215 | except (AttributeError, ValueError): 216 | # Fallback if frame access fails 217 | pass # Keep defaults 218 | 219 | # Build the error message 220 | messages = [] 221 | for i, e in enumerate(reversed(exc_chain)): 222 | try: 223 | error_type = e.__class__.__name__ 224 | error_msg = str(e) 225 | except Exception: 226 | error_type = "UnknownError" 227 | error_msg = "Failed to format error message" 228 | 229 | if i == 0: # Original exception 230 | messages.append(f"[bold red]{error_type}[/]: {error_msg}") 231 | else: # Caused by / Context 232 | messages.append(f"[dim]Caused by: {error_type}: {error_msg}[/]") 233 | 234 | location = f"[dim]Location: {module_name}.{func_name}(), line {line_no}[/]" 235 | 236 | if include_trace and tb is not None: 237 | try: 238 | # Use rich Traceback for pretty printing 239 | rich_trace = Traceback.from_exception( 240 | exc_type=type(exc), 241 | exc_value=exc, 242 | traceback=tb, 243 | show_locals=False # Keep it concise by default 244 
| ) 245 | # Convert rich Traceback to string for return 246 | trace_str = "\n".join(str(line) for line in console.render_lines(rich_trace)) 247 | return "\n".join(messages + [location, "", trace_str]) 248 | except Exception as format_err: 249 | # Fallback if rich traceback formatting fails 250 | fallback_trace = "".join(traceback.format_exception(type(exc), exc, tb)) 251 | return "\n".join(messages + [location, "", f"Failed to format traceback with Rich: {format_err}\n{fallback_trace}"]) 252 | 253 | return "\n".join(messages + [location]) 254 | 255 | 256 | def handle_exception(exc: Exception, context: Optional[str] = None, debug: bool = False) -> None: 257 | """Logs and prints a formatted exception with optional context and debugging. 258 | 259 | Args: 260 | exc: The exception to handle 261 | context: Optional context about what was happening 262 | debug: Whether to include full traceback in logs and output 263 | """ 264 | # Use get_logger internally to ensure we have a logger instance 265 | internal_logger = get_logger(__name__) 266 | 267 | # Log the full exception details for debugging purposes 268 | # exc_info=True automatically includes traceback if available 269 | log_context = f"Error in {context or 'unknown context'}" 270 | internal_logger.error(log_context, exc_info=exc if debug else False) 271 | 272 | # Format the error message for console output 273 | error_msg = format_exception(exc, include_trace=debug) 274 | if context: 275 | # Prepend the user-provided context to the formatted message 276 | error_msg = f"[yellow]Context:[/yellow] {context}\n{error_msg}" 277 | 278 | # Print the formatted error to the console using Rich Panel 279 | console.print( 280 | Panel(Text.from_markup(error_msg), title="[bold red]Error[/]", border_style="red", expand=False) 281 | ) 282 | 283 | 284 | # === Decorator === 285 | 286 | def with_logging(logger: LoggerType) -> Callable[[Callable[..., T]], Callable[..., T]]: 287 | """Decorator that adds logging context and exception handling to methods. 
288 | 289 | Args: 290 | logger: The logger instance to use 291 | 292 | Returns: 293 | A decorator function that wraps the original function with logging 294 | 295 | Example: 296 | >>> logger = get_logger(__name__) 297 | >>> @with_logging(logger) 298 | >>> def process_data(data: dict) -> None: 299 | >>> # Function implementation 300 | """ 301 | 302 | def decorator(func: Callable[..., T]) -> Callable[..., T]: 303 | @wraps(func) 304 | def wrapper(*args: Any, **kwargs: Any) -> T: 305 | frame = sys._getframe(1) # Get caller's frame (wrapper's caller) 306 | context_str = get_log_context(func, frame) # Use the original func for context 307 | 308 | try: 309 | logger.debug(f"{context_str} Starting {func.__name__}") 310 | result = func(*args, **kwargs) 311 | logger.debug(f"{context_str} Completed {func.__name__}") 312 | return result 313 | except Exception as e: 314 | error_msg_context = f"Exception in {func.__name__}" 315 | # Call the local handle_exception function directly 316 | # Determine debug flag based on logger's effective level 317 | is_debug = logger.getEffectiveLevel() <= logging.DEBUG 318 | handle_exception(e, error_msg_context, debug=is_debug) 319 | raise # Re-raise the exception after handling 320 | 321 | return wrapper 322 | 323 | return decorator -------------------------------------------------------------------------------- /reddacted/utils/panels.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Dict, Any, Optional, Tuple 3 | from itertools import zip_longest 4 | 5 | from rich.panel import Panel 6 | from rich.columns import Columns 7 | from rich.console import Group 8 | from rich.text import Text 9 | from rich.table import Table 10 | 11 | from reddacted.utils.analysis import AnalysisResult 12 | from reddacted.utils.base import BaseFormatter 13 | from reddacted.utils.log_handler import get_logger, with_logging 14 | from reddacted.utils.tables import TableFormatter 15 | from reddacted.utils.report import format_llm_detail 16 | 17 | 18 | class PanelFormatter(BaseFormatter): 19 | """Handles creation and formatting of Rich panels for the Reddit comment analysis UI.""" 20 | 21 | def __init__(self): 22 | super().__init__() 23 | # Logger not currently used in this class 24 | self.table_formatter = TableFormatter() 25 | 26 | def create_features_panel( 27 | self, 28 | auth_enabled: bool, 29 | pii_enabled: bool, 30 | llm_config: Optional[Dict[str, Any]], 31 | pii_only: bool, 32 | limit: int, 33 | sort: str, 34 | use_random_string: bool = False, 35 | ) -> Panel: 36 | """Creates a panel displaying the features configuration.""" 37 | # Create a table with two columns 38 | features_table = Table( 39 | show_header=False, box=None, padding=(0, 2), collapse_padding=True, expand=True 40 | ) 41 | features_table.add_column("Left", ratio=1, justify="left") 42 | features_table.add_column("Right", ratio=1, justify="left") 43 | 44 | # Define all config items 45 | config_items = [ 46 | ("🔐 Authentication", self._format_status(auth_enabled)), 47 | ("🔍 PII Detection", self._format_status(pii_enabled)), 48 | ( 49 | "🤖 LLM Analysis", 50 | ( 51 | Text(llm_config["model"], style="green") # Display model name if available 52 | if llm_config and llm_config.get("model") 53 | else Text("Not Selected", style="yellow") # Indicate if URL/Key provided but no model 54 | if llm_config 55 | else self._format_status(False) # Disabled if no LLM config at all 56 | ), 57 | ), 58 | ("🎯 PII-Only Filter", self._format_status(pii_only, "Active", 
"Inactive")), 59 | ("🎲 Random String", self._format_status(use_random_string, "Enabled", "Disabled")), 60 | ("📊 Comment Limit", Text(f"{limit}" if limit else "Unlimited", style="cyan")), 61 | ("📑 Sort Preference", Text(f"{sort}" if sort else "New", style="cyan")), 62 | ] 63 | 64 | # Split items into two columns 65 | mid_point = (len(config_items) + 1) // 2 66 | left_items = config_items[:mid_point] 67 | right_items = config_items[mid_point:] 68 | 69 | # Create formatted text for each column 70 | for left, right in zip_longest(left_items, right_items, fillvalue=None): 71 | left_text = Text.assemble(f"{left[0]}: ", left[1]) if left else Text("") 72 | right_text = Text.assemble(f"{right[0]}: ", right[1]) if right else Text("") 73 | features_table.add_row(left_text, right_text) 74 | 75 | return Panel( 76 | features_table, 77 | title="[bold]Features[/]", 78 | border_style="blue", 79 | padding=(1, 1), 80 | expand=True, 81 | ) 82 | 83 | def create_auth_panel(self, config: Dict[str, Any]) -> Panel: 84 | """Creates a panel displaying the authentication status and values based on config and environment.""" 85 | auth_enabled = config.get("enable_auth", False) 86 | auth_texts = [] 87 | 88 | # Determine status based on config first, then environment 89 | username_config = config.get("reddit_username") 90 | client_id_config = config.get("reddit_client_id") 91 | username_env = os.environ.get("REDDIT_USERNAME") 92 | client_id_env = os.environ.get("REDDIT_CLIENT_ID") 93 | 94 | # --- Username Status --- 95 | username_value = None 96 | username_style = "red" 97 | username_source = "" 98 | 99 | if auth_enabled and username_config: 100 | username_value = username_config 101 | username_style = "green" 102 | username_source = " (Config)" 103 | elif username_env: 104 | username_value = username_env 105 | username_style = "blue" 106 | username_source = " (Env Var)" 107 | 108 | if username_value: 109 | auth_texts.append( 110 | Text.assemble("REDDIT_USERNAME: ", (username_value, username_style), username_source) 111 | ) 112 | else: 113 | auth_texts.append(Text("REDDIT_USERNAME: Not Set", style="red")) 114 | 115 | 116 | # --- Client ID Status --- 117 | client_id_value = None 118 | client_id_style = "red" 119 | client_id_source = "" 120 | 121 | if auth_enabled and client_id_config: 122 | client_id_value = client_id_config 123 | client_id_style = "green" 124 | client_id_source = " (Config)" 125 | elif client_id_env: 126 | client_id_value = client_id_env 127 | client_id_style = "blue" 128 | client_id_source = " (Env Var)" 129 | 130 | if client_id_value: 131 | # Display only first/last few chars of client_id for brevity/security if desired 132 | # display_client_id = f"{client_id_value[:4]}...{client_id_value[-4:]}" if len(client_id_value) > 8 else client_id_value 133 | display_client_id = client_id_value # Show full ID for now 134 | auth_texts.append( 135 | Text.assemble("REDDIT_CLIENT_ID: ", (display_client_id, client_id_style), client_id_source) 136 | ) 137 | else: 138 | auth_texts.append(Text("REDDIT_CLIENT_ID: Not Set", style="red")) 139 | 140 | # Note: We don't display password or secret for security 141 | 142 | return Panel(Group(*auth_texts), title="[bold]Auth Status[/]", border_style="yellow") 143 | 144 | def create_stats_panel( 145 | self, url: str, total_comments: int, score: float, sentiment: str 146 | ) -> Panel: 147 | """Creates a panel displaying the sentiment analysis summary.""" 148 | # Create metrics table 149 | metrics_table = Table(show_header=False, box=None, padding=(0, 2), collapse_padding=True) 
150 | metrics_table.add_column("Icon", justify="right", style="bold") 151 | metrics_table.add_column("Label", style="bold") 152 | metrics_table.add_column("Value", justify="left") 153 | 154 | # Add rows with proper spacing and alignment 155 | metrics_table.add_row( 156 | "🔍", 157 | "Analysis for:", 158 | ( 159 | f"[link=https://reddit.com/u/{url}]{url}[/]" 160 | if url.startswith("u/") 161 | else f"[cyan]{url}[/]" 162 | ), 163 | ) 164 | metrics_table.add_row("📊", "Comments analyzed:", f"[cyan bold]{total_comments:>4}[/]") 165 | metrics_table.add_row( 166 | "🎭", "Overall Sentiment:", f"[cyan bold]{score:>6.2f}[/] {sentiment}" 167 | ) 168 | 169 | return Panel( 170 | metrics_table, 171 | title="[bold]Sentiment Analysis Summary[/]", 172 | border_style="blue", 173 | padding=(1, 1), 174 | ) 175 | 176 | def create_comment_panel(self, result: AnalysisResult, index: int) -> Panel: 177 | """Creates a panel for a single comment.""" 178 | sub_panels = [self.create_basic_info_panel(result)] 179 | if result.pii_matches: 180 | sub_panels.append(self.create_pii_panel(result)) 181 | if result.llm_findings: 182 | sub_panels.append(self.create_llm_panel(result)) 183 | return Panel(Columns(sub_panels), title=f"[bold]Comment {index}[/]", border_style="cyan") 184 | 185 | def create_basic_info_panel(self, result: AnalysisResult) -> Panel: 186 | """Creates a panel displaying basic comment information.""" 187 | # Create metrics table 188 | metrics_table = Table(show_header=False, box=None, padding=(0, 2), collapse_padding=True) 189 | metrics_table.add_column("Icon", justify="right", style="bold") 190 | metrics_table.add_column("Label", style="bold") 191 | metrics_table.add_column("Value", justify="left") 192 | 193 | # Risk score styling 194 | risk_score_style = "red bold" if result.pii_risk_score > 0.5 else "green bold" 195 | 196 | # Add rows with proper spacing and alignment 197 | metrics_table.add_row( 198 | "🎭", 199 | "Sentiment:", 200 | f"[cyan bold]{result.sentiment_score:>6.2f}[/] {result.sentiment_emoji}", 201 | ) 202 | metrics_table.add_row( 203 | "🔒", "Privacy Risk:", f"[{risk_score_style}]{result.pii_risk_score:>6.2f}[/]" 204 | ) 205 | # Format votes based on whether they're positive or negative 206 | vote_display = ( 207 | f"[green]⬆️ {result.upvotes:>3}[/]" 208 | if result.upvotes > result.downvotes 209 | else ( 210 | f"[red]⬇️ {result.downvotes:>3}[/]" 211 | if result.downvotes > result.upvotes 212 | else f"[dim]0[/]" 213 | ) 214 | ) 215 | 216 | # Combine comment text and metrics 217 | basic_info = Group( 218 | Text(result.text, style="white"), Text("─" * 50, style="dim"), metrics_table 219 | ) 220 | 221 | return Panel(basic_info, title="[bold]Basic Info[/]", border_style="blue", padding=(1, 1)) 222 | 223 | def create_pii_panel(self, result: AnalysisResult) -> Panel: 224 | """Creates a panel displaying pattern-based PII matches.""" 225 | pii_contents = [ 226 | Text(f"• {pii.type} (confidence: {pii.confidence:.2f})", style="cyan") 227 | for pii in result.pii_matches 228 | ] 229 | return Panel( 230 | Group(*pii_contents), title="[bold]Pattern-based PII Detected[/]", border_style="yellow" 231 | ) 232 | 233 | def create_llm_panel(self, result: AnalysisResult) -> Panel: 234 | """Creates a panel displaying LLM analysis findings.""" 235 | # Create metrics table similar to basic info panel 236 | metrics_table = Table(show_header=False, box=None, padding=(0, 2), collapse_padding=True) 237 | metrics_table.add_column("Icon", justify="right", style="bold") 238 | metrics_table.add_column("Label", style="bold") 239 
| metrics_table.add_column("Value", justify="left") 240 | 241 | if isinstance(result.llm_findings, dict) and "error" in result.llm_findings: 242 | error_group = self.create_llm_error_content(result.llm_findings["error"]) 243 | return Panel(error_group, title="[bold]LLM Analysis[/]", border_style="red") 244 | 245 | # Risk score styling 246 | risk_style = "red bold" if result.llm_risk_score > 0.5 else "green bold" 247 | pii_style = "red bold" if result.llm_findings.get("has_pii", False) else "green bold" 248 | 249 | # Add main metrics rows 250 | metrics_table.add_row( 251 | "🎯", "Risk Score:", f"[{risk_style}]{result.llm_risk_score:>6.2f}[/]" 252 | ) 253 | metrics_table.add_row( 254 | "🔍", 255 | "PII Detected:", 256 | f"[{pii_style}]{'Yes' if result.llm_findings.get('has_pii') else 'No':>6}[/]", 257 | ) 258 | 259 | # Create content groups 260 | content_groups = [metrics_table] 261 | 262 | # Add findings if present 263 | if details := result.llm_findings.get("details"): 264 | content_groups.extend( 265 | [ 266 | Text("\n📋 Findings:", style="bold"), 267 | *[Text(f" • {format_llm_detail(detail)}", style="cyan") for detail in details], 268 | ] 269 | ) 270 | 271 | # Add risk factors if present 272 | if risk_factors := result.llm_findings.get("risk_factors"): 273 | content_groups.extend( 274 | [ 275 | Text("\n⚠️ Risk Factors:", style="bold"), 276 | *[Text(f" • {factor}", style="yellow") for factor in risk_factors], 277 | ] 278 | ) 279 | 280 | return Panel( 281 | Group(*content_groups), 282 | title="[bold]LLM Analysis[/]", 283 | border_style="magenta", 284 | padding=(1, 1), 285 | ) 286 | 287 | def create_llm_error_content(self, error_msg: str) -> Group: 288 | """Creates content for LLM analysis errors.""" 289 | error_table = Table(show_header=False, box=None, padding=(0, 2)) 290 | error_table.add_column(style="red") 291 | error_table.add_row("❌ LLM Analysis Failed") 292 | error_table.add_row(f"Error: {error_msg}") 293 | error_table.add_row( 294 | "Please check your OpenAI API key and ensure you have sufficient credits." 
295 | ) 296 | return Group(error_table) 297 | 298 | def create_summary_panel(self, summary_table: Table) -> Panel: 299 | """Creates a panel displaying the action summary.""" 300 | return Panel( 301 | summary_table, title="[bold]Output Review[/]", border_style="green", padding=(1, 4) 302 | ) 303 | 304 | def create_action_panel(self, filtered_results: List[AnalysisResult], use_random_string: bool = False) -> Panel: 305 | """Creates a panel displaying actions for high-risk comments.""" 306 | high_risk_comments = [ 307 | r 308 | for r in filtered_results 309 | if r.pii_risk_score > 0.5 or (r.llm_findings and r.llm_findings.get("has_pii", False)) 310 | ] 311 | comment_ids = [r.comment_id for r in high_risk_comments] 312 | if comment_ids: 313 | action_text = Group( 314 | Text("Ready-to-use commands for high-risk comments:", style="bold yellow"), 315 | Text( 316 | f"Delete comments:\nreddacted delete {' '.join(comment_ids)}", 317 | style="italic red", 318 | ), 319 | Text( 320 | f"\nReddact (edit) comments:" + 321 | (f"\nreddacted update {' '.join(comment_ids)} --use-random-string" if use_random_string else 322 | f"\nreddacted update {' '.join(comment_ids)}"), 323 | style="italic blue", 324 | ), 325 | ) 326 | else: 327 | action_text = Text("No high-risk comments found.", style="green") 328 | return Panel(action_text, border_style="yellow", title="[bold]Actions[/]") 329 | 330 | def create_completion_panel( 331 | self, 332 | filename: str, 333 | total_comments: int, 334 | total_pii_comments: int, 335 | total_llm_pii_comments: int, 336 | ) -> Panel: 337 | """Creates a panel for the completion message with file info.""" 338 | return Panel( 339 | Text.assemble( 340 | ("📄 Report saved to ", "bold blue"), 341 | (f"{filename}\n", "bold yellow"), 342 | ("🗒️ Total comments: ", "bold blue"), 343 | (f"{total_comments}\n", "bold cyan"), 344 | ("🔐 PII detected in: ", "bold blue"), 345 | (f"{total_pii_comments} ", "bold red"), 346 | (f"({total_pii_comments/total_comments:.1%})\n", "dim"), 347 | ("🤖 LLM findings in: ", "bold blue"), 348 | (f"{total_llm_pii_comments} ", "bold magenta"), 349 | (f"({total_llm_pii_comments/total_comments:.1%})", "dim"), 350 | ), 351 | title="[bold green]Analysis Complete[/]", 352 | border_style="green", 353 | padding=(1, 4), 354 | ) 355 | -------------------------------------------------------------------------------- /reddacted/utils/progress.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn 4 | 5 | from reddacted.utils.log_handler import get_logger, with_logging 6 | from reddacted.utils.base import BaseFormatter 7 | 8 | logger = get_logger(__name__) 9 | 10 | class ProgressManager(BaseFormatter): 11 | """Manages progress bars and indicators.""" 12 | 13 | def __init__(self): 14 | super().__init__() 15 | self._progress: Optional[Progress] = None 16 | 17 | @with_logging(logger) 18 | def create_progress(self) -> Progress: 19 | """Creates a unified progress context manager.""" 20 | if not hasattr(self, "_progress") or self._progress is None: 21 | self._progress = Progress( 22 | SpinnerColumn(spinner_name="dots"), 23 | TextColumn("[bold blue]{task.description}"), 24 | TimeElapsedColumn(), 25 | transient=True, 26 | ) 27 | return self._progress 28 | -------------------------------------------------------------------------------- /reddacted/utils/report.py: -------------------------------------------------------------------------------- 1 
| """ 2 | Report Generator Module 3 | 4 | This module provides functions to generate analysis reports. 5 | It extracts the report header and output file generation logic from ResultsScreen, 6 | allowing for a single-call report generation. 7 | """ 8 | 9 | from typing import List, Dict, Any, Optional, TYPE_CHECKING 10 | 11 | if TYPE_CHECKING: 12 | from dataclasses import dataclass 13 | 14 | @dataclass 15 | class AnalysisResult: 16 | """Type hint for analysis result structure""" 17 | 18 | comment_id: str 19 | sentiment_score: float 20 | sentiment_emoji: str 21 | pii_risk_score: float 22 | pii_matches: List[Any] 23 | text: str 24 | upvotes: int 25 | downvotes: int 26 | llm_risk_score: float 27 | llm_findings: Optional[Dict[str, Any]] 28 | 29 | 30 | def write_report_header( 31 | target, url: str, overall_score: float, overall_sentiment: str, num_comments: int 32 | ) -> None: 33 | """ 34 | Writes the header section of the analysis report. 35 | """ 36 | target.write(f"# Analysis Report for '{url}'\n\n") 37 | target.write(f"- **Overall Sentiment Score**: {overall_score:.2f}\n") 38 | target.write(f"- **Overall Sentiment**: {overall_sentiment}\n") 39 | target.write(f"- **Comments Analyzed**: {num_comments}\n\n") 40 | target.write("---\n\n") 41 | 42 | 43 | __all__ = [ 44 | "generate_analysis_report", 45 | "should_show_result", 46 | "format_llm_detail", 47 | ] 48 | 49 | 50 | def should_show_result(result: "AnalysisResult", pii_only: bool = False) -> bool: 51 | """ 52 | Determines if a result should be shown based on PII detection settings. 53 | """ 54 | if not pii_only: 55 | return True 56 | has_pattern_pii = result.pii_risk_score > 0.0 57 | has_llm_pii = ( 58 | result.llm_findings is not None 59 | and isinstance(result.llm_findings, dict) 60 | and result.llm_findings.get("has_pii", False) 61 | and result.llm_findings.get("confidence", 0.0) > 0.0 62 | ) 63 | return has_pattern_pii or has_llm_pii 64 | 65 | 66 | def format_llm_detail(detail: Any, app=None) -> str: 67 | """Formats LLM detail information.""" 68 | if isinstance(detail, dict): 69 | formatted = ( 70 | f"{detail.get('type', 'Finding')}: {detail.get('example', 'N/A')}" 71 | or f"{detail.get('finding', 'N/A')}: {detail.get('reasoning', '')}" 72 | ) 73 | return formatted.replace('\n', ' ') # Replace newlines with spaces 74 | return str(detail) 75 | 76 | 77 | def write_comment_details(target, result: "AnalysisResult", index: int) -> None: 78 | """ 79 | Writes detailed analysis for a single comment. 
80 | """ 81 | target.write(f"## Comment {index}\n\n") 82 | target.write(f"**Text**: {result.text}\n\n") 83 | target.write(f"- Sentiment Score: `{result.sentiment_score:.2f}` {result.sentiment_emoji}\n") 84 | target.write(f"- PII Risk Score: `{result.pii_risk_score:.2f}`\n") 85 | target.write(f"- Votes: ⬆️ `{result.upvotes}` ⬇️ `{result.downvotes}`\n") 86 | target.write(f"- Comment ID: `{result.comment_id}`\n\n") 87 | if result.pii_matches: 88 | target.write("### Pattern-based PII Detected\n") 89 | for pii in result.pii_matches: 90 | target.write(f"- **{pii.type}** (confidence: {pii.confidence:.2f})\n") 91 | target.write("\n") 92 | if result.llm_findings: 93 | target.write("### LLM Privacy Analysis\n") 94 | target.write(f"- **Risk Score**: `{result.llm_risk_score:.2f}`\n") 95 | if isinstance(result.llm_findings, dict): 96 | target.write( 97 | f"- **PII Detected**: {'Yes' if result.llm_findings.get('has_pii') else 'No'}\n" 98 | ) 99 | if details := result.llm_findings.get("details"): 100 | target.write("\n#### Findings\n") 101 | for detail in details: 102 | target.write(f"- {format_llm_detail(detail)}\n") 103 | if reasoning := result.llm_findings.get("reasoning"): 104 | target.write(f"\n#### Reasoning\n{reasoning}\n") 105 | target.write("\n") 106 | target.write("---\n\n") 107 | 108 | 109 | def write_summary_section( 110 | target, 111 | total_comments: int, 112 | sentiment_scores: List[float], 113 | max_risk_score: float, 114 | riskiest_comment: str, 115 | total_pii_comments: int = 0, 116 | total_llm_pii_comments: int = 0, 117 | ) -> None: 118 | """ 119 | Writes the summary section of the analysis report. 120 | """ 121 | average_sentiment = sum(sentiment_scores) / len(sentiment_scores) if sentiment_scores else 0.0 122 | target.write("\n# Summary\n\n") 123 | target.write(f"- Total Comments Analyzed: {total_comments}\n") 124 | target.write( 125 | f"- Comments with PII Detected: {total_pii_comments} ({total_pii_comments/total_comments:.1%})\n" 126 | ) 127 | target.write( 128 | f"- Comments with LLM Privacy Risks: {total_llm_pii_comments} ({total_llm_pii_comments/total_comments:.1%})\n" 129 | ) 130 | target.write(f"- Average Sentiment Score: {average_sentiment:.2f}\n") 131 | target.write(f"- Highest PII Risk Score: {max_risk_score:.2f}\n") 132 | if riskiest_comment: 133 | target.write(f"- Riskiest Comment Preview: '{riskiest_comment}'\n") 134 | target.write("✅ Analysis complete\n") 135 | 136 | 137 | def generate_analysis_report( 138 | filename: str, 139 | comments: List[Dict[str, Any]], 140 | url: str, 141 | results: List["AnalysisResult"], 142 | overall_score: float, 143 | overall_sentiment: str, 144 | pii_only: bool = False, 145 | ) -> Dict[str, int]: 146 | """ 147 | Generates an analysis report by writing the header, comment details, and summary. 148 | 149 | This function encapsulates the report generation logic previously embedded 150 | in the ResultsScreen, thereby reducing file bloat. 
151 | 152 | Returns: 153 | Dict containing statistics about the analysis (total_pii_comments, total_llm_pii_comments) 154 | """ 155 | try: 156 | sentiment_scores: List[float] = [] 157 | max_risk_score = 0.0 158 | riskiest_comment = "" 159 | total_pii_comments = 0 160 | total_llm_pii_comments = 0 161 | 162 | with open(filename, "w") as target: 163 | write_report_header(target, url, overall_score, overall_sentiment, len(comments)) 164 | 165 | for idx, result in enumerate(results, 1): 166 | if not should_show_result(result, pii_only): 167 | continue 168 | write_comment_details(target, result, idx) 169 | 170 | # Update statistics 171 | sentiment_scores.append(result.sentiment_score) 172 | if result.pii_risk_score > 0: 173 | total_pii_comments += 1 174 | if result.llm_risk_score > 0 or ( 175 | result.llm_findings and result.llm_findings.get("has_pii", False) 176 | ): 177 | total_llm_pii_comments += 1 178 | 179 | if result.pii_risk_score > max_risk_score: 180 | max_risk_score = result.pii_risk_score 181 | riskiest_comment = ( 182 | (result.text[:100] + "...") if len(result.text) > 100 else result.text 183 | ) 184 | 185 | write_summary_section( 186 | target, 187 | len(comments), 188 | sentiment_scores, 189 | max_risk_score, 190 | riskiest_comment, 191 | total_pii_comments, 192 | total_llm_pii_comments, 193 | ) 194 | 195 | print(f"Report generated successfully at {filename}") 196 | return { 197 | "total_pii_comments": total_pii_comments, 198 | "total_llm_pii_comments": total_llm_pii_comments, 199 | } 200 | except Exception as e: 201 | print(f"Error generating report: {e}") 202 | raise 203 | -------------------------------------------------------------------------------- /reddacted/utils/tables.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | from itertools import zip_longest 3 | 4 | from rich.table import Table 5 | from rich.text import Text 6 | 7 | from reddacted.utils.base import BaseFormatter 8 | from reddacted.utils.analysis import AnalysisResult 9 | 10 | 11 | class TableFormatter(BaseFormatter): 12 | """Handles creation and formatting of tables.""" 13 | 14 | def generate_summary_table(self, filtered_results: List[AnalysisResult]) -> Table: 15 | """Generates a summary table with selection indicators.""" 16 | table = Table(header_style="bold magenta", box=None, padding=(0, 1), collapse_padding=True) 17 | table.add_column("Risk", justify="center", style="bold", width=10) 18 | table.add_column("Sentiment", justify="center", width=15) 19 | table.add_column("Comment Preview", justify="center", width=75) 20 | table.add_column("Votes", justify="center", width=10) 21 | table.add_column("ID", justify="center", width=10) 22 | 23 | for result in filtered_results: 24 | risk_style = self._get_risk_style(result.pii_risk_score) 25 | risk_text = Text(f"{result.pii_risk_score:.0%}", style=risk_style) 26 | permalink = f"https://reddit.com{result.permalink}" 27 | preview = (result.text[:67] + "...") if len(result.text) > 70 else result.text 28 | preview = f"[link={permalink}]{preview}[/link]" 29 | 30 | vote_display = ( 31 | f"[green]⬆️ {result.upvotes:>3}[/]" 32 | if result.upvotes > result.downvotes 33 | else ( 34 | f"[red]⬇️ {result.downvotes:>3}[/]" 35 | if result.downvotes > result.upvotes 36 | else f"[dim]0[/]" 37 | ) 38 | ) 39 | 40 | table.add_row( 41 | risk_text, 42 | Text(f"{result.sentiment_emoji} {result.sentiment_score:.2f}"), 43 | preview, 44 | vote_display, 45 | result.comment_id, 46 | ) 47 | 48 | return table 
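# Example usage for generate_summary_table — a minimal sketch, assuming results produced
# elsewhere by the analysis pipeline; the AnalysisResult field values below are hypothetical
# and chosen only to illustrate how the summary table renders:
#
#   from rich.console import Console
#   from reddacted.utils.analysis import AnalysisResult
#   from reddacted.utils.tables import TableFormatter
#
#   sample = AnalysisResult(
#       comment_id="abc123",                                  # hypothetical comment ID
#       sentiment_score=0.42,
#       sentiment_emoji="🙂",
#       pii_risk_score=0.85,
#       pii_matches=[],
#       permalink="/r/example/comments/xyz/example_post/abc123/",
#       text="Example comment text shown in the preview column",
#       upvotes=12,
#       downvotes=2,
#   )
#   Console().print(TableFormatter().generate_summary_table([sample]))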
49 | 50 | def create_features_table( 51 | self, 52 | auth_enabled: bool, 53 | pii_enabled: bool, 54 | llm_config: Optional[Dict[str, Any]], 55 | pii_only: bool, 56 | limit: int, 57 | sort: str, 58 | ) -> Table: 59 | """Creates a table displaying the features configuration.""" 60 | features_table = Table( 61 | show_header=False, box=None, padding=(0, 2), collapse_padding=True, expand=True 62 | ) 63 | features_table.add_column("Left", ratio=1, justify="left") 64 | features_table.add_column("Right", ratio=1, justify="left") 65 | 66 | # Define all config items 67 | config_items = [ 68 | ("🔐 Authentication", self._format_status(auth_enabled)), 69 | ("🔍 PII Detection", self._format_status(pii_enabled)), 70 | ( 71 | "🤖 LLM Analysis", 72 | ( 73 | Text(llm_config["model"], style="green") 74 | if llm_config 75 | else self._format_status(False) 76 | ), 77 | ), 78 | ("🎯 PII-Only Filter", self._format_status(pii_only, "Active", "Inactive")), 79 | ("📊 Comment Limit", Text(f"{limit}" if limit else "Unlimited", style="cyan")), 80 | ("📑 Sort Preference", Text(f"{sort}" if sort else "New", style="cyan")), 81 | ] 82 | 83 | # Split items into two columns 84 | mid_point = (len(config_items) + 1) // 2 85 | left_items = config_items[:mid_point] 86 | right_items = config_items[mid_point:] 87 | 88 | # Create formatted text for each column 89 | for left, right in zip_longest(left_items, right_items, fillvalue=None): 90 | left_text = Text.assemble(f"{left[0]}: ", left[1]) if left else Text("") 91 | right_text = Text.assemble(f"{right[0]}: ", right[1]) if right else Text("") 92 | features_table.add_row(left_text, right_text) 93 | 94 | return features_table 95 | -------------------------------------------------------------------------------- /reddacted/version.py: -------------------------------------------------------------------------------- 1 | """Version information.""" 2 | 3 | __version__ = "0.2.5" 4 | version = __version__ 5 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Configuration - Version comes from version.py 5 | VERSION_FILE="reddacted/version.py" 6 | GITHUB_USER="taylorwilsdon" 7 | REPO="reddacted" 8 | UPDATE_DEPS_ONLY=false 9 | 10 | # Parse arguments 11 | while [[ "$#" -gt 0 ]]; do 12 | case $1 in 13 | --update-deps-only) UPDATE_DEPS_ONLY=true ;; 14 | *) echo "Unknown parameter: $1"; exit 1 ;; 15 | esac 16 | shift 17 | done 18 | 19 | # Colors for output 20 | GREEN='\033[0;32m' 21 | YELLOW='\033[1;33m' 22 | NC='\033[0m' # No Color 23 | 24 | # Extract version from version.py file 25 | VERSION=$(grep -o '__version__ = "[^"]*"' "$VERSION_FILE" | cut -d'"' -f2) 26 | echo -e "${YELLOW}Starting release process for reddacted v${VERSION}${NC}" 27 | 28 | # 1. Check for required tools 29 | if ! command -v jq &> /dev/null; then 30 | echo -e "${YELLOW}jq not found. Please install it to update dependencies.${NC}" 31 | echo -e "${YELLOW}On macOS: brew install jq${NC}" 32 | exit 1 33 | fi 34 | 35 | if [ "$UPDATE_DEPS_ONLY" = false ]; then 36 | # 2. Ensure we're on the main branch 37 | git checkout main 38 | # Skip git pull if no upstream is configured 39 | git rev-parse --abbrev-ref @{upstream} >/dev/null 2>&1 && git pull || echo "No upstream branch configured, skipping pull" 40 | 41 | # 3. Clean build artifacts 42 | echo -e "${YELLOW}Cleaning previous build artifacts...${NC}" 43 | rm -rf dist/ build/ *.egg-info/ 44 | 45 | # 4. 
Build the package with UV (both sdist and wheel) 46 | echo -e "${YELLOW}Building package with UV...${NC}" 47 | uv build --sdist --wheel || { 48 | echo -e "${YELLOW}Failed to build package${NC}" 49 | exit 1 50 | } 51 | 52 | # 5. Create and push git tag 53 | echo -e "${YELLOW}Creating and pushing git tag v${VERSION}...${NC}" 54 | # Improved tag handling - check both local and remote tags 55 | LOCAL_TAG_EXISTS=$(git tag -l "v${VERSION}") 56 | REMOTE_TAG_EXISTS=$(git ls-remote --tags origin "refs/tags/v${VERSION}" | wc -l) 57 | 58 | if [ -n "$LOCAL_TAG_EXISTS" ]; then 59 | echo -e "${YELLOW}Local tag v${VERSION} already exists${NC}" 60 | else 61 | git tag -a "v${VERSION}" -m "Release v${VERSION}" 62 | echo -e "${YELLOW}Created local tag v${VERSION}${NC}" 63 | fi 64 | 65 | # Only push if tag doesn't exist on remote 66 | if [ "$REMOTE_TAG_EXISTS" -eq 0 ]; then 67 | echo -e "${YELLOW}Pushing tag to remote...${NC}" 68 | git push origin "v${VERSION}" || echo "Failed to push tag, continuing anyway" 69 | else 70 | echo -e "${YELLOW}Remote tag v${VERSION} already exists, skipping push${NC}" 71 | fi 72 | 73 | # 6. Create GitHub release 74 | echo -e "${YELLOW}Creating GitHub release...${NC}" 75 | # Check if gh command is available 76 | if ! command -v gh &> /dev/null; then 77 | echo -e "${YELLOW}GitHub CLI not found. Please install it to create releases.${NC}" 78 | echo -e "${YELLOW}Skipping GitHub release creation.${NC}" 79 | else 80 | # Check if release already exists 81 | if gh release view "v${VERSION}" &>/dev/null; then 82 | echo -e "${YELLOW}Release v${VERSION} already exists, skipping creation${NC}" 83 | else 84 | gh release create "v${VERSION}" \ 85 | --title "reddacted v${VERSION}" \ 86 | --notes "Release v${VERSION}" \ 87 | ./dist/* 88 | fi 89 | fi 90 | 91 | # 7. Download the tarball to calculate SHA 92 | echo -e "${YELLOW}Downloading tarball to calculate SHA...${NC}" 93 | TARBALL_PATH="/tmp/${REPO}-${VERSION}.tar.gz" 94 | if curl -sL --fail "https://github.com/${GITHUB_USER}/${REPO}/archive/refs/tags/v${VERSION}.tar.gz" -o "${TARBALL_PATH}"; then 95 | SHA=$(shasum -a 256 "${TARBALL_PATH}" | cut -d ' ' -f 1) 96 | 97 | # Generate new Homebrew formula 98 | echo -e "${YELLOW}Generating new Homebrew formula...${NC}" 99 | if ! python3 scripts/homebrew_formula_generator.py "${VERSION}"; then 100 | echo -e "${YELLOW}Failed to generate Homebrew formula${NC}" 101 | exit 1 102 | fi 103 | else 104 | echo -e "${YELLOW}Failed to download tarball, skipping SHA calculation and Homebrew formula update${NC}" 105 | fi 106 | 107 | # 8. Publish to PyPI if desired 108 | read -p "Do you want to publish to PyPI? (y/n) " -n 1 -r 109 | echo 110 | if [[ $REPLY =~ ^[Yy]$ ]] 111 | then 112 | echo -e "${YELLOW}Publishing to PyPI...${NC}" 113 | if ! uv publish; then 114 | echo -e "${YELLOW}Failed to publish to PyPI${NC}" 115 | exit 1 116 | fi 117 | fi 118 | fi 119 | 120 | # Ensure scripts directory exists and formula generator is executable 121 | if [ ! -d "scripts" ]; then 122 | echo -e "${YELLOW}Creating scripts directory...${NC}" 123 | mkdir -p scripts 124 | fi 125 | 126 | if [ ! -x "scripts/homebrew_formula_generator.py" ]; then 127 | echo -e "${YELLOW}Making formula generator executable...${NC}" 128 | chmod +x scripts/homebrew_formula_generator.py 129 | fi 130 | 131 | # 10. Instructions for Homebrew tap 132 | echo -e "${GREEN}Release v${VERSION} completed!${NC}" 133 | echo -e "${GREEN}To publish to Homebrew:${NC}" 134 | echo -e "1. Create a tap repository: github.com/${GITHUB_USER}/homebrew-tap" 135 | echo -e "2. 
Copy homebrew/reddacted.rb to your tap repository" 136 | echo -e "3. Users can then install with: brew install ${GITHUB_USER}/tap/reddacted" 137 | 138 | echo -e "${GREEN}Done!${NC}" 139 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pytest>=7.4.0 2 | black>=23.7.0 3 | mypy>=1.5.1 4 | ruff>=0.0.291 5 | build==1.2.2.post1 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | praw==7.7.1 3 | nltk==3.8.1 4 | requests==2.31.0 5 | six==1.16.0 6 | httpx==0.27.2 7 | openai==1.3.7 8 | rich==13.7.0 9 | textual==2.1.2 10 | # Development dependencies 11 | flake8==7.0.0 12 | build==1.0.3 13 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorwilsdon/reddacted/3efa6363aeebbf62befc73b7e5bcb7e5523738d2/tests/__init__.py -------------------------------------------------------------------------------- /tests/templates/reddit/article.json: -------------------------------------------------------------------------------- 1 | [{"kind": "Listing", "data": {"modhash": "", "dist": 1, "children": [{"kind": "t3", "data": {"approved_at_utc": null, "subreddit": "doge", "selftext": "", "user_reports": [], "saved": false, "mod_reason_title": null, "gilded": 0, "clicked": false, "title": "I drew this doge in 2013 during my first years of grad school. Students loved it.", "link_flair_richtext": [], "subreddit_name_prefixed": "r/doge", "hidden": false, "pwls": 6, "link_flair_css_class": null, "downs": 0, "thumbnail_height": 122, "top_awarded_type": null, "parent_whitelist_status": "all_ads", "hide_score": false, "name": "t3_l7zp94", "quarantine": false, "link_flair_text_color": "dark", "upvote_ratio": 0.98, "author_flair_background_color": null, "subreddit_type": "public", "ups": 128, "total_awards_received": 4, "media_embed": {"content": "<iframe class=\"embedly-embed\" src=\"https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX%2Fembed%3Fpub%3Dtrue%26ref%3Dhttps%253A%252F%252Fembed.ly%26w%3D900&display_name=Imgur&url=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX&image=https%3A%2F%2Fi.imgur.com%2FzCLUa41.jpg%3Ffb&key=2aa3c4d5f3de4f5b9120b660ad850dc9&type=text%2Fhtml&schema=imgur\" width=\"600\" height=\"567\" scrolling=\"no\" title=\"Imgur embed\" frameborder=\"0\" allow=\"autoplay; fullscreen\" allowfullscreen=\"true\"></iframe>", "width": 600, "scrolling": false, "height": 567}, "thumbnail_width": 140, "author_flair_template_id": null, "is_original_content": false, "author_fullname": "t2_bkqxw", "secure_media": {"oembed": {"provider_url": "http://imgur.com", "description": "Post with 0 views.", "title": "Imgur", "url": "https://imgur.com/a/X5hateX", "thumbnail_width": 600, "height": 567, "width": 600, "html": "<iframe class=\"embedly-embed\" src=\"https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX%2Fembed%3Fpub%3Dtrue%26ref%3Dhttps%253A%252F%252Fembed.ly%26w%3D900&display_name=Imgur&url=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX&image=https%3A%2F%2Fi.imgur.com%2FzCLUa41.jpg%3Ffb&key=2aa3c4d5f3de4f5b9120b660ad850dc9&type=text%2Fhtml&schema=imgur\" width=\"600\" height=\"567\" scrolling=\"no\" 
title=\"Imgur embed\" frameborder=\"0\" allow=\"autoplay; fullscreen\" allowfullscreen=\"true\"></iframe>", "version": "1.0", "provider_name": "Imgur", "thumbnail_url": "https://i.imgur.com/zCLUa41.jpg?fb", "type": "rich", "thumbnail_height": 315}, "type": "imgur.com"}, "is_reddit_media_domain": false, "is_meta": false, "category": null, "secure_media_embed": {"content": "<iframe class=\"embedly-embed\" src=\"https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX%2Fembed%3Fpub%3Dtrue%26ref%3Dhttps%253A%252F%252Fembed.ly%26w%3D900&display_name=Imgur&url=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX&image=https%3A%2F%2Fi.imgur.com%2FzCLUa41.jpg%3Ffb&key=2aa3c4d5f3de4f5b9120b660ad850dc9&type=text%2Fhtml&schema=imgur\" width=\"600\" height=\"567\" scrolling=\"no\" title=\"Imgur embed\" frameborder=\"0\" allow=\"autoplay; fullscreen\" allowfullscreen=\"true\"></iframe>", "width": 600, "scrolling": false, "media_domain_url": "https://www.redditmedia.com/mediaembed/l7zp94", "height": 567}, "link_flair_text": null, "can_mod_post": false, "score": 128, "approved_by": null, "is_created_from_ads_ui": false, "author_premium": false, "thumbnail": "https://b.thumbs.redditmedia.com/UH7uKNkjzWKyMsHqX1RULNQCo3LwW0M_-sZI0UjWG9I.jpg", "edited": false, "author_flair_css_class": null, "author_flair_richtext": [], "gildings": {"gid_1": 2}, "post_hint": "link", "content_categories": null, "is_self": false, "mod_note": null, "created": 1611972924.0, "link_flair_type": "text", "wls": 6, "removed_by_category": null, "banned_by": null, "author_flair_type": "text", "domain": "imgur.com", "allow_live_comments": false, "selftext_html": null, "likes": null, "suggested_sort": null, "banned_at_utc": null, "url_overridden_by_dest": "https://imgur.com/a/X5hateX", "view_count": null, "archived": false, "no_follow": false, "is_crosspostable": false, "pinned": false, "over_18": false, "preview": {"images": [{"source": {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?auto=webp&s=3928f490a160694f22f82f1628cadc2d50a9c5fb", "width": 2048, "height": 1798}, "resolutions": [{"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=108&crop=smart&auto=webp&s=880cfc8e4584d8070913af5aeb23adb1434ba516", "width": 108, "height": 94}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=216&crop=smart&auto=webp&s=e23d538fc6094fda00842acaa044b5ea2e5ac24a", "width": 216, "height": 189}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=320&crop=smart&auto=webp&s=afc5f79f3d8311fe385d53d2f3d65ea104054b8b", "width": 320, "height": 280}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=640&crop=smart&auto=webp&s=608c7ab721bb21583c7a507488e5169b60596fcd", "width": 640, "height": 561}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=960&crop=smart&auto=webp&s=9667eb9d3567734bfe54e20f6b51b964987a2d89", "width": 960, "height": 842}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=1080&crop=smart&auto=webp&s=3a283c033e53a68c8260f2ee92036f632d5f7fc5", "width": 1080, "height": 948}], "variants": {}, "id": "kHHZ4X60_wNKr1dAD5CDwgWkoxS63LJ1qKaB_CzUJnE"}], "enabled": false}, "all_awardings": [{"giver_coin_reward": null, "subreddit_id": null, "is_new": false, "days_of_drip_extension": 0, "coin_price": 125, "id": 
"award_5f123e3d-4f48-42f4-9c11-e98b566d5897", "penny_donate": null, "coin_reward": 0, "icon_url": "https://i.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png", "days_of_premium": 0, "icon_height": 2048, "tiers_by_required_awardings": null, "resized_icons": [{"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=16&height=16&auto=webp&s=92932f465d58e4c16b12b6eac4ca07d27e3d11c0", "width": 16, "height": 16}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=32&height=32&auto=webp&s=d11484a208d68a318bf9d4fcf371171a1cb6a7ef", "width": 32, "height": 32}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=48&height=48&auto=webp&s=febdf28b6f39f7da7eb1365325b85e0bb49a9f63", "width": 48, "height": 48}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=64&height=64&auto=webp&s=b4406a2d88bf86fa3dc8a45aacf7e0c7bdccc4fb", "width": 64, "height": 64}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=128&height=128&auto=webp&s=19555b13e3e196b62eeb9160d1ac1d1b372dcb0b", "width": 128, "height": 128}], "icon_width": 2048, "static_icon_width": 2048, "start_date": null, "is_enabled": true, "awardings_required_to_grant_benefits": null, "description": "When you come across a feel-good thing.", "end_date": null, "subreddit_coin_reward": 0, "count": 1, "static_icon_height": 2048, "name": "Wholesome", "resized_static_icons": [{"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=16&height=16&auto=webp&s=92932f465d58e4c16b12b6eac4ca07d27e3d11c0", "width": 16, "height": 16}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=32&height=32&auto=webp&s=d11484a208d68a318bf9d4fcf371171a1cb6a7ef", "width": 32, "height": 32}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=48&height=48&auto=webp&s=febdf28b6f39f7da7eb1365325b85e0bb49a9f63", "width": 48, "height": 48}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=64&height=64&auto=webp&s=b4406a2d88bf86fa3dc8a45aacf7e0c7bdccc4fb", "width": 64, "height": 64}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=128&height=128&auto=webp&s=19555b13e3e196b62eeb9160d1ac1d1b372dcb0b", "width": 128, "height": 128}], "icon_format": null, "award_sub_type": "GLOBAL", "penny_price": null, "award_type": "global", "static_icon_url": "https://i.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png"}, {"giver_coin_reward": null, "subreddit_id": null, "is_new": false, "days_of_drip_extension": 0, "coin_price": 100, "id": "gid_1", "penny_donate": null, "coin_reward": 0, "icon_url": "https://www.redditstatic.com/gold/awards/icon/silver_512.png", "days_of_premium": 0, "icon_height": 512, "tiers_by_required_awardings": null, "resized_icons": [{"url": "https://www.redditstatic.com/gold/awards/icon/silver_16.png", "width": 16, "height": 16}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_32.png", "width": 32, "height": 32}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_48.png", "width": 48, "height": 48}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_64.png", "width": 64, "height": 64}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_128.png", "width": 128, "height": 128}], "icon_width": 512, "static_icon_width": 512, "start_date": null, 
"is_enabled": true, "awardings_required_to_grant_benefits": null, "description": "Shows the Silver Award... and that's it.", "end_date": null, "subreddit_coin_reward": 0, "count": 2, "static_icon_height": 512, "name": "Silver", "resized_static_icons": [{"url": "https://www.redditstatic.com/gold/awards/icon/silver_16.png", "width": 16, "height": 16}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_32.png", "width": 32, "height": 32}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_48.png", "width": 48, "height": 48}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_64.png", "width": 64, "height": 64}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_128.png", "width": 128, "height": 128}], "icon_format": null, "award_sub_type": "GLOBAL", "penny_price": null, "award_type": "global", "static_icon_url": "https://www.redditstatic.com/gold/awards/icon/silver_512.png"}, {"giver_coin_reward": 0, "subreddit_id": null, "is_new": false, "days_of_drip_extension": 0, "coin_price": 80, "id": "award_8352bdff-3e03-4189-8a08-82501dd8f835", "penny_donate": 0, "coin_reward": 0, "icon_url": "https://i.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png", "days_of_premium": 0, "icon_height": 2048, "tiers_by_required_awardings": null, "resized_icons": [{"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=16&height=16&auto=webp&s=73a23bf7f08b633508dedf457f2704c522b94a04", "width": 16, "height": 16}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=32&height=32&auto=webp&s=50f2f16e71d2929e3d7275060af3ad6b851dbfb1", "width": 32, "height": 32}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=48&height=48&auto=webp&s=ca487311563425e195699a4d7e4c57a98cbfde8b", "width": 48, "height": 48}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=64&height=64&auto=webp&s=7b4eedcffb1c09a826e7837532c52979760f1d2b", "width": 64, "height": 64}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=128&height=128&auto=webp&s=e4d5ab237eb71a9f02bb3bf9ad5ee43741918d6c", "width": 128, "height": 128}], "icon_width": 2048, "static_icon_width": 2048, "start_date": null, "is_enabled": true, "awardings_required_to_grant_benefits": null, "description": "Everything is better with a good hug", "end_date": null, "subreddit_coin_reward": 0, "count": 1, "static_icon_height": 2048, "name": "Hugz", "resized_static_icons": [{"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=16&height=16&auto=webp&s=69997ace3ef4ffc099b81d774c2c8f1530602875", "width": 16, "height": 16}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=32&height=32&auto=webp&s=e9519d1999ef9dce5c8a9f59369cb92f52d95319", "width": 32, "height": 32}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=48&height=48&auto=webp&s=f076c6434fb2d2f9075991810fd845c40fa73fc6", "width": 48, "height": 48}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=64&height=64&auto=webp&s=85527145e0c4b754306a30df29e584fd16187636", "width": 64, "height": 64}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=128&height=128&auto=webp&s=b8843cdf82c3b741d7af057c14076dcd2621e811", "width": 128, "height": 128}], "icon_format": "PNG", "award_sub_type": "GLOBAL", "penny_price": 0, 
"award_type": "global", "static_icon_url": "https://i.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png"}], "awarders": [], "media_only": false, "can_gild": false, "spoiler": false, "locked": false, "author_flair_text": null, "treatment_tags": [], "visited": false, "removed_by": null, "num_reports": null, "distinguished": null, "subreddit_id": "t5_2vzn6", "mod_reason_by": null, "removal_reason": null, "link_flair_background_color": "", "id": "l7zp94", "is_robot_indexable": true, "num_duplicates": 1, "report_reasons": null, "author": "alexandermikh", "discussion_type": null, "num_comments": 5, "send_replies": true, "media": {"oembed": {"provider_url": "http://imgur.com", "description": "Post with 0 views.", "title": "Imgur", "url": "https://imgur.com/a/X5hateX", "thumbnail_width": 600, "height": 567, "width": 600, "html": "<iframe class=\"embedly-embed\" src=\"https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX%2Fembed%3Fpub%3Dtrue%26ref%3Dhttps%253A%252F%252Fembed.ly%26w%3D900&display_name=Imgur&url=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX&image=https%3A%2F%2Fi.imgur.com%2FzCLUa41.jpg%3Ffb&key=2aa3c4d5f3de4f5b9120b660ad850dc9&type=text%2Fhtml&schema=imgur\" width=\"600\" height=\"567\" scrolling=\"no\" title=\"Imgur embed\" frameborder=\"0\" allow=\"autoplay; fullscreen\" allowfullscreen=\"true\"></iframe>", "version": "1.0", "provider_name": "Imgur", "thumbnail_url": "https://i.imgur.com/zCLUa41.jpg?fb", "type": "rich", "thumbnail_height": 315}, "type": "imgur.com"}, "contest_mode": false, "author_patreon_flair": false, "author_flair_text_color": null, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/", "whitelist_status": "all_ads", "stickied": false, "url": "https://imgur.com/a/X5hateX", "subreddit_subscribers": 73538, "created_utc": 1611944124.0, "num_crossposts": 0, "mod_reports": [], "is_video": false}}], "after": null, "before": null}}, {"kind": "Listing", "data": {"modhash": "", "dist": null, "children": [{"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 4, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": "", "user_reports": [], "saved": false, "id": "glai61b", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": false, "author": "L103131", "can_mod_post": false, "send_replies": true, "parent_id": "t3_l7zp94", "score": 4, "author_fullname": "t2_u7ppvf1", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "body": "Looks sick!", "edited": false, "downs": 0, "author_flair_css_class": null, "is_submitter": false, "collapsed": false, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><p>Looks sick!</p>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": null, "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/glai61b/", "num_reports": null, "locked": false, "name": "t1_glai61b", "created": 1611983461.0, "subreddit": "doge", "author_flair_text": null, "treatment_tags": [], "created_utc": 1611954661.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, 
"depth": 0, "author_flair_background_color": null, "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, "distinguished": null}}, {"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 1, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": "", "user_reports": [], "saved": false, "id": "glc7p50", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": true, "author": "ManBets", "can_mod_post": false, "send_replies": true, "parent_id": "t3_l7zp94", "score": 1, "author_fullname": "t2_a1nnt5hp", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "body": "#DOGE HOLD IT", "edited": false, "downs": 0, "author_flair_css_class": null, "is_submitter": false, "collapsed": false, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><h1>DOGE HOLD IT</h1>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": null, "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/glc7p50/", "num_reports": null, "locked": false, "name": "t1_glc7p50", "created": 1612013001.0, "subreddit": "doge", "author_flair_text": null, "treatment_tags": [], "created_utc": 1611984201.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, "depth": 0, "author_flair_background_color": null, "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, "distinguished": null}}, {"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 1, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": "", "user_reports": [], "saved": false, "id": "gladezi", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": true, "author": "anormalusername1", "can_mod_post": false, "send_replies": true, "parent_id": "t3_l7zp94", "score": 1, "author_fullname": "t2_i86lt", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "body": "10/10 very art such picasso wow", "edited": false, "downs": 0, "author_flair_css_class": "mod", "is_submitter": false, "collapsed": false, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><p>10/10 very art such picasso wow</p>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": "dark", "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gladezi/", "num_reports": null, "locked": false, "name": "t1_gladezi", "created": 1611981922.0, "subreddit": "doge", "author_flair_text": "wow such mod", "treatment_tags": [], "created_utc": 1611953122.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, "depth": 0, "author_flair_background_color": "", "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, 
"distinguished": null}}, {"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 2, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": {"kind": "Listing", "data": {"modhash": "", "dist": null, "children": [{"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 1, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": "", "user_reports": [], "saved": false, "id": "gsdp7lq", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": true, "author": "alexandermikh", "can_mod_post": false, "send_replies": true, "parent_id": "t1_gsdozky", "score": 1, "author_fullname": "t2_bkqxw", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "collapsed": false, "body": "Thanks <3", "edited": false, "author_flair_css_class": null, "is_submitter": true, "downs": 0, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><p>Thanks &lt;3</p>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": null, "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gsdp7lq/", "num_reports": null, "locked": false, "name": "t1_gsdp7lq", "created": 1616845579.0, "subreddit": "doge", "author_flair_text": null, "treatment_tags": [], "created_utc": 1616816779.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, "depth": 1, "author_flair_background_color": null, "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, "distinguished": null}}], "after": null, "before": null}}, "user_reports": [], "saved": false, "id": "gsdozky", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": true, "author": "The_disasterpeice", "can_mod_post": false, "send_replies": true, "parent_id": "t3_l7zp94", "score": 2, "author_fullname": "t2_93ke41tz", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "body": "Much drawing, very sketch. Ps I gave you the silver award.", "edited": false, "downs": 0, "author_flair_css_class": null, "is_submitter": false, "collapsed": false, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><p>Much drawing, very sketch. 
Ps I gave you the silver award.</p>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": null, "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gsdozky/", "num_reports": null, "locked": false, "name": "t1_gsdozky", "created": 1616845451.0, "subreddit": "doge", "author_flair_text": null, "treatment_tags": [], "created_utc": 1616816651.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, "depth": 0, "author_flair_background_color": null, "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, "distinguished": null}}], "after": null, "before": null}}] -------------------------------------------------------------------------------- /tests/test_llm.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pytest 3 | from unittest.mock import MagicMock, patch, AsyncMock 4 | 5 | from reddacted.llm_detector import LLMDetector 6 | 7 | import asyncio 8 | from typing import Dict, Any, List 9 | 10 | SAMPLE_RESPONSE = { 11 | "has_pii": True, 12 | "confidence": 0.85, 13 | "details": ["Mentions specific location 'Miami Springs'"], 14 | "reasoning": "Location mention could help identify author's residence", 15 | "risk_factors": ["geographical specificity", "local slang reference"], 16 | } 17 | 18 | TEST_CASES = [ 19 | { 20 | "text": "My phone number is 555-0123", 21 | "response": { 22 | "has_pii": True, 23 | "confidence": 0.95, 24 | "details": ["Contains phone number"], 25 | "risk_factors": ["contact_info"], 26 | "reasoning": "Phone number present", 27 | }, 28 | }, 29 | { 30 | "text": "I live at 123 Main St, Springfield", 31 | "response": { 32 | "has_pii": True, 33 | "confidence": 0.90, 34 | "details": ["Contains address"], 35 | "risk_factors": ["location"], 36 | "reasoning": "Street address present", 37 | }, 38 | }, 39 | { 40 | "text": "Just a regular comment about cats", 41 | "response": { 42 | "has_pii": False, 43 | "confidence": 0.1, 44 | "details": [], 45 | "risk_factors": [], 46 | "reasoning": "No PII detected", 47 | }, 48 | }, 49 | ] 50 | 51 | 52 | @pytest.fixture 53 | def mock_responses() -> List[Dict[str, Any]]: 54 | """Fixture providing a list of test responses""" 55 | return [case["response"] for case in TEST_CASES] 56 | 57 | 58 | @pytest.fixture 59 | def mock_texts() -> List[str]: 60 | """Fixture providing a list of test texts""" 61 | return [case["text"] for case in TEST_CASES] 62 | 63 | 64 | @pytest.fixture 65 | def mock_api_error(): 66 | """Fixture providing a mock API error""" 67 | return Exception("API Error: Rate limit exceeded") 68 | 69 | 70 | @pytest.fixture 71 | def mock_openai(): 72 | """Fixture to provide mocked OpenAI client""" 73 | with patch("openai.AsyncOpenAI") as mock: 74 | mock_client = MagicMock() 75 | mock_client.chat = MagicMock() 76 | mock.return_value = mock_client 77 | yield mock 78 | 79 | 80 | @pytest.fixture 81 | def mock_completion(): 82 | """Fixture to provide mocked completion response""" 83 | completion = MagicMock() 84 | message = MagicMock() 85 | message.content = json.dumps(SAMPLE_RESPONSE) 86 | choice = MagicMock() 87 | choice.message = message 88 | completion.choices = [choice] 89 | return completion 90 | 91 | 92 | class TestLLMDetector: 93 | """Test suite for LLMDetector class""" 94 | 95 | @pytest.fixture(autouse=True) 96 | def setup_method(self): 
97 | """Setup method run before each test""" 98 | self.detector = LLMDetector(api_key="sk-test") 99 | 100 | @pytest.mark.asyncio 101 | async def test_analyze_text_success(self, mock_openai, mock_completion): 102 | """Test successful PII analysis with valid response""" 103 | mock_openai.return_value.chat.completions.create = AsyncMock(return_value=mock_completion) 104 | 105 | detector = LLMDetector(api_key="sk-test") 106 | risk_score, details = await detector.analyze_text( 107 | "RaunchyRaccoon that looks a lot like Miami Springs!" 108 | ) 109 | 110 | assert risk_score == 0.85 111 | assert details["details"] == SAMPLE_RESPONSE["details"] 112 | assert details["risk_factors"] == SAMPLE_RESPONSE["risk_factors"] 113 | mock_openai.assert_called_once_with(api_key="sk-test") 114 | 115 | @pytest.mark.asyncio 116 | async def test_analyze_invalid_key(self, mock_openai): 117 | """Test authentication error handling""" 118 | mock_openai.side_effect = Exception("Invalid API key") 119 | 120 | risk_score, details = await self.detector.analyze_text("Sample text") 121 | 122 | assert risk_score == 0.0 123 | assert "error" in details 124 | assert "Invalid API key" in details["error"] 125 | 126 | @pytest.mark.asyncio 127 | async def test_rate_limit_handling(self, mock_openai, mock_api_error): 128 | """Test handling of rate limit errors""" 129 | mock_openai.side_effect = mock_api_error 130 | 131 | risk_score, details = await self.detector.analyze_text("Test text") 132 | 133 | assert risk_score == 0.0 134 | assert "error" in details 135 | assert "Rate limit" in details["error"] 136 | 137 | @pytest.mark.asyncio 138 | async def test_empty_text_handling(self): 139 | """Test handling of empty text input""" 140 | risk_score, details = await self.detector.analyze_text("") 141 | 142 | assert risk_score == 0.0 143 | assert "error" in details 144 | assert isinstance(details["error"], str) 145 | 146 | @pytest.mark.asyncio 147 | async def test_long_text_handling(self): 148 | """Test handling of very long text input""" 149 | # Create text that exceeds token limit 150 | long_text = "test " * 5000 151 | 152 | risk_score, details = await self.detector.analyze_text(long_text) 153 | 154 | assert risk_score == 0.0 155 | assert "error" in details 156 | 157 | @pytest.mark.asyncio 158 | async def test_batch_concurrent_processing(self, mock_openai, mock_responses, mock_texts): 159 | """Test concurrent processing of batch texts""" 160 | mock_completions = [] 161 | for response in mock_responses: 162 | completion = MagicMock() 163 | message = MagicMock() 164 | message.content = json.dumps(response) 165 | choice = MagicMock() 166 | choice.message = message 167 | completion.choices = [choice] 168 | mock_completions.append(completion) 169 | 170 | mock_openai.return_value.chat.completions.create = AsyncMock(side_effect=mock_completions) 171 | 172 | results = await self.detector.analyze_batch(mock_texts) 173 | 174 | assert len(results) == len(mock_texts) 175 | assert all(isinstance(score, float) for score, _ in results) 176 | assert all(isinstance(detail, dict) for _, detail in results) 177 | 178 | @pytest.mark.asyncio 179 | async def test_batch_error_handling(self, mock_openai, mock_texts, mock_api_error): 180 | """Test error handling in batch processing""" 181 | mock_openai.return_value.chat.completions.create = AsyncMock(side_effect=mock_api_error) 182 | 183 | results = await self.detector.analyze_batch(mock_texts) 184 | 185 | assert len(results) == len(mock_texts) 186 | assert all(score == 0.0 for score, _ in results) 187 | assert 
all("error" in detail for _, detail in results) 188 | 189 | @pytest.mark.asyncio 190 | async def test_analyze_batch(self, mock_openai): 191 | """Test batch processing of multiple texts""" 192 | # Configure different mock responses for each text 193 | responses = [ 194 | { 195 | "has_pii": True, 196 | "confidence": 0.9, 197 | "details": ["Contains location"], 198 | "risk_factors": ["location"], 199 | }, 200 | { 201 | "has_pii": True, 202 | "confidence": 0.8, 203 | "details": ["Contains phone number"], 204 | "risk_factors": ["contact"], 205 | }, 206 | {"has_pii": False, "confidence": 0.0, "details": [], "risk_factors": []}, 207 | ] 208 | 209 | async def mock_completion(*args, **kwargs): 210 | # Get the input text from the API call 211 | messages = kwargs.get("messages", []) 212 | text_index = len(mock_completion.call_count) 213 | mock_completion.call_count.append(1) # Track number of calls 214 | 215 | # Create mock response 216 | mock_msg = MagicMock() 217 | mock_msg.content = json.dumps(responses[text_index]) 218 | mock_choice = MagicMock() 219 | mock_choice.message = mock_msg 220 | mock_resp = MagicMock() 221 | mock_resp.choices = [mock_choice] 222 | return mock_resp 223 | 224 | # Initialize call counter 225 | mock_completion.call_count = [] 226 | mock_openai.return_value.chat.completions.create = AsyncMock(side_effect=mock_completion) 227 | 228 | detector = LLMDetector(api_key="sk-test") 229 | texts = ["123 Main St, New York", "Call me at 555-0123", "Just a regular text"] 230 | 231 | results = await detector.analyze_batch(texts) 232 | 233 | # Verify results 234 | assert len(results) == len(texts) 235 | 236 | # Check first result (location) 237 | assert results[0][0] == 0.9 238 | assert results[0][1]["risk_factors"] == ["location"] 239 | 240 | # Check second result (phone) 241 | assert results[1][0] == 0.8 242 | assert results[1][1]["risk_factors"] == ["contact"] 243 | 244 | # Check third result (clean) 245 | assert results[2][0] == 0.0 246 | assert results[2][1]["risk_factors"] == [] 247 | 248 | # Verify API setup 249 | mock_openai.assert_called_once_with(api_key="sk-test") 250 | 251 | @pytest.mark.asyncio 252 | async def test_invalid_json_response(self, mock_openai): 253 | """Test handling of malformed LLM response""" 254 | # Create mock with invalid JSON response 255 | mock_completion = MagicMock() 256 | message = MagicMock() 257 | message.content = "Not valid JSON" 258 | mock_completion.choices = [MagicMock(message=message)] 259 | mock_openai.return_value.chat.completions.create = AsyncMock(return_value=mock_completion) 260 | 261 | risk_score, details = await self.detector.analyze_text("Sample text") 262 | 263 | assert risk_score == 0.0 264 | assert "error" in details 265 | assert "Expecting value" in details["error"] 266 | -------------------------------------------------------------------------------- /tests/test_scraper.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import unittest 3 | import json 4 | 5 | from unittest.mock import MagicMock 6 | 7 | from reddacted.api import scraper 8 | 9 | 10 | class ScraperPostiveTestCases(unittest.TestCase): 11 | 12 | def setUp(self): 13 | super(ScraperPostiveTestCases, self).setUp() 14 | self.maxDiff = None 15 | 16 | def tearDown(self): 17 | super(ScraperPostiveTestCases, self).tearDown() 18 | 19 | def test_parse_user(self): 20 | # Mocking the Request for obtaining json from Reddit 21 | user_data = "" 22 | with open("tests/templates/reddit/user.json") as user_file: 23 | user_data = 
json.loads(user_file.read()) 24 | valid_user_response = MagicMock(name="mock_response") 25 | valid_user_response.json = MagicMock(return_value=user_data) 26 | valid_user_response.status_code = 200 27 | requests.get = MagicMock(return_value=valid_user_response) 28 | 29 | expected = [ 30 | { 31 | "text": "Sure is! Appended it to my tweet.", 32 | "upvotes": 1, 33 | "downvotes": 0, 34 | "id": "fnq55o3", 35 | "permalink": "/r/WhitePeopleTwitter/comments/g35yge/the_battle_cry_of_a_generation/fnq55o3/", 36 | }, 37 | { 38 | "text": "Anyone not treating the animals properly, should simply be banned after a warning.", 39 | "upvotes": 2, 40 | "downvotes": 0, 41 | "id": "eyvcagh", 42 | "permalink": "/r/AmItheAsshole/comments/cyt2nl/aita_for_allowing_two_teenagers_to_be_spit_on_by/eyvcagh/", 43 | }, 44 | { 45 | "text": "Is it safe to shower using head and shoulders once per day?", 46 | "upvotes": 4, 47 | "downvotes": 0, 48 | "id": "eyvbx4k", 49 | "permalink": "/r/science/comments/cyx8s4/teen_went_blind_after_eating_only_pringles_fries/eyvbx4k/", 50 | }, 51 | { 52 | "text": "Its crazy how this flows so well in my mind", 53 | "upvotes": 2, 54 | "downvotes": 0, 55 | "id": "ex2ovgj", 56 | "permalink": "/r/memes/comments/cr86z6/dr_phil_review_this/ex2ovgj/", 57 | }, 58 | { 59 | "text": "Cuban's love the name fifi for girl dogs. Source: Me a Cuban American.", 60 | "upvotes": 1, 61 | "downvotes": 0, 62 | "id": "ewx8paz", 63 | "permalink": "/r/AskReddit/comments/cqdjg6/nonamericans_does_your_culture_have_oldfashioned/ewx8paz/", 64 | }, 65 | { 66 | "text": "You can simply follow the deployment guide for Ingress-NGINX, if that is the controller you are wanting to use. See [https://github.com/kubernetes/ingress-nginx/blob/master/docs/deploy/index.md](https://github.com/kubernetes/ingress-nginx/blob/master/docs/deploy/index.md) When you create your ingress resource, you can specify the host as [www.example.com](https://www.example.com) and in your /etc/hosts you can put that URL as the clusterIP. Then send a curl using the \\`Host\\` header to verify. I made a tutorial a while back, using minikube, but the example should still work on an AWS cluster. See [https://medium.com/@awkwardferny/getting-started-with-kubernetes-ingress-nginx-on-minikube-d75e58f52b6c](https://medium.com/@awkwardferny/getting-started-with-kubernetes-ingress-nginx-on-minikube-d75e58f52b6c) Also if you still have questions, you can always post on [http://slack.k8s.io/](http://slack.k8s.io/) on the #ingress-nginx channel.", 67 | "upvotes": 2, 68 | "downvotes": 0, 69 | "id": "ehsepvh", 70 | "permalink": "/r/kubernetes/comments/awvv0h/how_to_create_an_ingress_controller_on_cluster/ehsepvh/", 71 | }, 72 | { 73 | "text": "Hey u/Jokkamo Seems like the syntax is off in the template. I created a blog about templating : [https://medium.com/@awkwardferny/golang-templating-made-easy-4d69d663c558](https://medium.com/@awkwardferny/golang-templating-made-easy-4d69d663c558). Hope it helps you!! You could also create a template function to examine currentTitle. 
", 74 | "upvotes": 1, 75 | "downvotes": 0, 76 | "id": "ee4r5v2", 77 | "permalink": "/r/golang/comments/afxhvk/how_can_i_check_where_a_variable_defined_in_html/ee4r5v2/", 78 | }, 79 | { 80 | "text": "I guess that's a good one to add lol.", 81 | "upvotes": 2, 82 | "downvotes": 0, 83 | "id": "e5ezchx", 84 | "permalink": "/r/programming/comments/9d1fh5/bad_software_development_patterns_and_how_to_fix/e5ezchx/", 85 | }, 86 | { 87 | "text": "RaunchyRaccoon that looks a lot like Miami Springs!", 88 | "upvotes": 1, 89 | "downvotes": 0, 90 | "id": "dmvmihx", 91 | "permalink": "/r/HumansBeingBros/comments/6zgfvk/our_neighborhood_got_battered_by_irma_many/dmvmihx/", 92 | }, 93 | { 94 | "text": "If you can't find water anywhere, I thought of a solution. Simply buy some cheap sodas/tea and drain the soda away and fill it up with tap-water! Will at least keep you with some water.", 95 | "upvotes": 2, 96 | "downvotes": 0, 97 | "id": "dmnmuve", 98 | "permalink": "/r/Miami/comments/6ydvec/hurricane_irma_megathread_2_97/dmnmuve/", 99 | }, 100 | { 101 | "text": "You ever been in a storm? https://www.youtube.com/watch?v=Pr7Y0kZ67o0", 102 | "upvotes": 1, 103 | "downvotes": 0, 104 | "id": "dld5va0", 105 | "permalink": "/r/worldnews/comments/6sfvxd/trump_if_north_korea_escalates_nuclear_threat/dld5va0/", 106 | }, 107 | { 108 | "text": "Officer Joseph.", 109 | "upvotes": 1, 110 | "downvotes": 0, 111 | "id": "dggdqs0", 112 | "permalink": "/r/funny/comments/6664cj/look_whos_taking_the_picture/dggdqs0/", 113 | }, 114 | ] 115 | 116 | sc = scraper.Scraper() 117 | result = sc.parse_user("awkwardferny") 118 | 119 | self.assertEqual(expected, result) 120 | 121 | def test_parse_listing(self): 122 | # Mocking the Request for obtaining json from Reddit 123 | article_data = "" 124 | with open("tests/templates/reddit/article.json") as article_file: 125 | article_data = json.loads(article_file.read()) 126 | valid_article_response = MagicMock(name="mock_response") 127 | valid_article_response.json = MagicMock(return_value=article_data) 128 | valid_article_response.status_code = 200 129 | requests.get = MagicMock(return_value=valid_article_response) 130 | 131 | expected = [ 132 | { 133 | "text": "Looks sick!", 134 | "upvotes": 4, 135 | "downvotes": 0, 136 | "id": "glai61b", 137 | "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/glai61b/", 138 | }, 139 | { 140 | "text": "#DOGE HOLD IT", 141 | "upvotes": 1, 142 | "downvotes": 0, 143 | "id": "glc7p50", 144 | "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/glc7p50/", 145 | }, 146 | { 147 | "text": "10/10 very art such picasso wow", 148 | "upvotes": 1, 149 | "downvotes": 0, 150 | "id": "gladezi", 151 | "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gladezi/", 152 | }, 153 | { 154 | "text": "Much drawing, very sketch. 
Ps I gave you the silver award.", 155 | "upvotes": 2, 156 | "downvotes": 0, 157 | "id": "gsdozky", 158 | "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gsdozky/", 159 | }, 160 | ] 161 | 162 | sc = scraper.Scraper() 163 | result = sc.parse_listing("doge", "l7zp94") 164 | 165 | self.assertEqual(expected, result) 166 | -------------------------------------------------------------------------------- /tests/test_sentiment.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class SentimentPositiveTestCases(unittest.TestCase): 5 | 6 | def setUp(self): 7 | super(SentimentPositiveTestCases, self).setUp() 8 | self.maxDiff = None 9 | 10 | def tearDown(self): 11 | super(SentimentPositiveTestCases, self).tearDown() 12 | --------------------------------------------------------------------------------
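Editor's note: tests/test_sentiment.py above is an empty scaffold with no assertions yet. The sketch below shows one direction it could grow in, in the same unittest style as the other suites. It is an assumption on my part, not confirmed by the code shown here, that reddacted's sentiment scoring is backed by NLTK's VADER analyzer; the example therefore exercises VADER directly, and the sample texts and thresholds are illustrative only.

# Hypothetical sketch only -- not part of the repository.
# Assumes a VADER-based sentiment pipeline, which this excerpt does not confirm.
import unittest

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer


class SentimentPositiveTestCases(unittest.TestCase):

    def setUp(self):
        super(SentimentPositiveTestCases, self).setUp()
        self.maxDiff = None
        # Make sure the VADER lexicon is available before building the analyzer.
        nltk.download("vader_lexicon", quiet=True)
        self.analyzer = SentimentIntensityAnalyzer()

    def test_positive_comment_scores_positive(self):
        # A clearly upbeat comment should produce a positive compound score.
        scores = self.analyzer.polarity_scores("This is wonderful, I love it!")
        self.assertGreater(scores["compound"], 0.0)

    def test_negative_comment_scores_negative(self):
        # A clearly hostile comment should produce a negative compound score.
        scores = self.analyzer.polarity_scores("This is terrible and I hate it.")
        self.assertLess(scores["compound"], 0.0)

If the project's own sentiment module wraps VADER (again, an assumption), the same two cases could be rewritten against that wrapper instead of the raw analyzer, keeping the compound-score sign checks as the assertions.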