├── .githooks └── pre-commit ├── .github └── workflows │ ├── lint.yml │ ├── smoke.yml │ └── unit.yml ├── .gitignore ├── LICENSE ├── README.md ├── pyproject.toml ├── reddacted ├── __init__.py ├── api │ ├── __init__.py │ ├── api.py │ ├── list_models.py │ ├── reddit.py │ └── scraper.py ├── cli.py ├── cli_config.py ├── llm_detector.py ├── pii_detector.py ├── results.py ├── sentiment.py ├── styles.py ├── textual_cli.py ├── textual_ui.py ├── ui │ ├── __init__.py │ ├── comment_actions.py │ └── details_screen.py ├── utils │ ├── __init__.py │ ├── analysis.py │ ├── base.py │ ├── log_handler.py │ ├── panels.py │ ├── progress.py │ ├── report.py │ └── tables.py └── version.py ├── release.sh ├── requirements-dev.txt ├── requirements.txt └── tests ├── __init__.py ├── templates └── reddit │ ├── article.json │ └── user.json ├── test_llm.py ├── test_scraper.py └── test_sentiment.py /.githooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Remove trailing whitespace from all staged files 3 | 4 | # Get list of staged files 5 | staged_files=$(git diff --cached --name-only --diff-filter=ACMR | sed 's| |\\ |g') 6 | 7 | # Remove trailing whitespace for each file 8 | for file in $staged_files; do 9 | # Only process text files 10 | if file --mime-encoding "$file" | grep -qE '(us-ascii|utf-8)'; then 11 | # Portable whitespace removal using awk 12 | awk '{ sub(/[ \t]+$/, ""); print }' "$file" > "$file.tmp" && 13 | mv "$file.tmp" "$file" 14 | fi 15 | done 16 | 17 | # Re-add modified files to staging 18 | git add -u 19 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: "Lint" 2 | 3 | on: 4 | push: 5 | branches: [ main, master ] 6 | pull_request: 7 | branches: [ main, master ] 8 | merge_group: 9 | 10 | jobs: 11 | lint: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python 3.12 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: "3.12" 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install flake8 pytest 23 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 24 | - name: Lint with flake8 25 | run: | 26 | # stop the build if there are Python syntax errors or undefined names 27 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 28 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 29 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 30 | -------------------------------------------------------------------------------- /.github/workflows/smoke.yml: -------------------------------------------------------------------------------- 1 | name: "Smoke Tests" 2 | 3 | on: 4 | push: 5 | branches: [ main, master ] 6 | pull_request: 7 | branches: [ main, master ] 8 | merge_group: 9 | 10 | jobs: 11 | smoke-tests: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python 3.12 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.12" 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install uv 23 | pip install -e ".[dev]" 24 | - name: Run tests 25 | run: | 26 | pytest tests 27 | -------------------------------------------------------------------------------- /.github/workflows/unit.yml: -------------------------------------------------------------------------------- 1 | name: "Unit Tests" 2 | 3 | on: 4 | push: 5 | branches: [ main, master ] 6 | pull_request: 7 | branches: [ main, master ] 8 | merge_group: 9 | 10 | jobs: 11 | unit-tests: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.11", "3.12"] 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | cache: 'pip' 24 | 25 | - name: Install package and dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install -e ".[dev]" 29 | pip install pytest-asyncio 30 | 31 | - name: Run tests 32 | run: | 33 | pytest tests/ -v 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Virtual Environment 24 | .env 25 | .venv 26 | env/ 27 | venv/ 28 | ENV/ 29 | 30 | # IDE 31 | .idea/ 32 | .vscode/ 33 | *.swp 34 | *.swo 35 | 36 | # Testing 37 | .coverage 38 | htmlcov/ 39 | .pytest_cache/ 40 | .mypy_cache/ 41 | .tox/ 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | .hypothesis/ 46 | 47 | # Project specific 48 | *.log 49 | output/ 50 | .DS_Store 51 | .aider* 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Taylor Wilsdon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🛡️ reddacted 2 | 3 |
4 | 5 | ### AI-Powered Reddit Privacy Suite 6 | 7 | [![Privacy Shield](https://img.shields.io/badge/Privacy-100%25_Client--Side_Processing-success)](https://github.com/taylorwilsdon) 8 | [![AI Analysis](https://img.shields.io/badge/AI-PII_Detection-blueviolet)](https://github.com/taylorwilsdon/reddacted) 9 | ![GitHub License](https://img.shields.io/github/license/taylorwilsdon/reddacted) 10 | ![PyPI - Version](https://img.shields.io/pypi/v/reddacted) 11 | [![PyPI Downloads](https://static.pepy.tech/badge/reddacted)](https://pepy.tech/projects/reddacted) 12 | 13 |

Local LLM powered, highly performant privacy analysis leveraging AI, sentiment analysis & PII detection
to provide insights into your true privacy with bulk remediation

14 | 15 |

For aging engineers who want to protect their future political careers 🏛️

16 | 17 |
18 | 19 |
20 | reddacted demo 21 |
22 | 23 |
24 | 25 | 26 |
27 | 28 | ## ✨ Key Features 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 |
🛡️
PII Detection
Analyze the content of comments to identify anything that might reveal PII that you may not want correlated with your anonymous username
🤫
Sentiment Analysis
Understand the emotional tone of your Reddit history, combined with upvote/downvote counts & privacy risks to choose which posts to reddact
🔒
Zero-Trust Architecture
Client-side execution only - no data leaves your machine unless you choose to use a hosted API. Fully compatible with any OpenAI-compatible endpoint

Self-Host Ready
Use any model via Ollama, llama.cpp, vLLM or other platform capable of exposing an OpenAI-compatible endpoint. LiteLLM works just dandy.
📊
Smart Cleanup
Preserve valuable contributions while removing risky content - clean up your online footprint without blowing away everything
52 | 53 | ## 🔐 Can I trust this with my data? 54 | 55 |
56 |

You don't have to - read the code for yourself; Reddit is the only thing it calls

57 |
58 | 59 | ```bash 60 | reddacted user yourusername --local-llm "http://localhost:11434" 61 | ``` 62 | 63 | - ✅ Client-side execution only, no tracking or external calls 64 | - ✅ Session-based authentication if you choose - it is optional unless you want to delete 65 | - ✅ Keep your nonsense comments with lots of upvotes and good vibes without unintentionally doxing yourself 66 | 67 | ```bash 68 | reddacted user taylorwilsdon --limit 3 69 | ``` 70 | 71 | ## 📋 Table of Contents 72 | 73 | - [Key Features](#-key-features) 74 | - [Can I trust this with my data?](#-can-i-trust-this-with-my-data) 75 | - [Installation](#-installation) 76 | - [Usage](#-usage) 77 | - [Available Commands](#available-commands) 78 | - [Common Arguments](#common-arguments) 79 | - [LLM Configuration](#llm-configuration) 80 | - [How accurate is the PII detection?](#-how-accurate-is-the-pii-detection-really) 81 | - [FAQ](#-faq) 82 | - [Troubleshooting](#-troubleshooting) 83 | - [Authentication](#-authentication) 84 | - [Advanced Usage](#-advanced-usage) 85 | - [Development](#-development) 86 | - [Testing](#-testing) 87 | - [Common Exceptions](#-common-exceptions) 88 | - [Support & Community](#-support--community) 89 | 90 | ## 📥 Installation 91 | 92 | ```bash 93 | # Install from brew (recommended) 94 | brew install taylorwilsdon/tap/reddacted 95 | 96 | # Install from PyPI (recommended) 97 | pip install reddacted 98 | 99 | # Or install from source 100 | git clone https://github.com/taylorwilsdon/reddacted.git 101 | cd reddacted 102 | pip install -e ".[dev]" # Installs with development dependencies 103 | ``` 104 | 105 | ## 🚀 Usage 106 | 107 | ```bash 108 | # Most basic possible quick start - this will walk you through selecting your LLM in the command line 109 | reddacted user spez 110 | 111 | # Analyze a user's recent comments with local LLM specified 112 | reddacted user spez \ 113 | --limit 5 \ 114 | --local-llm "http://localhost:11434" \ 115 | --model "qwen2.5:3b" \ 116 | --sort new 117 | 118 | # Analyze controversial comments with OpenAI 119 | export OPENAI_API_KEY="your-api-key" 120 | reddacted user spez \ 121 | --sort controversial \ 122 | --time month \ 123 | --model "gpt-4" \ 124 | --limit 10 \ 125 | --pii-only 126 | 127 | # Analyze a specific subreddit post with PII filter disabled 128 | reddacted listing r/privacy abc123 \ 129 | --local-llm "http://localhost:11434" \ 130 | --model "qwen2.5:3b" \ 131 | --disable-pii \ 132 | --sort new 133 | 134 | # Search for specific content (requires auth) 135 | reddacted user spez \ 136 | --enable-auth \ 137 | --text-match "python" \ 138 | --skip-text "deleted" \ 139 | --sort top \ 140 | --time all 141 | 142 | # Bulk comment management 143 | reddacted delete abc123,def456 --batch-size 5 # Delete comments 144 | reddacted update abc123,def456 # Replace with standard redaction message 145 | reddacted update abc123,def456 --use-random-string # Replace with random UUID 146 | ``` 147 | 148 | ### Available Commands 149 | 150 | | Command | Description | 151 | |---------|-------------| 152 | | `user` | Analyze a user's comment history | 153 | | `listing` | Analyze a specific post and its comments | 154 | | `delete` | Delete comments by their IDs | 155 | | `update` | Replace comment content with r/reddacted | 156 | 157 | ### Common Arguments 158 | 159 | | Argument | Description | 160 | |----------|-------------| 161 | | `--limit N` | Maximum comments to analyze (default: 100, 0 for unlimited) | 162 | | `--sort` | Sort method: hot, new, controversial, top (default: new) | 163 | | `--time` | 
Time filter: all, day, hour, month, week, year (default: all) | 164 | | `--output-file` | Save detailed analysis to a file | 165 | | `--enable-auth` | Enable Reddit API authentication | 166 | | `--disable-pii` | Skip PII detection | 167 | | `--pii-only` | Show only comments containing PII | 168 | | `--text-match` | Search for comments containing specific text | 169 | | `--skip-text` | Skip comments containing specific text pattern | 170 | | `--batch-size` | Comments per batch for delete/update (default: 10) | 171 | | `--use-random-string` | Use random UUID instead of standard message when updating comments | 172 | 173 | ### LLM Configuration 174 | 175 | | Argument | Description | 176 | |----------|-------------| 177 | | `--local-llm URL` | Local LLM endpoint (OpenAI compatible) | 178 | | `--openai-key KEY` | OpenAI API key | 179 | | `--openai-base URL` | Custom OpenAI API base URL | 180 | | `--model NAME` | Model to use (default: gpt-4 for OpenAI) | 181 | 182 |
183 | Note: For cloud-based analysis using OpenAI, you can either use the --openai-key flag or set the environment variable: 184 | 185 | ```bash 186 | export OPENAI_API_KEY="your-api-key" 187 | ``` 188 |
189 | 190 | ## ❓ How accurate is the PII detection, really? 191 | 192 | Surprisingly good. Good enough that I run it against my own stuff in delete mode. It's basically a defense-in-depth approach combining these methods: 193 | 194 |
195 |
196 |

📊 AI Detection

197 |

Doesn't need a crazy smart model - don't waste your money on r1 or o1.

198 | 203 |
204 | 205 |
206 |

🔍 Pattern Matching

207 |

50+ regex rules for common PII formats do a first-pass sweep for the obvious stuff

208 |
209 | 210 |
211 |

🧠 Context Analysis

212 |

Are you coming off as a dick? Perhaps that factors into your decision to clean up. Who could say? Mine are all smiley faces.
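The tone signal comes from the project's `sentiment.py` module (NLTK is a dependency). As a rough illustration of the kind of polarity scoring involved - a minimal sketch assuming NLTK's bundled VADER analyzer, which may not match the exact implementation:

```python
# Illustrative only - assumes NLTK's VADER analyzer, not necessarily what sentiment.py does.
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nltk.download("vader_lexicon", quiet=True)  # one-time lexicon download
sia = SentimentIntensityAnalyzer()

score = sia.polarity_scores("This is the worst take I've ever read.")
# score is a dict with 'neg', 'neu', 'pos' and an overall 'compound' value
print("comes off negative" if score["compound"] < 0 else "good vibes")
```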

213 |
214 |
215 | 216 | ## 💡 FAQ 217 | 218 |
219 | Q: How does the AI handle false positives? 220 |

Adjust the confidence threshold (default 0.7) to match your risk tolerance. You're building a repo from source off some random dude's GitHub - don't run this and blindly delete a bunch of stuff; you're a smart person. Review your results, and if it's doing something crazy, please tell me.
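For reference, the LLM is asked to return a structured verdict (see `llm_detector.py`), and the risk score is simply its reported confidence whenever PII is flagged. A rough sketch of the thresholding applied on top - field names mirror the detector's prompt, the example values are invented:

```python
# Rough sketch only - field names mirror llm_detector.py; the values are made up.
analysis = {
    "has_pii": True,
    "confidence": 0.82,
    "details": [{"type": "location", "example": "my office in downtown Springfield"}],
    "reasoning": "Mentions a specific workplace location.",
    "risk_factors": ["workplace", "city"],
}

# Risk is the model's confidence only when PII was flagged, otherwise zero.
risk_score = float(analysis["confidence"]) if analysis["has_pii"] else 0.0

THRESHOLD = 0.7  # the default risk tolerance mentioned above
if risk_score >= THRESHOLD:
    print("flag this comment for review/redaction")
```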

221 |
222 | 223 |
224 | Q: What LLMs are supported? 225 |

Local: any model via Ollama, vLLM, or any other platform capable of exposing an OpenAI-compatible endpoint.
226 | Cloud: OpenAI-compatible endpoints

227 |
228 | 229 |
230 | Q: Is my data sent externally? 231 |

Only if you choose a hosted provider (cloud mode) - local analysis stays entirely on your machine.

232 |
233 | 234 | ## 🔧 Troubleshooting 235 | 236 | If you get "command not found" after installation: 237 | 238 | 1. Check Python scripts directory is in your PATH: 239 | 240 | ```bash 241 | # Typical Linux/Mac location 242 | export PATH="$HOME/.local/bin:$PATH" 243 | 244 | # Typical Windows location 245 | set PATH=%APPDATA%\Python\Python311\Scripts;%PATH% 246 | ``` 247 | 248 | 2. Verify installation location: 249 | 250 | ```bash 251 | pip show reddacted 252 | ``` 253 | 254 | ## 🔑 Authentication 255 | 256 | Before running any commands that require authentication, you'll need to set up your Reddit API credentials: 257 | 258 |
259 |
260 |

Step 1: Create a Reddit Account

261 |

If you don't have one, sign up at https://www.reddit.com/account/register/

262 |
263 | 264 |
265 |

Step 2: Create a Reddit App

266 | 274 |
275 | 276 |
277 |

Step 3: Get Your Credentials

278 |

After creating the app, note down:

279 | 283 |
284 | 285 |
286 |

Step 4: Set Environment Variables

287 | 288 | ```bash 289 | export REDDIT_USERNAME=your-reddit-username 290 | export REDDIT_PASSWORD=your-reddit-password 291 | export REDDIT_CLIENT_ID=your-client-id 292 | export REDDIT_CLIENT_SECRET=your-client-secret 293 | ``` 294 |
295 |
296 | 297 | These credentials are also automatically used if all environment variables are present, even without the `--enable-auth` flag. 298 | 299 | ## 🧙‍♂️ Advanced Usage 300 | 301 | ### Text Filtering 302 | 303 | You can filter comments using these arguments: 304 | 305 | | Argument | Description | 306 | |----------|-------------| 307 | | `--text-match "search phrase"` | Only analyze comments containing specific text (requires authentication) | 308 | | `--skip-text "skip phrase"` | Skip comments containing specific text pattern | 309 | 310 | For example: 311 | 312 | ```bash 313 | # Only analyze comments containing "python" 314 | reddacted user spez --text-match "python" 315 | 316 | # Skip comments containing "deleted" 317 | reddacted user spez --skip-text "deleted" 318 | 319 | # Combine both filters 320 | reddacted user spez --text-match "python" --skip-text "deleted" 321 | ``` 322 | 323 | ## 👨‍💻 Development 324 | 325 | This project uses [UV](https://github.com/astral-sh/uv) for building and publishing. Here's how to set up your development environment: 326 | 327 | 1. Create and activate a virtual environment: 328 | 329 | ```bash 330 | python -m venv venv 331 | source venv/bin/activate # On Windows: venv\Scripts\activate 332 | ``` 333 | 334 | 2. Install UV: 335 | 336 | ```bash 337 | pip install uv 338 | ``` 339 | 340 | 3. Install in development mode with test dependencies: 341 | 342 | ```bash 343 | pip install -e ".[dev]" 344 | ``` 345 | 346 | 4. Build the package: 347 | 348 | ```bash 349 | uv build --sdist --wheel 350 | ``` 351 | 352 | 5. Create a new release: 353 | 354 | ```bash 355 | ./release.sh 356 | ``` 357 | 358 | The release script will: 359 | - Build the package with UV 360 | - Create and push a git tag 361 | - Create a GitHub release 362 | - Update the Homebrew formula 363 | - Publish to PyPI (optional) 364 | 365 | That's it! The package handles all other dependencies automatically, including NLTK data. 366 | 367 | ## 🧪 Testing 368 | 369 | Run the test suite: 370 | 371 | ```bash 372 | pytest tests 373 | ``` 374 | 375 | Want to contribute? Great! Feel free to: 376 | - Open an Issue 377 | - Submit a Pull Request 378 | 379 | ## ⚠️ Common Exceptions 380 | 381 |
382 |
383 |

too many requests

384 |

If you're unauthenticated, Reddit has relatively low rate limits for its API. Either authenticate against your account, or just wait a sec and try again.

385 |
386 | 387 |
388 |

the page you requested does not exist

389 |

Simply a 404, which means that the provided username does not point to a valid page.

390 |
391 |
392 | 393 | > **Pro Tip**: Always review changes before executing deletions! 394 | 395 | ## 🌐 Support & Community 396 | 397 |
398 |

Join our subreddit: r/reddacted

399 |
400 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=68.0.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.uv] 6 | # Configuration for UV package build 7 | package = true 8 | 9 | [project] 10 | name = "reddacted" 11 | version = "0.2.5" 12 | description = "Analyze Reddit comments for PII and other sensitive information using local or OpenAI API compatible LLMs and perform sentiment analysis, edit and remove comments." 13 | readme = "README.md" 14 | requires-python = ">=3.11" 15 | license = {file = "LICENSE"} 16 | authors = [ 17 | {name = "Taylor Wilsdon", email = "taylor@taylorwilsdon.com"} 18 | ] 19 | keywords = ["reddact", "reddacted", "reddit", "llm", "pii", "sentiment", "analysis", "nlp"] 20 | classifiers = [ 21 | "Development Status :: 3 - Alpha", 22 | "Intended Audience :: Science/Research", 23 | "License :: OSI Approved :: MIT License", 24 | "Programming Language :: Python :: 3", 25 | "Programming Language :: Python :: 3.11", 26 | "Environment :: Console", 27 | "Topic :: Text Processing :: Linguistic", 28 | "Topic :: Scientific/Engineering :: Artificial Intelligence" 29 | ] 30 | 31 | dependencies = [ 32 | "autopage>=0.5.2", 33 | "praw>=7.7.1", 34 | "nltk>=3.8.1", 35 | "requests>=2.31.0", 36 | "six>=1.16.0", 37 | "httpx==0.27.2", 38 | "openai>=1.3.7", 39 | "rich>=13.7.0", 40 | "textual==2.1.2", 41 | "urllib3==2.3.0" 42 | ] 43 | 44 | [project.urls] 45 | "Bug Tracker" = "https://github.com/taylorwilsdon/reddacted/issues" 46 | 47 | [project.entry-points."reddacted.commands"] 48 | listing = "reddacted.cli:Listing" 49 | user = "reddacted.cli:User" 50 | 51 | [project.optional-dependencies] 52 | dev = [ 53 | "pytest", 54 | "black", 55 | "mypy", 56 | "ruff", 57 | "uv", 58 | "tomli>=2.0.0" # Required for Homebrew formula generation 59 | ] 60 | 61 | [project.scripts] 62 | reddacted = "reddacted.cli:main" 63 | 64 | [tool.setuptools] 65 | packages = ["reddacted", "reddacted.ui", "reddacted.api", "reddacted.utils"] 66 | 67 | [tool.black] 68 | line-length = 100 69 | target-version = ['py311'] 70 | 71 | [tool.ruff] 72 | line-length = 100 73 | target-version = "py311" 74 | select = ["E", "F", "B", "I"] 75 | 76 | [tool.mypy] 77 | python_version = "3.11" 78 | warn_return_any = true 79 | warn_unused_configs = true 80 | disallow_untyped_defs = true 81 | 82 | [tool.pytest.ini_options] 83 | asyncio_mode = "auto" 84 | testpaths = ["tests"] 85 | python_files = "test_*.py" 86 | python_classes = "Test*" 87 | python_functions = "test_*" 88 | addopts = "-v --tb=short" 89 | markers = [ 90 | "asyncio: mark test as async/await test", 91 | ] 92 | asyncio_default_fixture_loop_scope = "function" 93 | -------------------------------------------------------------------------------- /reddacted/__init__.py: -------------------------------------------------------------------------------- 1 | """reddacted - AI-Powered Reddit Privacy Suite""" 2 | 3 | __version__ = "0.2.5" 4 | -------------------------------------------------------------------------------- /reddacted/api/__init__.py: -------------------------------------------------------------------------------- 1 | from .list_models import * 2 | -------------------------------------------------------------------------------- /reddacted/api/api.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import six 3 | 4 | 5 | 
@six.add_metaclass(abc.ABCMeta) 6 | class API(object): 7 | """Base API Interface 8 | 9 | The API is responsible for gathering data to perform a sentiment 10 | analysis on. 11 | """ 12 | 13 | @abc.abstractmethod 14 | def parse_listing(self, subreddit, article, limit=100, **kwargs): 15 | """Parses a Listing Reddit Object. 16 | 17 | Args: 18 | subreddit: Subreddit to parse 19 | article: Article ID to parse 20 | limit: Maximum number of comments to return (None for unlimited) 21 | """ 22 | pass 23 | 24 | @abc.abstractmethod 25 | def parse_user(self, username, limit=100, **kwargs): 26 | """Parses a User Reddit Object. 27 | 28 | Args: 29 | username: Username to parse 30 | limit: Maximum number of comments to return (None for unlimited) 31 | """ 32 | pass 33 | -------------------------------------------------------------------------------- /reddacted/api/list_models.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from typing import List, Optional 3 | 4 | class ModelFetchError(Exception): 5 | """Custom exception for errors during model fetching.""" 6 | pass 7 | 8 | def fetch_available_models(base_url: str, api_key: Optional[str] = None) -> List[str]: 9 | """ 10 | Fetches available LLM models from a given base URL (OpenAI compatible API). 11 | 12 | Args: 13 | base_url: The base URL of the LLM API (e.g., http://localhost:11434 or https://api.openai.com). 14 | api_key: Optional API key required for authentication (e.g., OpenAI). 15 | 16 | Returns: 17 | A list of available model IDs. 18 | 19 | Raises: 20 | ModelFetchError: If there's an error fetching or parsing the models. 21 | requests.exceptions.RequestException: If there's a connection error. 22 | """ 23 | models_url = f"{base_url.rstrip('/')}/v1/models" # Ensure correct endpoint path 24 | headers = {"User-Agent": "Reddacted-Config-UI"} 25 | if api_key: 26 | headers["Authorization"] = f"Bearer {api_key}" 27 | 28 | try: 29 | response = requests.get(models_url, headers=headers, timeout=10) # Added timeout 30 | response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx) 31 | 32 | models_data = response.json() 33 | available_models = [m.get("id") for m in models_data.get("data", []) if m.get("id")] 34 | 35 | if not available_models: 36 | raise ModelFetchError("No available models found in the API response.") 37 | 38 | return available_models 39 | 40 | except requests.exceptions.HTTPError as e: 41 | raise ModelFetchError(f"HTTP Error fetching models: {e.response.status_code} - {e.response.text}") from e 42 | except requests.exceptions.ConnectionError as e: 43 | raise ModelFetchError(f"Connection error fetching models from {models_url}: {e}") from e 44 | except requests.exceptions.Timeout as e: 45 | raise ModelFetchError(f"Timeout fetching models from {models_url}: {e}") from e 46 | except requests.exceptions.RequestException as e: 47 | raise ModelFetchError(f"Error fetching models from {models_url}: {e}") from e 48 | except (ValueError, KeyError) as e: # Handle potential JSON parsing or key errors 49 | raise ModelFetchError(f"Error parsing model response from {models_url}: {e}") from e -------------------------------------------------------------------------------- /reddacted/api/reddit.py: -------------------------------------------------------------------------------- 1 | from types import BuiltinMethodType 2 | import time 3 | import os 4 | from typing import List, Dict, Any, Optional # Added Optional 5 | from typing import List, Dict, Any 6 | import uuid # Added for 
random string generation 7 | import praw 8 | from reddacted.api import api 9 | from reddacted.utils.log_handler import get_logger, with_logging 10 | from reddacted.utils.log_handler import handle_exception 11 | 12 | logger = get_logger(__name__) 13 | 14 | 15 | class AuthenticationRequiredError(Exception): 16 | """Raised when authentication is required but not configured""" 17 | 18 | pass 19 | 20 | 21 | @with_logging(logger) 22 | class Reddit(api.API): 23 | """The Reddit Class obtains data to perform sentiment analysis on 24 | using the Reddit API. 25 | 26 | It allows an unauthenticated user to obtain data to analyze various 27 | reddit objects. 28 | """ 29 | 30 | def __init__(self, config: Optional[Dict[str, Any]] = None, use_random_string=False): 31 | """Initialize Reddit API client. Prioritizes credentials from config, 32 | then environment variables, falling back to read-only mode. 33 | 34 | Args: 35 | config: Optional dictionary containing configuration values (including credentials). 36 | use_random_string: Whether to use random UUIDs instead of standard message when updating comments. 37 | """ 38 | self.authenticated = False 39 | self.reddit = None 40 | self.use_random_string = use_random_string 41 | config = config or {} # Ensure config is a dict 42 | 43 | logger.debug_with_context(f"Initializing Reddit client. Config provided: {bool(config)}, Use random string: {use_random_string}") 44 | 45 | # --- Try credentials from config first --- 46 | username = config.get("reddit_username") 47 | password = config.get("reddit_password") 48 | client_id = config.get("reddit_client_id") 49 | client_secret = config.get("reddit_client_secret") 50 | 51 | # Check if enable_auth is explicitly True in config, otherwise don't use config creds 52 | auth_enabled_in_config = config.get("enable_auth", False) 53 | 54 | if auth_enabled_in_config and all([username, password, client_id, client_secret]): 55 | logger.info_with_context("Attempting authentication using credentials from configuration (auth enabled).") 56 | try: 57 | self.reddit = praw.Reddit( 58 | client_id=client_id, 59 | client_secret=client_secret, 60 | password=password, 61 | user_agent=f"reddacted u/{username}", 62 | username=username, 63 | check_for_async=False, 64 | ) 65 | logger.info_with_context("Successfully authenticated with Reddit API using configuration.") 66 | logger.debug_with_context(f"Granted scopes (config auth): {self.reddit.auth.scopes()}") # Log scopes 67 | self.authenticated = True 68 | return # Exit if successful 69 | except Exception as e: 70 | logger.warning_with_context(f"Authentication with config credentials failed: {e}. Falling back...") 71 | # Continue to try environment variables 72 | elif not auth_enabled_in_config and any([username, password, client_id, client_secret]): 73 | logger.info_with_context("Credentials found in config, but 'enable_auth' is false. 
Skipping config auth attempt.") 74 | 75 | 76 | # --- Fallback to environment variables --- 77 | logger.debug_with_context("Checking environment variables for Reddit credentials.") 78 | env_username = os.environ.get("REDDIT_USERNAME") 79 | env_password = os.environ.get("REDDIT_PASSWORD") 80 | env_client_id = os.environ.get("REDDIT_CLIENT_ID") 81 | env_client_secret = os.environ.get("REDDIT_CLIENT_SECRET") 82 | 83 | if all([env_username, env_password, env_client_id, env_client_secret]): 84 | # Only use env vars if config auth wasn't explicitly enabled and successful 85 | if not (auth_enabled_in_config and self.authenticated): 86 | logger.info_with_context("Attempting authentication using credentials from environment variables.") 87 | try: 88 | self.reddit = praw.Reddit( 89 | client_id=env_client_id, 90 | client_secret=env_client_secret, 91 | password=env_password, 92 | user_agent=f"reddacted u/{env_username}", 93 | username=env_username, 94 | check_for_async=False, 95 | ) 96 | logger.info_with_context("Successfully authenticated with Reddit API using environment variables.") 97 | logger.debug_with_context(f"Granted scopes (env auth): {self.reddit.auth.scopes()}") # Log scopes 98 | self.authenticated = True 99 | return # Exit if successful 100 | except Exception as e: 101 | logger.warning_with_context(f"Authentication with environment variable credentials failed: {e}. Falling back...") 102 | # Continue to try read-only 103 | else: 104 | logger.debug_with_context("Skipping environment variable auth attempt as config auth was enabled and successful.") 105 | 106 | # --- Fallback to read-only mode --- 107 | if not self.authenticated: # Only attempt read-only if not already authenticated 108 | missing_sources = [] 109 | if not auth_enabled_in_config or not all([username, password, client_id, client_secret]): 110 | missing_sources.append("configuration") 111 | if not all([env_username, env_password, env_client_id, env_client_secret]): 112 | missing_sources.append("environment variables") 113 | 114 | logger.warning_with_context( 115 | f"Reddit API authentication credentials not found or incomplete in { ' or '.join(missing_sources) }. " 116 | "Falling back to read-only mode. Some features like comment deletion/update will be unavailable." 117 | ) 118 | try: 119 | # Use client_id/secret from config OR env vars if available for read-only 120 | read_only_client_id = config.get("reddit_client_id") or env_client_id 121 | read_only_client_secret = config.get("reddit_client_secret") or env_client_secret 122 | 123 | if read_only_client_id and read_only_client_secret: 124 | logger.debug_with_context("Attempting read-only initialization with client_id/secret.") 125 | self.reddit = praw.Reddit( 126 | client_id=read_only_client_id, 127 | client_secret=read_only_client_secret, 128 | user_agent="reddacted:read_only_client_v3" # Updated user agent slightly 129 | ) 130 | logger.info_with_context("Successfully initialized read-only Reddit client (with client ID/secret).") 131 | elif read_only_client_id: 132 | logger.debug_with_context("Attempting read-only initialization with client_id only.") 133 | self.reddit = praw.Reddit( 134 | client_id=read_only_client_id, 135 | user_agent="reddacted:read_only_client_v3" 136 | ) 137 | logger.info_with_context("Successfully initialized read-only Reddit client (with client ID only).") 138 | else: 139 | # PRAW requires at least client_id for read-only access usually. 140 | # If neither config nor env vars provide it, initialization will likely fail here. 
141 | logger.error_with_context("Cannot initialize read-only Reddit client: Missing 'client_id' in both config and environment variables.") 142 | # Optionally, raise an error or let the PRAW error propagate 143 | # raise ValueError("Missing required client_id for Reddit API access.") 144 | # For now, let PRAW handle the potential error if it occurs without client_id 145 | self.reddit = praw.Reddit(user_agent="reddacted:read_only_client_v3") # This line might fail 146 | logger.info_with_context("Attempted read-only Reddit client initialization (without client ID/secret - may fail).") 147 | 148 | 149 | except Exception as e: 150 | # Log the specific PRAW error if initialization fails 151 | logger.error_with_context(f"Failed to initialize read-only client: {str(e)}") 152 | # self.reddit remains None 153 | 154 | @with_logging(logger) 155 | def parse_listing(self, subreddit, article, limit=100, **kwargs): 156 | """Parses a listing and extracts the comments from it. 157 | 158 | :param subreddit: a subreddit 159 | :param article: an article associated with the subreddit 160 | :param limit: maximum number of comments to return (None for unlimited) 161 | :return: a list of comments from an article 162 | """ 163 | if self.reddit is None: 164 | logger.error_with_context("Reddit client initialization failed - cannot fetch comments") 165 | return [] 166 | 167 | mode = "authenticated" if self.authenticated else "read-only" 168 | logger.info_with_context(f"Fetching comments for article '{article}' in {mode} mode") 169 | logger.debug_with_context( 170 | f"Parsing listing for subreddit={subreddit}, article={article}, limit={limit}" 171 | ) 172 | submission = self.reddit.submission(id=article) 173 | logger.debug_with_context(f"Retrieved submission: title='{submission.title}'") 174 | logger.debug_with_context("Expanding 'more comments' links") 175 | submission.comments.replace_more(limit=None) 176 | comments = [] 177 | 178 | for comment in submission.comments.list(): 179 | comment_data = { 180 | "text": comment.body.rstrip(), 181 | "upvotes": comment.ups, 182 | "downvotes": comment.downs, 183 | "permalink": comment.permalink, 184 | "id": comment.id, 185 | } 186 | logger.debug_with_context( 187 | f"Processing comment: ups={comment.ups}, downs={comment.downs}, text_preview='{comment.body[:50]}...'" 188 | ) 189 | comments.append(comment_data) 190 | 191 | return comments[:limit] if limit else comments 192 | 193 | def _process_comments( 194 | self, 195 | comment_ids: list[str], 196 | action: str, 197 | batch_size: int = 10, 198 | update_content: str = None, # Added parameter for update text 199 | ) -> dict[str, any]: 200 | """ 201 | Process comments in batches with rate limiting. 202 | 203 | :param comment_ids: List of comment IDs to process. 204 | :param action: Action to perform ('delete' or 'update'). 205 | :param batch_size: Number of comments to process per batch. 206 | :param update_content: The text to use when updating comments (only used if action='update'). 207 | :return: Dict with results and statistics. 
208 | """ 209 | logger.debug_with_context("Starting _process_comments") 210 | if not self.authenticated: 211 | raise AuthenticationRequiredError(f"Full authentication required for comment {action}") 212 | 213 | results = { 214 | "processed": 0, 215 | "success": 0, 216 | "failures": 0, 217 | "successful_ids": [], 218 | "failed_ids": [], 219 | "errors": [], 220 | } 221 | 222 | for i in range(0, len(comment_ids), batch_size): 223 | batch = comment_ids[i : i + batch_size] 224 | try: 225 | for comment_id in batch: 226 | try: 227 | comment = self.reddit.comment(id=comment_id) 228 | if action == "delete": 229 | logger.debug_with_context(f"Deleting comment ID {comment.id}") # Use comment_id for clarity 230 | comment.delete() 231 | results["successful_ids"].append(comment_id) 232 | results["success"] += 1 233 | elif action == "update": 234 | logger.debug_with_context(f"Updating comment ID {comment.id} with content: '{update_content[:50]}...'") # Use comment_id 235 | if update_content is None: 236 | # Should not happen if called via update_comments, but provides a fallback. 237 | logger.warning_with_context(f"No update_content provided for comment {comment_id}, skipping edit.") 238 | else: 239 | comment.edit(update_content) 240 | results["successful_ids"].append(comment_id) 241 | results["success"] += 1 242 | except Exception as e: 243 | results["failures"] += 1 244 | results["failed_ids"].append(comment_id) 245 | results["errors"].append({"comment_id": comment_id, "error": str(e)}) 246 | # Respect Reddit's API rate limit (1 req/sec) 247 | time.sleep(1.1) 248 | 249 | results["processed"] += len(batch) 250 | except praw.exceptions.APIException as e: 251 | handle_exception(e, "Reddit API Rate Limit Exceeded") 252 | time.sleep(60) # Wait 1 minute before retrying 253 | continue 254 | 255 | return results 256 | 257 | def delete_comments(self, comment_ids: list[str], batch_size: int = 10) -> dict[str, any]: 258 | """ 259 | Delete comments in batches with rate limiting 260 | :param comment_ids: List of comment IDs to delete 261 | :param batch_size: Number of comments to process per batch 262 | :return: Dict with results and statistics 263 | """ 264 | return self._process_comments(comment_ids, "delete", batch_size) 265 | 266 | def update_comments( 267 | self, 268 | comment_ids: list[str], 269 | batch_size: int = 10, 270 | use_random_string: bool = None, # Can be explicitly provided or use instance default 271 | ) -> dict[str, any]: 272 | """ 273 | Update comments in batches with rate limiting. 274 | 275 | Replaces content either with a standard redaction message or a random UUID. 276 | 277 | :param comment_ids: List of comment IDs to update. 278 | :param batch_size: Number of comments to process per batch. 279 | :param use_random_string: If True, replace content with a random UUID; otherwise, use the standard message. 280 | If None, uses the value set during Reddit instance initialization. 281 | :return: Dict with results and statistics. 282 | """ 283 | # Use instance default if not explicitly provided 284 | if use_random_string is None: 285 | use_random_string = self.use_random_string 286 | 287 | if use_random_string: 288 | content_to_write = str(uuid.uuid4()) 289 | logger.info_with_context(f"Updating comments with random UUIDs. 
Example: {content_to_write}") 290 | else: 291 | content_to_write = "This comment has been reddacted to preserve online privacy - see r/reddacted for more info" 292 | logger.info_with_context("Updating comments with standard redaction message.") 293 | 294 | return self._process_comments( 295 | comment_ids, "update", batch_size, update_content=content_to_write 296 | ) 297 | 298 | @with_logging(logger) 299 | def search_comments( 300 | self, query: str, subreddit: str = None, limit: int = 100 301 | ) -> List[Dict[str, Any]]: 302 | """Search for comments containing specific text. 303 | 304 | Args: 305 | query: Text to search for 306 | subreddit: Optional subreddit to limit search to 307 | limit: Maximum number of results to return 308 | 309 | Returns: 310 | List of comment dictionaries 311 | 312 | Raises: 313 | AuthenticationRequiredError: If not authenticated 314 | """ 315 | if not self.authenticated: 316 | raise AuthenticationRequiredError("Authentication required for comment search") 317 | 318 | logger.debug_with_context(f"Searching for '{query}' in {subreddit or 'all'}") 319 | 320 | try: 321 | comments = [] 322 | search_params = {"q": query, "limit": limit, "type": "comment"} 323 | if subreddit: 324 | results = self.reddit.subreddit(subreddit).search(**search_params) 325 | else: 326 | results = self.reddit.subreddit("all").search(**search_params) 327 | 328 | for result in results: 329 | if isinstance(result, praw.models.Comment): 330 | comments.append( 331 | { 332 | "text": result.body.rstrip(), 333 | "upvotes": result.ups, 334 | "downvotes": result.downs, 335 | "permalink": result.permalink, 336 | "id": result.id, 337 | } 338 | ) 339 | if len(comments) >= limit: 340 | break 341 | 342 | return comments 343 | except Exception as e: 344 | handle_exception(e, f"Failed to search for '{query}'", debug=True) 345 | return [] 346 | 347 | @with_logging(logger) 348 | def parse_user(self, username, limit=100, sort="new", time_filter="all", **kwargs): 349 | """Parses a listing and extracts the comments from it. 
350 | 351 | :param username: a user 352 | :param limit: maximum number of comments to return (None for unlimited) 353 | :param sort: Sort method ('hot', 'new', 'controversial', 'top') 354 | :param time_filter: Time filter for 'top' ('all', 'day', 'hour', 'month', 'week', 'year') 355 | :return: a list of comments from a user 356 | :raises: prawcore.exceptions.NotFound if user doesn't exist 357 | :raises: prawcore.exceptions.Forbidden if user is private/banned 358 | """ 359 | if self.reddit is None: 360 | logger.error_with_context("Reddit client initialization failed - cannot fetch comments") 361 | return [] 362 | 363 | mode = "authenticated" if self.authenticated else "read-only" 364 | logger.info_with_context(f"Fetching comments for user '{username}' in {mode} mode") 365 | logger.debug_with_context(f"Using sort method: {sort}") 366 | try: 367 | redditor = self.reddit.redditor(username) 368 | comments = [] 369 | 370 | # Get the appropriate comment listing based on sort 371 | if sort == "hot": 372 | comment_listing = redditor.comments.hot(limit=limit) 373 | elif sort == "new": 374 | comment_listing = redditor.comments.new(limit=limit) 375 | elif sort == "controversial": 376 | comment_listing = redditor.comments.controversial( 377 | limit=limit, time_filter=time_filter 378 | ) 379 | elif sort == "top": 380 | comment_listing = redditor.comments.top(limit=limit, time_filter=time_filter) 381 | else: 382 | comment_listing = redditor.comments.new(limit=limit) # default to new 383 | 384 | for comment in comment_listing: 385 | comment_data = { 386 | "text": comment.body.rstrip(), 387 | "upvotes": comment.ups, 388 | "downvotes": comment.downs, 389 | "permalink": comment.permalink, 390 | "id": comment.id, 391 | } 392 | 393 | # If text matching is enabled, only include matching comments 394 | if "text_match" in kwargs: 395 | logger.debug_with_context( 396 | f"Text match enabled: searching for '{kwargs['text_match']}' in comment {comment_data['id']}" 397 | ) 398 | if kwargs["text_match"].lower() in comment_data["text"].lower(): 399 | logger.debug_with_context(f"Match found in comment {comment_data['id']}") 400 | comments.append(comment_data) 401 | else: 402 | logger.debug_with_context(f"No match found in comment {comment_data['id']}") 403 | else: 404 | logger.debug_with_context( 405 | f"No text match filter, including comment {comment_data['id']}" 406 | ) 407 | comments.append(comment_data) 408 | 409 | if len(comments) >= limit: 410 | break 411 | 412 | return comments 413 | except Exception as e: 414 | handle_exception(e, f"Failed to fetch comments for user '{username}'", debug=True) 415 | return [] 416 | -------------------------------------------------------------------------------- /reddacted/api/scraper.py: -------------------------------------------------------------------------------- 1 | from types import BuiltinMethodType 2 | import requests 3 | from reddacted.api import api 4 | from reddacted.utils.log_handler import get_logger, with_logging 5 | from reddacted.utils.log_handler import handle_exception 6 | 7 | logger = get_logger(__name__) 8 | 9 | 10 | class Scraper(api.API): 11 | """The Reddit Class obtains data to perform sentiment analysis by 12 | scraping the Reddit json endpoint. 13 | 14 | It allows an unauthenticated user to obtain data to analyze various 15 | reddit objects. 
16 | """ 17 | 18 | def __init__(self): 19 | """Initialize Scraper""" 20 | pass 21 | 22 | @with_logging(logger) 23 | def parse_listing(self, subreddit, article, limit=100, **kwargs): 24 | """Parses a listing and extracts the comments from it. 25 | 26 | :param subreddit: a subreddit 27 | :param article: an article associated with the subreddit 28 | :return: a list of comments from an article. 29 | """ 30 | logger.debug_with_context( 31 | f"Parsing listing for subreddit={subreddit}, article={article}, limit={limit}" 32 | ) 33 | url = f"https://www.reddit.com/r/{subreddit}/{article}.json?limit={limit}" 34 | headers = kwargs.get("headers") 35 | logger.debug_with_context(f"Request URL: {url}") 36 | logger.debug_with_context(f"Request headers: {headers}") 37 | try: 38 | response = requests.get(url, headers=headers) 39 | logger.debug_with_context(f"Response status code: {response.status_code}") 40 | except Exception as e: 41 | handle_exception(e, "Error obtaining article information", debug=True) 42 | return [] 43 | 44 | comments = [] 45 | json_resp = response.json() 46 | logger.debug_with_context(f"Retrieved {len(json_resp)} top-level JSON objects") 47 | 48 | for top in range(0, len(json_resp)): 49 | logger.debug_with_context(f"Processing top-level object {top+1}/{len(json_resp)}") 50 | if json_resp[top]["data"]["children"]: 51 | children = json_resp[top]["data"]["children"] 52 | for child in range(0, len(children)): 53 | data = children[child]["data"] 54 | if "body" in data: 55 | # remove empty spaces and weird reddit strings 56 | comment_text = data["body"].rstrip() 57 | comment_text = " ".join(comment_text.split()) 58 | comment_text = comment_text.replace("&#x200B;", "") 59 | if comment_text != "": 60 | comment_data = { 61 | "text": comment_text, 62 | "upvotes": data["ups"], 63 | "downvotes": data["downs"], 64 | "permalink": data["permalink"], 65 | "id": data["id"], 66 | } 67 | logger.debug_with_context( 68 | f"Added comment: ups={data['ups']}, downs={data['downs']}, text_preview='{comment_text[:50]}...'" 69 | ) 70 | comments.append(comment_data) 71 | 72 | logger.debug_with_context(f"Returning {len(comments)} processed comments") 73 | return comments 74 | 75 | @with_logging(logger) 76 | def parse_user(self, username, limit=100, sort="new", time_filter="all", **kwargs): 77 | """Parses a listing and extracts the comments from it. 78 | 79 | :param username: a user 80 | :param limit: maximum number of comments to return 81 | :param sort: Sort method ('hot', 'new', 'controversial', 'top') 82 | :param time_filter: Time filter for 'top' ('all', 'day', 'hour', 'month', 'week', 'year') 83 | :return: a list of comments from a user. 
84 | """ 85 | url = f"https://www.reddit.com/user/{username}.json?limit={limit}&sort={sort}" 86 | if sort in ["top", "controversial"]: 87 | url += f"&t={time_filter}" 88 | logger.debug_with_context(f"Completed scraping for user {username}") 89 | headers = kwargs.get("headers") 90 | try: 91 | response = requests.get(url, headers=headers) 92 | except Exception as e: 93 | handle_exception(e, "Error obtaining user information", debug=True) 94 | return [] 95 | 96 | comments = [] 97 | json_resp = response.json() 98 | 99 | if json_resp["data"]["children"]: 100 | children = json_resp["data"]["children"] 101 | for child in range(0, len(children)): 102 | data = children[child]["data"] 103 | if "body" in data: 104 | # remove empty spaces and weird reddit strings 105 | comment_text = data["body"].rstrip() 106 | comment_text = " ".join(comment_text.split()) 107 | comment_text = comment_text.replace("&#x200B;", "") 108 | if comment_text != "": 109 | comments.append( 110 | { 111 | "text": comment_text, 112 | "upvotes": data["ups"], 113 | "downvotes": data["downs"], 114 | "permalink": data["permalink"], 115 | "id": data["id"], 116 | } 117 | ) 118 | logger.debug_with_context(f"Reddact is scraping {url}...") 119 | return comments 120 | -------------------------------------------------------------------------------- /reddacted/cli_config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import os.path 4 | from typing import Optional, Dict, Any, TYPE_CHECKING, Tuple, List 5 | 6 | from textual.widgets import Input, Label, Checkbox # Needed for validate_inputs 7 | 8 | # Forward reference for type hinting ConfigApp to avoid circular import 9 | if TYPE_CHECKING: 10 | from reddacted.textual_cli import ConfigApp 11 | 12 | # --- Constants --- 13 | VALID_SORT_OPTIONS = ["hot", "new", "controversial", "top"] 14 | VALID_TIME_OPTIONS = ["all", "day", "hour", "month", "week", "year"] 15 | URL_REGEX = r"^(http|https)://[^\s/$.?#].[^\s]*$" 16 | CONFIG_FILE = "config.json" 17 | 18 | # Environment Variable Keys (Example - adjust if needed) 19 | ENV_VARS_MAP = { 20 | "REDDIT_USERNAME": "reddit_username", 21 | "REDDIT_PASSWORD": "reddit_password", 22 | "REDDIT_CLIENT_ID": "reddit_client_id", 23 | "REDDIT_CLIENT_SECRET": "reddit_client_secret", 24 | "OPENAI_API_KEY": "openai_key", 25 | } 26 | 27 | # --- Configuration File I/O --- 28 | 29 | def load_config_from_file(filepath: str) -> Tuple[Dict[str, Any], Optional[str]]: 30 | """ 31 | Loads configuration from a JSON file. 32 | 33 | Args: 34 | filepath: The path to the configuration file. 35 | 36 | Returns: 37 | A tuple containing: 38 | - A dictionary with the loaded configuration values (empty if file not found or error). 39 | - An optional notification message (string) for success or error. 40 | """ 41 | config_values = {} 42 | notification = None 43 | if os.path.exists(filepath): 44 | try: 45 | with open(filepath, "r") as f: 46 | config_values = json.load(f) 47 | notification = f"Configuration loaded from '{filepath}'." 48 | except json.JSONDecodeError: 49 | notification = f"Error decoding '{filepath}'. Using defaults." 50 | config_values = {} 51 | except Exception as e: 52 | notification = f"Error loading config file '{filepath}': {e}" 53 | config_values = {} 54 | else: 55 | notification = f"No configuration file found at '{filepath}'. Using defaults." 
56 | return config_values, notification 57 | 58 | def save_config_to_file(filepath: str, config_data: Dict[str, Any]) -> Optional[str]: 59 | """ 60 | Saves the configuration dictionary to a JSON file. 61 | 62 | Args: 63 | filepath: The path to the configuration file. 64 | config_data: The dictionary containing configuration values to save. 65 | 66 | Returns: 67 | An optional notification message (string) for success or error. 68 | """ 69 | notification = None 70 | try: 71 | with open(filepath, "w") as f: 72 | json.dump(config_data, f, indent=4) # Write with indentation 73 | notification = f"Configuration saved successfully to '{filepath}'." 74 | except IOError as e: 75 | notification = f"Error saving configuration to '{filepath}': {e}" 76 | except Exception as e: # Catch other potential errors 77 | notification = f"An unexpected error occurred during save to '{filepath}': {e}" 78 | return notification 79 | 80 | # --- Configuration Merging & Processing --- 81 | 82 | def merge_configs(file_config: Dict[str, Any], initial_config: Dict[str, Any]) -> Dict[str, Any]: 83 | """ 84 | Merges configuration from file and initial values (CLI/env). 85 | Initial values take precedence. Handles boolean type conversions. 86 | 87 | Args: 88 | file_config: Configuration loaded from the file. 89 | initial_config: Configuration provided via CLI arguments or environment variables. 90 | 91 | Returns: 92 | The final merged configuration dictionary. 93 | """ 94 | merged_config = file_config.copy() # Start with file config 95 | 96 | # Process and merge initial_config, giving it precedence 97 | processed_initial_config = {} 98 | boolean_keys = ["enable_auth", "pii_only", "use_openai_api", "write_to_file"] # Keys expected to be boolean 99 | for key, value in initial_config.items(): 100 | if isinstance(value, str): 101 | if value.lower() in ('true', '1', 'yes'): 102 | processed_initial_config[key] = True 103 | elif value.lower() in ('false', '0', 'no'): 104 | processed_initial_config[key] = False 105 | else: 106 | processed_initial_config[key] = value # Keep as string if not boolean-like 107 | elif isinstance(value, int) and key in boolean_keys: 108 | processed_initial_config[key] = bool(value) # Convert int to bool for specific keys 109 | else: 110 | processed_initial_config[key] = value # Keep other types as is 111 | 112 | merged_config.update(processed_initial_config) # Update with processed initial values 113 | return merged_config 114 | 115 | 116 | # --- Input Validation --- 117 | 118 | def validate_inputs(app: 'ConfigApp') -> Tuple[bool, List[str]]: 119 | """ 120 | Validate all visible and required inputs in the ConfigApp. 121 | 122 | Args: 123 | app: The instance of the ConfigApp. 124 | 125 | Returns: 126 | A tuple containing: 127 | - A boolean indicating if all validations passed. 128 | - A list of validation failure messages. 
129 | """ 130 | is_valid = True 131 | summary_messages = [] 132 | 133 | # Validate standard Input widgets with validators 134 | for input_widget in app.query(Input): 135 | if input_widget.display and not input_widget.disabled: 136 | # Special case: Skip validation for batch_size if it's empty 137 | if input_widget.id == "batch_size" and not input_widget.value.strip(): 138 | input_widget.remove_class("-invalid") # Ensure it's not marked invalid if empty 139 | input_widget.add_class("-valid") 140 | continue # Skip the rest of the validation for this input 141 | 142 | # Clear previous invalid state 143 | input_widget.remove_class("-invalid") 144 | input_widget.add_class("-valid") # Assume valid initially 145 | 146 | if input_widget.validators: 147 | validation_result = input_widget.validate(input_widget.value) 148 | if validation_result is not None and not validation_result.is_valid: 149 | is_valid = False 150 | # Find label via DOM traversal 151 | label_text = input_widget.id # Default to ID 152 | try: 153 | container = input_widget.parent 154 | if container: 155 | label_widget = container.query(Label).first() 156 | if label_widget: 157 | label_text = str(label_widget.renderable).strip().rstrip(':') # Use renderable text, clean up 158 | except Exception: 159 | pass # Keep default ID if traversal fails 160 | summary_messages.extend([f"{label_text}: {desc}" for desc in validation_result.failure_descriptions]) 161 | input_widget.remove_class("-valid") 162 | input_widget.add_class("-invalid") 163 | 164 | # Specific check for output_file if write_to_file is checked 165 | write_cb = app.query_one("#write_to_file_checkbox", Checkbox) 166 | output_input = app.query_one("#output_file", Input) 167 | if write_cb.value and not output_input.value.strip(): 168 | is_valid = False 169 | summary_messages.append("Output File Path: Cannot be empty when 'Write to File' is checked.") 170 | output_input.remove_class("-valid") 171 | output_input.add_class("-invalid") 172 | elif write_cb.value: # If checked and not empty, ensure it's marked valid (if not already invalid by validator) 173 | if "-invalid" not in output_input.classes: 174 | output_input.remove_class("-invalid") 175 | output_input.add_class("-valid") 176 | 177 | 178 | # Specific check for Reddit auth fields if enable_auth is checked 179 | auth_cb = app.query_one("#enable_auth", Checkbox) 180 | if auth_cb.value: 181 | auth_fields = ["reddit_username", "reddit_password", "reddit_client_id", "reddit_client_secret"] 182 | for field_id in auth_fields: 183 | auth_input = app.query_one(f"#{field_id}", Input) 184 | if not auth_input.value.strip(): 185 | is_valid = False 186 | # Find label via DOM traversal 187 | label_text = field_id # Default to ID 188 | try: 189 | container = auth_input.parent 190 | if container: 191 | label_widget = container.query(Label).first() 192 | if label_widget: 193 | label_text = str(label_widget.renderable).strip().rstrip(':') # Use renderable text, clean up 194 | except Exception: 195 | pass # Keep default ID if traversal fails 196 | summary_messages.append(f"{label_text}: Cannot be empty when 'Enable Auth' is checked.") 197 | auth_input.remove_class("-valid") 198 | auth_input.add_class("-invalid") 199 | else: # If not empty, ensure it's marked valid (if not already invalid by validator) 200 | if "-invalid" not in auth_input.classes: 201 | auth_input.remove_class("-invalid") 202 | auth_input.add_class("-valid") 203 | 204 | return is_valid, summary_messages 
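A quick illustration of how these helpers fit together outside the Textual UI - a minimal sketch, assuming the package is importable as `reddacted.cli_config`; the override values shown are hypothetical:

```python
# Minimal sketch - not part of the repo. Assumes `reddacted` is installed/importable.
from reddacted.cli_config import (
    CONFIG_FILE,
    load_config_from_file,
    merge_configs,
    save_config_to_file,
)

# Hypothetical overrides that would normally come from CLI flags or env vars.
cli_overrides = {"enable_auth": "true", "limit": 50}

file_config, note = load_config_from_file(CONFIG_FILE)
print(note)  # e.g. "No configuration file found at 'config.json'. Using defaults."

# CLI/env values take precedence; boolean-like strings ("true"/"false") are normalized.
merged = merge_configs(file_config, cli_overrides)

print(save_config_to_file(CONFIG_FILE, merged))  # success or error message
```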
-------------------------------------------------------------------------------- /reddacted/llm_detector.py: -------------------------------------------------------------------------------- 1 | import json 2 | import asyncio 3 | from typing import Tuple, Dict, Any, List, Optional 4 | import openai 5 | from reddacted.utils.log_handler import get_logger, with_logging 6 | from reddacted.utils.log_handler import handle_exception 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | @with_logging(logger) 12 | class LLMDetector: 13 | """Uses LLM to detect potential PII and personal information in text, 14 | and can suggest sarcastic replacements.""" 15 | 16 | DEFAULT_PROMPT = """ 17 | Analyze the following text for any information that could potentially identify the author or reveal personal details about them. 18 | Consider both explicit PII (like names, addresses) and implicit personal information (like specific life events, locations, relationships). 19 | 20 | YOU MUST Respond in JSON format with these fields. DO NOT CHANGE FIELD NAMES, THEY ARE VERY IMPORTANT. 21 | - has_pii: boolean 22 | - confidence: float (0-1) 23 | - details: list of findings with type and example from the comment text 24 | - reasoning: detailed explanation of why this content might identify the author 25 | - risk_factors: list of specific elements that contribute to the risk score 26 | 27 | Text to analyze: {text} 28 | """ 29 | 30 | REPLACEMENT_PROMPT_TEMPLATE = """ 31 | You are a creative writing assistant specializing in sarcastic and nonsensical rewrites. 32 | Your task is to rewrite the following text, replacing any identified personal information with humorous, absurd, or sarcastic placeholders. Maintain the original structure and tone as much as possible, but ensure all sensitive details are obscured. 33 | 34 | Original Text: 35 | "{original_text}" 36 | 37 | Identified Personal Information Details: 38 | {pii_details} 39 | 40 | Rewrite the text, replacing the identified information with sarcastic/nonsensical content. 41 | ONLY output the rewritten text. Do not include explanations, apologies, or any text other than the rewritten version. 42 | """ 43 | 44 | def __init__( 45 | self, api_key: str, api_base: str = None, model: str = "gpt-3.5-turbo", headers: dict = None 46 | ): 47 | self.model = model 48 | self.client_config = { 49 | "api_key": api_key, 50 | } 51 | if headers: 52 | self.client_config["default_headers"] = headers 53 | if api_base: 54 | self.client_config["base_url"] = api_base 55 | 56 | async def analyze_batch(self, texts: List[str]) -> List[Tuple[float, Dict[str, Any]]]: 57 | """ 58 | Analyze a batch of texts using LLM for potential personal information. 59 | Returns list of tuples (risk_score, details). 
60 | """ 61 | batch_size = 10 62 | results = [] 63 | try: 64 | client = openai.AsyncOpenAI(**self.client_config) 65 | except openai.AuthenticationError as e: 66 | error_msg = str(e) 67 | if "Incorrect API key provided" in error_msg: 68 | # Extract the redacted key if present 69 | key_preview = ( 70 | error_msg.split("provided: ")[1].split(".")[0] 71 | if "provided: " in error_msg 72 | else "UNKNOWN" 73 | ) 74 | raise ValueError(f"Invalid API key (provided: {key_preview})") from e 75 | raise ValueError("Authentication failed - please check your API key") from e 76 | except openai.APIError as e: 77 | raise ConnectionError(f"API error: {e.message}") from e 78 | 79 | try: 80 | for i in range(0, len(texts), batch_size): 81 | batch = texts[i : i + batch_size] 82 | tasks = [] 83 | for text in batch: 84 | task = client.chat.completions.create( 85 | model=self.model, 86 | messages=[ 87 | {"role": "system", "content": "/no-think You are a privacy analysis assistant."}, 88 | {"role": "user", "content": self.DEFAULT_PROMPT.format(text=text)}, 89 | ], 90 | temperature=0.1, 91 | ) 92 | logger.debug_with_context(f"Using API base: {client.base_url}") 93 | logger.debug_with_context(f"Using model: {self.model}") 94 | tasks.append(task) 95 | 96 | logger.info_with_context(f"Awaiting {len(tasks)} LLM analysis tasks...") 97 | batch_responses = await asyncio.gather(*tasks) 98 | logger.info_with_context("LLM analysis tasks completed.") 99 | 100 | for response in batch_responses: 101 | try: 102 | raw_response = response.choices[0].message.content.strip() 103 | logger.debug_with_context(f"\n🤖 Raw LLM Response:\n{raw_response}\n") 104 | # Strip the reasoning block emitted by qwen3-series thinking models 105 | if "</think>" in raw_response: 106 | raw_response = raw_response.split("</think>")[1] 107 | try: 108 | # First attempt a direct parse; the LLM sometimes messes up the formatting 109 | analysis = json.loads(raw_response) 110 | except json.JSONDecodeError: 111 | # If that fails, try to extract JSON from markdown blocks 112 | if "```json" in raw_response: 113 | logger.debug_with_context(f"\nAttempting to extract json from markdown in {raw_response}\n") 114 | json_content = ( 115 | raw_response.split("```json")[1].split("```")[0].strip() 116 | ) 117 | analysis = json.loads(json_content) 118 | else: 119 | logger.debug_with_context(f"Failed to extract json from markdown in {raw_response}\n") 120 | raise 121 | 122 | # Calculate risk score based on confidence and PII presence 123 | confidence = float(analysis.get("confidence", 0.0)) 124 | has_pii = analysis.get("has_pii", False) 125 | 126 | logger.debug_with_context(f"Parsed confidence: {confidence}") 127 | logger.debug_with_context(f"Parsed has_pii: {has_pii}") 128 | 129 | if has_pii: 130 | risk_score = confidence 131 | else: 132 | risk_score = 0.0 133 | analysis = { 134 | "has_pii": False, 135 | "confidence": 0.0, 136 | "details": [], 137 | "risk_factors": [], 138 | "reasoning": "No PII detected", 139 | } 140 | 141 | results.append((risk_score, analysis)) 142 | except Exception as e: 143 | logger.warning_with_context(f"Failed to parse LLM analysis response: {e}") 144 | results.append((0.0, {"error": f"LLM response parsing failed: {e}"})) 145 | return results 146 | 147 | except Exception as e: 148 | logger.error_with_context("AI analysis failed") 149 | logger.error_with_context(f"Batch LLM analysis failed: {str(e)}") 150 | error_msg = str(e) 151 | if isinstance(e, ValueError) and "Invalid API key" in error_msg: 152 | # Format a user-friendly error message 153 | return [ 154 | ( 155 | 0.0, 156 | { 157 | "error":
"Authentication Failed", 158 | "details": error_msg, 159 | "help": "Please check your OpenAI API key configuration", 160 | }, 161 | ) 162 | ] * len(texts) 163 | return [ 164 | ( 165 | 0.0, 166 | { 167 | "error": "LLM Analysis Failed", 168 | "details": error_msg, 169 | "help": "Please try again or contact support if the issue persists", 170 | }, 171 | ) 172 | ] * len(texts) 173 | 174 | async def analyze_text(self, text: str) -> Tuple[float, Dict[str, Any]]: 175 | """ 176 | Analyze a single text using LLM for potential personal information. 177 | Returns tuple of (risk_score, details). 178 | """ 179 | try: 180 | results = await self.analyze_batch([text]) 181 | return results[0] 182 | except Exception as e: 183 | logger.error_with_context(f"LLM analysis failed: {str(e)}") 184 | return 0.0, {"error": str(e)} 185 | 186 | async def suggest_replacement(self, text: str, analysis: Dict[str, Any]) -> Optional[str]: 187 | """ 188 | Suggests a sarcastic/nonsensical replacement for the text, obscuring PII. 189 | 190 | Args: 191 | text: The original text. 192 | analysis: The analysis result dictionary from analyze_text/analyze_batch. 193 | 194 | Returns: 195 | The suggested replacement text, or None if no PII was found or an error occurred. 196 | """ 197 | if not analysis or not analysis.get("has_pii"): 198 | logger.info_with_context("No PII found, skipping replacement suggestion.") 199 | return None 200 | 201 | pii_details_list = analysis.get("details", []) 202 | if not pii_details_list: 203 | logger.warning_with_context("has_pii is True, but no details found. Skipping replacement.") 204 | return None 205 | 206 | # Format PII details for the prompt 207 | pii_details_str = "\n".join([f"- Type: {item.get('type', 'N/A')}, Example: {item.get('example', 'N/A')}" for item in pii_details_list]) 208 | 209 | prompt = self.REPLACEMENT_PROMPT_TEMPLATE.format( 210 | original_text=text, 211 | pii_details=pii_details_str 212 | ) 213 | 214 | try: 215 | # Create a client instance for this specific call 216 | client = openai.AsyncOpenAI(**self.client_config) 217 | logger.debug_with_context("Requesting replacement suggestion from LLM.") 218 | logger.debug_with_context(f"Replacement Prompt:\n{prompt}") 219 | 220 | response = await client.chat.completions.create( 221 | model=self.model, 222 | messages=[ 223 | {"role": "system", "content": "You are a creative writing assistant specializing in sarcastic and nonsensical rewrites."}, 224 | {"role": "user", "content": prompt}, 225 | ], 226 | temperature=0.7, # Slightly higher temp for creativity 227 | ) 228 | 229 | replacement_text = response.choices[0].message.content.strip() 230 | logger.debug_with_context(f"Raw replacement suggestion:\n{replacement_text}") 231 | 232 | # Basic check to ensure it's not empty or just whitespace 233 | if not replacement_text: 234 | logger.warning_with_context("LLM returned an empty replacement suggestion.") 235 | return None 236 | 237 | return replacement_text 238 | 239 | except openai.AuthenticationError as e: 240 | error_msg = str(e) 241 | key_preview = "UNKNOWN" 242 | if "Incorrect API key provided" in error_msg and "provided: " in error_msg: 243 | key_preview = error_msg.split("provided: ")[1].split(".")[0] 244 | logger.error_with_context(f"Authentication failed for replacement suggestion (key: {key_preview}): {e}") 245 | # Propagate a clear error message or handle as needed downstream 246 | # For now, returning None as the function signature suggests optional return 247 | return None 248 | except openai.APIError as e: 249 | 
logger.error_with_context(f"API error during replacement suggestion: {e.message}") 250 | return None 251 | except Exception as e: 252 | logger.error_with_context(f"Unexpected error during replacement suggestion: {str(e)}") 253 | return None 254 | -------------------------------------------------------------------------------- /reddacted/pii_detector.py: -------------------------------------------------------------------------------- 1 | import re 2 | from dataclasses import dataclass 3 | from typing import List, Tuple 4 | from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn 5 | from reddacted.utils.log_handler import get_logger, with_logging 6 | from reddacted.utils.log_handler import handle_exception 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | @dataclass 12 | class PIIMatch: 13 | """Represents a PII match found in text""" 14 | 15 | type: str 16 | value: str 17 | confidence: float 18 | 19 | 20 | class PIIDetector: 21 | """Detects various types of personally identifiable information in text""" 22 | 23 | # Common PII patterns 24 | PATTERNS = { 25 | "email": (r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", 0.95), 26 | "phone": (r"\b(\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b", 0.85), 27 | "ssn": (r"\b\d{3}-\d{2}-\d{4}\b", 0.97), 28 | "credit_card": (r"\b(?:\d{4}[- ]?){3}\d{4}\b", 0.95), 29 | "address": ( 30 | r"\b\d{2,5}\s+(?:[A-Za-z]+\s)+(?:street|st|avenue|ave|road|rd|boulevard|blvd|lane|ln|drive|dr)\.?\b", 31 | 0.65, 32 | ), 33 | "name_pattern": (r"\b(?:Mr\.|Mrs\.|Ms\.|Dr\.)\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", 0.7), 34 | } 35 | 36 | # Keywords that might indicate PII context 37 | CONTEXT_KEYWORDS = [ 38 | "name is", 39 | "live at", 40 | "address", 41 | "reach me", 42 | "phone", 43 | "email", 44 | "contact", 45 | "call me", 46 | "ssn", 47 | "social security", 48 | "credit card", 49 | "driver license", 50 | ] 51 | 52 | COMMON_FALSE_POSITIVES = [ 53 | r"\b\d+ (llm|ai|gpu|cpu|ram|mb|gb|ghz|mhz|api)\b", 54 | r"\b\d+ (times|years|days|hours|minutes|seconds)\b", 55 | r"\b\d+(?:st|nd|rd|th)\b", 56 | r"\b\d+[km]?b?\b", 57 | ] 58 | 59 | def __init__(self): 60 | self.compiled_patterns = { 61 | name: re.compile(pattern, re.IGNORECASE) for name, (pattern, _) in self.PATTERNS.items() 62 | } 63 | self.false_positive_patterns = [ 64 | re.compile(pattern, re.IGNORECASE) for pattern in self.COMMON_FALSE_POSITIVES 65 | ] 66 | 67 | @with_logging(logger) 68 | def analyze_text(self, text: str) -> List[PIIMatch]: 69 | """ 70 | Analyze text for potential PII. 71 | Returns a list of PIIMatch objects for each PII instance found. 
72 | """ 73 | matches = [] 74 | 75 | # First check for false positives 76 | if any(fp.search(text) for fp in self.false_positive_patterns): 77 | return [] 78 | 79 | # Validate matches against known false positive contexts 80 | for pii_type, (_, confidence) in self.PATTERNS.items(): 81 | pattern = self.compiled_patterns[pii_type] 82 | for match in pattern.finditer(text): 83 | full_match = match.group(0) 84 | 85 | # Additional validation per type 86 | if pii_type == "phone" and len(full_match.replace("-", "").replace(" ", "")) < 10: 87 | continue 88 | 89 | if pii_type == "address" and not any(c.isalpha() for c in full_match.split()[-2]): 90 | continue 91 | 92 | matches.append(PIIMatch(pii_type, full_match, confidence)) 93 | 94 | # Contextual confidence boost with cap 95 | context_boost = ( 96 | 0.15 97 | if any( 98 | re.search(rf"\b{re.escape(kw)}\b", text, re.IGNORECASE) 99 | for kw in self.CONTEXT_KEYWORDS 100 | ) 101 | else 0.0 102 | ) 103 | 104 | for match in matches: 105 | match.confidence = min(1.0, match.confidence + context_boost) 106 | 107 | return matches 108 | 109 | @with_logging(logger) 110 | def get_pii_risk_score(self, text: str, progress=None) -> Tuple[float, List[PIIMatch]]: 111 | """ 112 | Calculate overall PII risk score for a text and return matches. 113 | Returns a tuple of (risk_score, matches). 114 | """ 115 | matches = self.analyze_text(text) 116 | if not matches: 117 | return 0.0, [] 118 | 119 | # Weighted average with type weights 120 | type_weights = { 121 | "ssn": 1.2, 122 | "credit_card": 1.2, 123 | "email": 1.0, 124 | "phone": 0.9, 125 | "address": 0.7, 126 | "name_pattern": 0.6, 127 | } 128 | 129 | total_weight = sum(type_weights.get(match.type, 1.0) for match in matches) 130 | weighted_sum = sum( 131 | match.confidence * type_weights.get(match.type, 1.0) for match in matches 132 | ) 133 | 134 | return min(1.0, weighted_sum / total_weight), matches 135 | -------------------------------------------------------------------------------- /reddacted/results.py: -------------------------------------------------------------------------------- 1 | from reddacted.api.reddit import Reddit 2 | #!/usr/bin/env python 3 | # -*- coding: utf-8 -*- 4 | 5 | from typing import List, Dict, Any, Optional 6 | 7 | from rich.columns import Columns 8 | from rich.console import Group 9 | from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn 10 | 11 | from reddacted.utils.log_handler import get_logger, with_logging 12 | from reddacted.utils.report import ( 13 | generate_analysis_report, 14 | should_show_result, 15 | ) 16 | from reddacted.utils.tables import TableFormatter 17 | from reddacted.utils.panels import PanelFormatter 18 | from reddacted.utils.analysis import AnalysisResult 19 | from reddacted.textual_ui import show_results 20 | 21 | 22 | logger = get_logger(__name__) 23 | 24 | 25 | class ResultsFormatter(TableFormatter, PanelFormatter): 26 | """Handles formatting and display of analysis results.""" 27 | 28 | def __init__(self): 29 | TableFormatter.__init__(self) 30 | PanelFormatter.__init__(self) 31 | # Use module-level logger instance 32 | self.total_pii_comments = 0 33 | self.total_llm_pii_comments = 0 34 | self.use_random_string = False # Default to False 35 | 36 | @with_logging(logger) 37 | def create_progress(self) -> Progress: 38 | """Creates a unified progress context manager.""" 39 | if not hasattr(self, "_progress"): 40 | self._progress = Progress( 41 | SpinnerColumn(spinner_name="dots"), 42 | TextColumn("[bold blue]{task.description}"), 43 | 
TimeElapsedColumn(), 44 | transient=True, 45 | ) 46 | return self._progress 47 | 48 | @with_logging(logger) 49 | def generate_output_file( 50 | self, 51 | filename: str, 52 | comments: List[Dict[str, Any]], 53 | url: str, 54 | results: List[AnalysisResult], 55 | overall_score: float, 56 | overall_sentiment: str, 57 | ) -> None: 58 | """Outputs a file containing a detailed sentiment and PII analysis per comment.""" 59 | progress = self.create_progress() 60 | with progress: 61 | progress_task = progress.add_task( 62 | "📝 Generating analysis report...", total=len(comments) 63 | ) 64 | try: 65 | stats = generate_analysis_report( 66 | filename=filename, 67 | comments=comments, 68 | url=url, 69 | results=results, 70 | overall_score=overall_score, 71 | overall_sentiment=overall_sentiment, 72 | pii_only=getattr(self, "pii_only", False), 73 | ) 74 | self.total_pii_comments = stats["total_pii_comments"] 75 | self.total_llm_pii_comments = stats["total_llm_pii_comments"] 76 | self._print_completion_message(filename, comments, results, progress) 77 | except Exception as e: 78 | logger.exception("Failed to generate output file: %s", e) # Use module logger 79 | raise 80 | 81 | @with_logging(logger) 82 | def print_config(self, config: Dict[str, Any]) -> None: 83 | """Prints the active configuration using the provided config dictionary.""" 84 | progress = self.create_progress() 85 | 86 | # Extract values needed for panels from the config dict 87 | auth_enabled = config.get("enable_auth", False) 88 | pii_enabled = True # Assuming PII is always enabled for now 89 | pii_only = config.get("pii_only", False) 90 | limit_val = config.get("limit", 20) 91 | limit = None if limit_val == 0 else limit_val 92 | sort = config.get("sort", "new") 93 | use_random_string = config.get("use_random_string", False) # Get from config 94 | 95 | # Construct llm_config dict for the features panel if applicable 96 | llm_config = None 97 | if config.get("model"): 98 | llm_config = { 99 | "api_key": config.get("openai_key") if config.get("use_openai_api") else "sk-not-needed", 100 | "api_base": config.get("local_llm") if not config.get("use_openai_api") else "https://api.openai.com/v1", 101 | "model": config.get("model"), 102 | } 103 | # Adjust api_base for local LLM if needed (redundant with Sentiment.__init__ but safe) 104 | if not config.get("use_openai_api") and llm_config.get("api_base"): 105 | base_url = llm_config["api_base"].rstrip('/') 106 | if not base_url.endswith('/v1'): 107 | llm_config["api_base"] = f"{base_url}/v1" 108 | elif config.get("openai_key") or config.get("local_llm"): 109 | llm_config = { # Handle case where URL/key provided but no model 110 | "api_key": config.get("openai_key") if config.get("use_openai_api") else "sk-not-needed", 111 | "api_base": config.get("local_llm") if not config.get("use_openai_api") else "https://api.openai.com/v1", 112 | "model": None, 113 | } 114 | if not config.get("use_openai_api") and llm_config.get("api_base"): 115 | base_url = llm_config["api_base"].rstrip('/') 116 | if not base_url.endswith('/v1'): 117 | llm_config["api_base"] = f"{base_url}/v1" 118 | 119 | 120 | with progress: 121 | progress.console.print("\n[bold cyan]Active Configuration[/]") 122 | features_panel = self.create_features_panel( 123 | auth_enabled, pii_enabled, llm_config, pii_only, limit, sort, 124 | use_random_string=use_random_string # Use value from config 125 | ) 126 | panels = [features_panel] 127 | # Pass the full config to create_auth_panel 128 | auth_panel = self.create_auth_panel(config) 129 | 
panels.append(auth_panel) 130 | progress.console.print(Columns(panels)) 131 | 132 | @with_logging(logger) 133 | def print_comments( 134 | self, 135 | comments: List[Dict[str, Any]], 136 | url: str, 137 | results: List[AnalysisResult], 138 | overall_score: float, 139 | overall_sentiment: str, 140 | reddit_api: 'Reddit', # Added reddit_api 141 | ) -> None: 142 | """Prints out analysis of user comments using Textual UI.""" 143 | filtered_results = [ 144 | r for r in results if should_show_result(r, getattr(self, "pii_only", False)) 145 | ] 146 | if not filtered_results and getattr(self, "pii_only", False): 147 | logger.info_with_context("No comments with high PII risk found.") # Use module logger with context 148 | print("No comments with high PII risk found.") 149 | return 150 | 151 | # Show interactive results view 152 | show_results( 153 | url=url, 154 | comments=comments, 155 | results=filtered_results, 156 | overall_score=overall_score, 157 | overall_sentiment=overall_sentiment, 158 | reddit_api=reddit_api, # Pass reddit_api 159 | use_random_string=getattr(self, "use_random_string", False), 160 | ) 161 | 162 | def _print_completion_message( 163 | self, 164 | filename: str, 165 | comments: List[Dict[str, Any]], 166 | results: List[AnalysisResult], 167 | progress: Progress, 168 | ) -> None: 169 | """Prints completion message with file info and action panel.""" 170 | high_risk_comments = [ 171 | r 172 | for r in results 173 | if r.pii_risk_score > 0.5 or (r.llm_findings and r.llm_findings.get("has_pii", False)) 174 | ] 175 | comment_ids = [r.comment_id for r in high_risk_comments] 176 | completion_panel = self.create_completion_panel( 177 | filename, len(comments), self.total_pii_comments, self.total_llm_pii_comments 178 | ) 179 | if comment_ids: 180 | actions_panel = self.create_action_panel(results, use_random_string=getattr(self, "use_random_string", False)) 181 | progress.console.print(Group(completion_panel, actions_panel)) 182 | else: 183 | progress.console.print(completion_panel) 184 | -------------------------------------------------------------------------------- /reddacted/sentiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Standard library 5 | import asyncio 6 | import re 7 | from os import environ 8 | from typing import List, Dict, Any, Optional, Tuple 9 | 10 | # Third-party 11 | import nltk 12 | from nltk.sentiment.vader import SentimentIntensityAnalyzer 13 | from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn 14 | from rich.console import Console 15 | 16 | # Local 17 | from reddacted.utils.log_handler import get_logger, with_logging 18 | 19 | # Initialize rich console 20 | console = Console() 21 | from reddacted.utils.log_handler import handle_exception 22 | from reddacted.api.scraper import Scraper 23 | from reddacted.api.reddit import Reddit 24 | from reddacted.pii_detector import PIIDetector 25 | from reddacted.llm_detector import LLMDetector 26 | from reddacted.results import ResultsFormatter, AnalysisResult 27 | 28 | logger = get_logger(__name__) 29 | 30 | _COMMENT_ANALYSIS_HEADERS = {"User-agent": "reddacted"} 31 | 32 | 33 | # Sentiment constants 34 | HAPPY_SENTIMENT = "😁" 35 | SAD_SENTIMENT = "😕" 36 | NEUTRAL_SENTIMENT = "😐" 37 | 38 | 39 | class Sentiment: 40 | """Performs the LLM PII & sentiment analysis on a given set of Reddit Objects.""" 41 | 42 | def __init__(self, config: Dict[str, Any]): 43 | """Initialize Sentiment Analysis 
using a configuration dictionary. 44 | 45 | Args: 46 | config (Dict[str, Any]): Dictionary containing all configuration settings. 47 | """ 48 | logger.debug_with_context(f"Initializing Sentiment Analyzer with config: {config}") 49 | 50 | # Extract values from config, providing defaults 51 | self.auth_enabled = config.get("enable_auth", False) 52 | self.pii_enabled = True # Assuming PII is always enabled for now, adjust if needed 53 | self.llm_config = None # Will be constructed below if needed 54 | self.pii_only = config.get("pii_only", False) 55 | self.sort = config.get("sort", "new") 56 | # Default limit to 20 if missing, map 0 to None (unlimited) 57 | limit_val = config.get("limit", 20) 58 | self.limit = None if limit_val == 0 else limit_val 59 | self.skip_text = config.get("skip_text") 60 | self.use_random_string = config.get("use_random_string", False) 61 | 62 | # Download required NLTK data if not already present 63 | try: 64 | nltk.data.find("sentiment/vader_lexicon") 65 | except LookupError: 66 | logger.debug("Downloading required NLTK data...") 67 | nltk.download("vader_lexicon", quiet=True) 68 | 69 | # Initialize necessary variables 70 | self.llm_detector = None # Initialize llm_detector early 71 | # Initialize batch processing attributes 72 | self._llm_batch = [] 73 | self._llm_batch_indices = [] 74 | self._pending_results = [] 75 | 76 | try: 77 | self.api = Scraper() # Default to Scraper 78 | self.score = 0 79 | self.sentiment = NEUTRAL_SENTIMENT 80 | self.headers = _COMMENT_ANALYSIS_HEADERS 81 | self.pii_detector = PIIDetector() if self.pii_enabled else None 82 | logger.debug_with_context( 83 | "Initialized base attributes with configuration: " 84 | f"auth_enabled={self.auth_enabled}, " 85 | f"pii_enabled={self.pii_enabled}, " 86 | f"pii_only={self.pii_only}, " 87 | f"sort={self.sort}, " 88 | f"limit={self.limit}" 89 | ) 90 | 91 | logger.debug_with_context("Base sentiment analyzer initialized") 92 | except Exception as e: 93 | handle_exception(e, "Failed to initialize base Sentiment analyzer") 94 | logger.error_with_context("Failed to initialize base Sentiment analyzer") 95 | raise 96 | 97 | # Construct LLM config dictionary if applicable 98 | if config.get("model"): 99 | # If model is specified but no LLM URL, default to local 100 | if not config.get("local_llm") and not config.get("openai_key"): 101 | config["local_llm"] = "http://localhost:11434" # Modify config directly or use a local var 102 | logger.warning_with_context("No LLM URL specified, defaulting to local") 103 | 104 | self.llm_config = { 105 | "api_key": config.get("openai_key") if config.get("use_openai_api") else "sk-not-needed", 106 | "api_base": config.get("local_llm") if not config.get("use_openai_api") else "https://api.openai.com/v1", 107 | "model": config.get("model"), 108 | } 109 | # Adjust api_base for local LLM if needed 110 | if not config.get("use_openai_api") and self.llm_config["api_base"]: 111 | base_url = self.llm_config["api_base"].rstrip('/') 112 | if not base_url.endswith('/v1'): 113 | self.llm_config["api_base"] = f"{base_url}/v1" 114 | elif config.get("openai_key") or config.get("local_llm"): 115 | # Handle case where URL/key is provided but no model selected yet 116 | self.llm_config = { 117 | "api_key": config.get("openai_key") if config.get("use_openai_api") else "sk-not-needed", 118 | "api_base": config.get("local_llm") if not config.get("use_openai_api") else "https://api.openai.com/v1", 119 | "model": None, # Explicitly set model to None 120 | } 121 | if not 
config.get("use_openai_api") and self.llm_config["api_base"]: 122 | base_url = self.llm_config["api_base"].rstrip('/') 123 | if not base_url.endswith('/v1'): 124 | self.llm_config["api_base"] = f"{base_url}/v1" 125 | logger.warning_with_context("LLM URL/Key provided, but no model selected. LLM analysis may be limited.") 126 | 127 | 128 | # Initialize LLM detector if config was constructed 129 | if self.llm_config: 130 | logger.debug_with_context(f"Constructed LLM Config: {self.llm_config}") 131 | try: 132 | api_key = self.llm_config.get("api_key") 133 | api_base = self.llm_config.get("api_base") 134 | model = self.llm_config.get("model") # Already extracted 135 | 136 | logger.debug_with_context(f"LLM Config - API Base: {api_base}, Model: {model}") 137 | # Initialize LLM detector if we have sufficient configuration 138 | if not model: 139 | logger.warning_with_context("No model specified in config - LLM analysis disabled") 140 | self.llm_detector = None 141 | elif not api_base: 142 | logger.error_with_context("Missing API base URL - required for both local and OpenAI") 143 | self.llm_detector = None 144 | elif api_base == "https://api.openai.com/v1" and not api_key: 145 | logger.error_with_context("Missing API key - required for OpenAI API") 146 | self.llm_detector = None 147 | else: 148 | self.llm_detector = LLMDetector( 149 | api_key=api_key, 150 | api_base=api_base, 151 | model=model, 152 | ) 153 | logger.info_with_context("LLM Detector initialized") 154 | except Exception as e: 155 | logger.error_with_context(f"Failed to initialize LLM Detector: {str(e)}") 156 | self.llm_detector = None 157 | else: 158 | logger.info_with_context("No LLM config provided or model missing, LLM analysis disabled.") 159 | 160 | # Initialize Reddit API if auth is enabled, passing the config 161 | if self.auth_enabled: 162 | logger.debug_with_context("Authentication enabled, initializing Reddit API with config") 163 | # Pass the full config dictionary and the specific use_random_string preference 164 | self.api = Reddit(config=config, use_random_string=self.use_random_string) 165 | logger.debug_with_context("Reddit API initialized") 166 | else: 167 | logger.debug_with_context("Authentication not enabled, using default Scraper API") 168 | # self.api remains the Scraper instance initialized earlier 169 | 170 | self.formatter = ResultsFormatter() 171 | self.formatter.pii_only = self.pii_only 172 | self.formatter.use_random_string = self.use_random_string # Use instance variable 173 | # Pass the entire config dictionary to print_config 174 | self.formatter.print_config(config) 175 | 176 | @with_logging(logger) 177 | async def _analyze(self, comments): 178 | """Analyzes comments for both sentiment and PII content. 179 | :param comments: comments to perform analysis on. 
180 | :return: tuple of (sentiment_score, list of AnalysisResult objects) 181 | """ 182 | logger.debug_with_context("Starting _analyze function") 183 | sentiment_analyzer = SentimentIntensityAnalyzer() 184 | final_score = 0 185 | results: List[AnalysisResult] = [] # Final results list 186 | _llm_batch: List[str] = [] # Batch of comments for LLM 187 | _llm_result_indices: List[int] = [] # Indices in 'results' corresponding to _llm_batch items 188 | 189 | cleanup_regex = re.compile("<.*?>") 190 | total_comments = len(comments) 191 | progress = Progress( 192 | SpinnerColumn(spinner_name="dots"), 193 | TextColumn("[bold blue]{task.description}"), 194 | TimeElapsedColumn(), 195 | transient=True, 196 | ) 197 | with progress: 198 | main_task = progress.add_task( 199 | f"Received {total_comments} comments, processing...", total=total_comments 200 | ) 201 | pii_task = progress.add_task("🔍 PII Analysis", visible=False, total=1) 202 | llm_task = progress.add_task("🤖 LLM Analysis", visible=False, total=1) 203 | for i, comment_data in enumerate(comments, 1): 204 | try: 205 | clean_comment = re.sub(cleanup_regex, "", str(comment_data["text"])) 206 | 207 | # Skip already reddacted comments 208 | if self.skip_text and self.skip_text in clean_comment: 209 | logger.debug_with_context(f"Skipping already reddacted comment {i}") 210 | progress.update(main_task, advance=1) 211 | continue 212 | progress.update( 213 | main_task, 214 | description=f"[bold blue]💭 Processing comment[/] [cyan]{i}[/]/[cyan]{total_comments}[/]", 215 | ) 216 | # Sentiment analysis 217 | all_scores = sentiment_analyzer.polarity_scores(clean_comment) 218 | score = all_scores["compound"] 219 | final_score += score 220 | # PII analysis 221 | pii_risk_score, pii_matches = 0.0, [] 222 | if self.pii_enabled: 223 | progress.update(pii_task, visible=True) 224 | progress.update(pii_task, description=f"🔍 Scanning comment {i} for PII") 225 | pii_risk_score, pii_matches = self.pii_detector.get_pii_risk_score( 226 | clean_comment 227 | ) 228 | progress.update(pii_task, visible=False) 229 | 230 | # Create the initial result object 231 | result = AnalysisResult( 232 | comment_id=comment_data["id"], 233 | sentiment_score=score, 234 | sentiment_emoji=self._get_sentiment(score), 235 | pii_risk_score=pii_risk_score, 236 | pii_matches=pii_matches, 237 | text=clean_comment, 238 | upvotes=comment_data["upvotes"], 239 | downvotes=comment_data["downvotes"], 240 | permalink=comment_data["permalink"], 241 | llm_risk_score=0.0, # Placeholder 242 | llm_findings=None, # Placeholder 243 | ) 244 | results.append(result) # Add initial result to final list 245 | 246 | # If LLM is enabled, add to batch for later processing 247 | if self.llm_detector: 248 | _llm_batch.append(clean_comment) 249 | _llm_result_indices.append(len(results) - 1) # Store index of the result we just added 250 | logger.debug_with_context(f"Added comment {i} to LLM batch (size: {len(_llm_batch)})") 251 | 252 | # Process batch if full 253 | if len(_llm_batch) >= 10: 254 | batch_size = len(_llm_batch) 255 | try: 256 | progress.update(llm_task, visible=True) 257 | progress.update(llm_task, description=f"[bold blue]🤖 Processing LLM batch[/] ([cyan]{batch_size}[/] items)") 258 | batch_llm_results = await self.llm_detector.analyze_batch(_llm_batch) 259 | logger.debug_with_context(f"Successfully processed LLM batch of {batch_size} items") 260 | 261 | # Update results in place 262 | for result_idx, (llm_risk_score, findings) in zip(_llm_result_indices, batch_llm_results): 263 | 
results[result_idx].llm_risk_score = llm_risk_score 264 | results[result_idx].llm_findings = findings 265 | if findings and findings.get("has_pii"): 266 | results[result_idx].pii_risk_score = max(results[result_idx].pii_risk_score, llm_risk_score) 267 | logger.debug_with_context(f"Updated {batch_size} results with LLM data") 268 | 269 | except Exception as e: 270 | logger.error_with_context(f"Failed to process LLM batch: {str(e)}") 271 | finally: 272 | progress.update(llm_task, description=f"[bold green]✅ LLM batch complete[/] ([cyan]{batch_size}[/] items analyzed)", visible=False) 273 | # Clear batch lists for next batch 274 | _llm_batch = [] 275 | _llm_result_indices = [] 276 | else: 277 | logger.warning_with_context(f"Skipping LLM analysis for comment {i} - detector not initialized") 278 | 279 | progress.update(main_task, advance=1) 280 | except Exception as e: 281 | logger.error_with_context(f"Error processing comment {i}: {e}") 282 | # Ensure progress advances even on error 283 | progress.update(main_task, advance=1) 284 | continue 285 | 286 | # --- Process any remaining items in the LLM batch after the loop --- 287 | if self.llm_detector and _llm_batch: 288 | batch_size = len(_llm_batch) 289 | try: 290 | progress.update(llm_task, visible=True) 291 | progress.update(llm_task, description=f"[bold blue]🤖 Processing final LLM batch[/] ([cyan]{batch_size}[/] items)") 292 | batch_llm_results = await self.llm_detector.analyze_batch(_llm_batch) 293 | logger.debug_with_context(f"Successfully processed final LLM batch of {batch_size} items") 294 | 295 | # Update results in place 296 | for result_idx, (llm_risk_score, findings) in zip(_llm_result_indices, batch_llm_results): 297 | results[result_idx].llm_risk_score = llm_risk_score 298 | results[result_idx].llm_findings = findings 299 | if findings and findings.get("has_pii"): 300 | results[result_idx].pii_risk_score = max(results[result_idx].pii_risk_score, llm_risk_score) 301 | logger.debug_with_context(f"Updated {batch_size} results with final LLM data") 302 | 303 | except Exception as e: 304 | logger.error_with_context(f"Failed to process final LLM batch: {str(e)}") 305 | finally: 306 | progress.update(llm_task, description=f"[bold green]✅ Final LLM batch complete[/] ([cyan]{batch_size}[/] items analyzed)", visible=False) 307 | # No need to clear batch lists here as they are local to the function call 308 | 309 | # --- Calculate final score and return --- 310 | try: 311 | # Use len(results) which accurately reflects processed comments 312 | num_processed = len(results) 313 | if num_processed == 0: 314 | logger.warning("No comments were successfully processed.") 315 | return 0.0, [] 316 | # Calculate score based on processed comments' sentiment scores 317 | final_score = sum(r.sentiment_score for r in results) # Recalculate final_score based on actual results 318 | rounded_final = round(final_score / num_processed, 4) # Use num_processed 319 | logger.debug_with_context(f"Final sentiment score calculated: {rounded_final}") 320 | return rounded_final, results 321 | except ZeroDivisionError: # Should be caught by num_processed check, but keep for safety 322 | logger.error_with_context("Division by zero error during final score calculation.") 323 | return 0.0, [] 324 | 325 | @with_logging(logger) 326 | def _get_sentiment(self, score): 327 | """Obtains the sentiment using a sentiment score. 328 | :param score: the sentiment score. 329 | :return: sentiment from score. 
330 | """ 331 | logger.debug_with_context(f"Calculating sentiment for score {score}") 332 | if score == 0: 333 | return NEUTRAL_SENTIMENT 334 | elif score > 0: 335 | return HAPPY_SENTIMENT 336 | else: 337 | return SAD_SENTIMENT 338 | 339 | @with_logging(logger) 340 | def _get_comments(self, source_type: str, identifier: str, **kwargs) -> List[Dict[str, Any]]: 341 | """Unified comment fetching method""" 342 | logger.debug_with_context( 343 | f"Fetching comments for {source_type} '{identifier}' with kwargs: {kwargs}" 344 | ) 345 | 346 | # Get the appropriate fetch method 347 | fetch_method = {"user": self.api.parse_user, "listing": self.api.parse_listing}[source_type] 348 | 349 | # Handle text search if specified 350 | if text_match := kwargs.pop("text_match", None): 351 | if source_type == "user": 352 | # For users, we pass the text_match to parse_user 353 | return fetch_method( 354 | identifier, 355 | headers=self.headers, 356 | limit=self.limit, 357 | text_match=text_match, 358 | **kwargs, 359 | ) 360 | else: 361 | # For subreddits, use search_comments 362 | return self.api.search_comments( 363 | query=text_match, subreddit=kwargs.get("subreddit"), limit=self.limit 364 | ) 365 | 366 | # Default comment fetching 367 | if source_type == "listing": 368 | # Split subreddit/article for listing type 369 | subreddit = identifier.split("/")[0] 370 | article = identifier.split("/")[1] 371 | return fetch_method( 372 | subreddit, article, headers=self.headers, limit=self.limit, **kwargs 373 | ) 374 | else: 375 | return fetch_method(identifier, headers=self.headers, limit=self.limit, **kwargs) 376 | 377 | @with_logging(logger) 378 | def _run_analysis_flow( 379 | self, comments: List[Dict[str, Any]] 380 | ) -> Tuple[float, List[AnalysisResult]]: 381 | """Centralized analysis execution""" 382 | logger.debug_with_context("Starting analysis flow") 383 | logger.debug_with_context(f"Processing {len(comments)} comments") 384 | logger.debug_with_context(f"LLM Detector status: {'Initialized' if self.llm_detector else 'Not initialized'}") 385 | 386 | try: 387 | loop = asyncio.get_running_loop() 388 | logger.debug_with_context("Using existing event loop") 389 | # If we have a running loop, use it 390 | future = asyncio.ensure_future(self._analyze(comments), loop=loop) 391 | result = loop.run_until_complete(future) 392 | logger.info_with_context("Analysis completed") 393 | return result 394 | except RuntimeError: 395 | # No running event loop, create a new one 396 | logger.debug_with_context("No running loop found, creating new one") 397 | return asyncio.run(self._analyze(comments)) 398 | 399 | @with_logging(logger) 400 | def get_sentiment( 401 | self, source_type: str, identifier: str, output_file: Optional[str] = None, **kwargs 402 | ) -> None: 403 | """Unified sentiment analysis entry point""" 404 | logger.debug_with_context( 405 | f"get_sentiment called with source_type={source_type}, identifier={identifier}" 406 | ) 407 | comments = self._get_comments(source_type, identifier, **kwargs) 408 | self.score, self.results = self._run_analysis_flow(comments) 409 | self.sentiment = self._get_sentiment(self.score) 410 | if output_file: 411 | self.formatter.generate_output_file( 412 | output_file, comments, identifier, self.results, self.score, self.sentiment 413 | ) 414 | else: 415 | self.formatter.print_comments( 416 | comments, identifier, self.results, self.score, self.sentiment, self.api # Pass self.api 417 | ) 418 | -------------------------------------------------------------------------------- 
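For reference, the Sentiment class in sentiment.py above is driven entirely by a plain configuration dictionary whose keys match the config.get(...) reads in its __init__. A hedged sketch of programmatic use; the username, model name, and output path are placeholders:

from reddacted.sentiment import Sentiment

config = {
    "enable_auth": False,                    # stay on the unauthenticated Scraper backend
    "pii_only": True,                        # only surface comments flagged for PII risk
    "limit": 50,                             # 0 would be treated as "no limit"
    "sort": "new",
    "use_openai_api": False,
    "local_llm": "http://localhost:11434",   # "/v1" is appended automatically when missing
    "model": "qwen3:8b",                     # placeholder model name served by the local endpoint
}

analyzer = Sentiment(config)
# source_type is "user" or "listing" ("subreddit/article_id"); with output_file set,
# a report file is written instead of launching the Textual results UI
analyzer.get_sentiment("user", "example_username", output_file="analysis_report.md")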
/reddacted/styles.py: -------------------------------------------------------------------------------- 1 | """Centralized styling configuration for the application.""" 2 | 3 | from textual.color import Color 4 | 5 | # Textual CSS 6 | TEXTUAL_CSS = """ 7 | Screen { 8 | background: #0e333d; /* A custom dark blend derived from bg_0 (#103c48) */ 9 | } 10 | 11 | Header { 12 | dock: top; 13 | background: #184956; 14 | color: #cad8d9; 15 | height: 3; 16 | content-align: center middle; 17 | border-bottom: heavy #58a3ff; 18 | } 19 | 20 | Footer { 21 | dock: bottom; 22 | background: #184956; 23 | color: #cad8d9; 24 | height: 1; 25 | } 26 | 27 | StatsDisplay { 28 | height: auto; 29 | padding: 1; 30 | background: #103c48; 31 | border: heavy #4695f7; 32 | margin: 1; 33 | } 34 | 35 | DataTable { 36 | height: auto; 37 | margin: 1; 38 | border: heavy #4695f7; 39 | } 40 | 41 | .stats-text { 42 | text-align: center; 43 | } 44 | 45 | CommentActionScreen { 46 | align: center middle; 47 | } 48 | 49 | /* Details Screen Styles */ 50 | DetailsScreen { 51 | background: #0e333d; 52 | layout: vertical; 53 | } 54 | 55 | .details-title { 56 | text-align: center; 57 | background: #184956; 58 | color: #cad8d9; 59 | padding: 1; 60 | text-style: bold; 61 | border-bottom: solid #58a3ff; 62 | width: 100%; 63 | box-sizing: border-box; 64 | } 65 | 66 | .details-scroll { 67 | width: 100%; 68 | height: 1fr; 69 | padding: 1 2; 70 | } 71 | 72 | /* Header Information */ 73 | .header-info { 74 | background: #103c48; 75 | border: solid #4695f7; 76 | padding: 1; 77 | height: auto; 78 | text-align: center; 79 | } 80 | 81 | .details-id { 82 | color: #cad8d9; 83 | } 84 | 85 | .details-risk-high { 86 | color: #ef4444; 87 | text-style: bold; 88 | } 89 | 90 | .details-risk-medium { 91 | color: #f59e0b; 92 | text-style: bold; 93 | } 94 | 95 | .details-risk-low { 96 | color: #22c55e; 97 | text-style: bold; 98 | } 99 | 100 | .details-sentiment { 101 | color: #cad8d9; 102 | } 103 | 104 | .details-votes { 105 | color: #cad8d9; 106 | } 107 | 108 | .section-header { 109 | background: #184956; 110 | color: #cad8d9; 111 | padding: 0 1; 112 | margin-top: 1; 113 | text-style: bold; 114 | } 115 | 116 | .subsection-header { 117 | color: #cad8d9; 118 | text-style: italic; 119 | margin-top: 1; 120 | } 121 | 122 | .details-text { 123 | padding: 1; 124 | background: #103c48; 125 | border: solid #4695f7; 126 | } 127 | 128 | .pii-matches-container, .llm-findings-container, .llm-stats { 129 | padding: 1; 130 | background: #103c48; 131 | border: solid #4695f7; 132 | } 133 | 134 | .details-pii-item, .details-llm-item { 135 | padding-left: 1; 136 | color: #cad8d9; 137 | border: solid #4695f7; 138 | } 139 | 140 | .details-reasoning { 141 | padding: 1; 142 | background: #103c48; 143 | border: solid #4695f7; 144 | } 145 | 146 | .details-llm-risk { 147 | padding-left: 1; 148 | background: #103c48; 149 | color: #cad8d9; 150 | } 151 | 152 | .details-has-pii-yes { 153 | padding-left: 1; 154 | color: #ef4444; 155 | text-style: bold; 156 | } 157 | 158 | .details-has-pii-no { 159 | padding-left: 1; 160 | color: #22c55e; 161 | } 162 | 163 | .details-actions { 164 | dock: bottom; 165 | height: auto; 166 | width: 100%; 167 | padding: 1; 168 | background: #184956; 169 | border-top: solid #58a3ff; 170 | align: center middle; 171 | } 172 | 173 | .details-actions Button { 174 | margin: 0 1; 175 | min-width: 16; 176 | } 177 | """ 178 | 179 | # Color System 180 | COLORS = { 181 | "primary": Color.parse("#6366f1"), # Indigo 182 | "secondary": Color.parse("#a855f7"), # Purple 183 | 
"success": Color.parse("#22c55e"), # Green 184 | "warning": Color.parse("#f59e0b"), # Amber 185 | "error": Color.parse("#ef4444"), # Red 186 | "surface": Color.parse("#1e293b"), # Slate 187 | "background": Color.parse("#0f172a"), # Dark slate 188 | "text": Color.parse("#f8fafc"), # Light slate 189 | "muted": Color.parse("#64748b"), # Medium slate 190 | } 191 | 192 | # Typography 193 | TYPOGRAPHY = { 194 | "h1": ("24px", "bold"), 195 | "h2": ("20px", "bold"), 196 | "h3": ("16px", "bold"), 197 | "body": ("14px", "normal"), 198 | "small": ("12px", "normal"), 199 | } 200 | 201 | # Spacing Scale 202 | SPACING = {"xs": 4, "sm": 8, "md": 16, "lg": 24, "xl": 32} 203 | 204 | # Component-specific styles 205 | TABLE_STYLES = { 206 | "header_style": "bold magenta", 207 | "border": "rounded", 208 | "padding": (0, 1), 209 | "collapse_padding": True, 210 | } 211 | 212 | PANEL_STYLES = {"border_style": "blue", "padding": (1, 1)} 213 | 214 | 215 | # Risk level styles 216 | def get_risk_style(score: float) -> str: 217 | """Get appropriate color style based on risk score.""" 218 | if score > 0.5: 219 | return "error" 220 | elif score > 0.2: 221 | return "warning" 222 | return "success" 223 | 224 | 225 | # Status styles 226 | def get_status_style(enabled: bool) -> str: 227 | """Get appropriate color style based on status.""" 228 | return "success" if enabled else "error" 229 | -------------------------------------------------------------------------------- /reddacted/textual_ui.py: -------------------------------------------------------------------------------- 1 | from reddacted.api.reddit import Reddit 2 | from textual.app import App, ComposeResult 3 | from textual.containers import Container 4 | from textual.widgets import DataTable, Header, Footer, Static 5 | from textual.binding import Binding 6 | from textual import message 7 | 8 | from rich.text import Text 9 | from typing import List, Optional 10 | import uuid # Added for random UUID generation 11 | 12 | from reddacted.utils.analysis import AnalysisResult 13 | from reddacted.ui.comment_actions import CommentActionScreen 14 | from reddacted.ui.details_screen import DetailsScreen 15 | from reddacted.styles import TEXTUAL_CSS 16 | 17 | 18 | class ResultsSummary(DataTable): 19 | """An interactive data table showing analysis results.""" 20 | 21 | def __init__(self, results: List[AnalysisResult]): 22 | super().__init__() 23 | self.results = results 24 | 25 | def on_mount(self) -> None: 26 | """Set up the table when mounted.""" 27 | # Add columns 28 | self.add_columns("Risk", "Sentiment", "Comment Preview", "Votes", "ID") 29 | 30 | # Add rows from results 31 | for result in self.results: 32 | # Format risk score with color based on value 33 | risk_score = f"{result.pii_risk_score:.0%}" 34 | risk_style = ( 35 | "red" 36 | if result.pii_risk_score > 0.7 37 | else "yellow" if result.pii_risk_score > 0.4 else "green" 38 | ) 39 | risk_cell = Text(risk_score, style=risk_style) 40 | 41 | # Format sentiment with emoji 42 | sentiment = Text(f"{result.sentiment_emoji} {result.sentiment_score:.2f}") 43 | 44 | # Format comment preview with link 45 | preview = (result.text[:67] + "...") if len(result.text) > 70 else result.text 46 | preview_cell = Text(preview, style="link blue") 47 | 48 | # Format votes 49 | vote_style = ( 50 | "green" 51 | if result.upvotes > result.downvotes 52 | else "red" if result.downvotes > result.upvotes else "dim" 53 | ) 54 | vote_display = Text( 55 | ( 56 | f"⬆️ {result.upvotes:>3}" 57 | if result.upvotes > result.downvotes 58 | else f"⬇️ 
{result.downvotes:>3}" if result.downvotes > result.upvotes else "0" 59 | ), 60 | style=vote_style, 61 | ) 62 | 63 | self.add_row(risk_cell, sentiment, preview_cell, vote_display, result.comment_id) 64 | 65 | def on_data_table_row_selected(self) -> None: 66 | """Handle row selection by mouse click.""" 67 | # Trigger the view details action in the parent application 68 | if self.cursor_row is not None: 69 | self.app.action_view_details() 70 | 71 | def on_data_table_cell_selected(self) -> None: 72 | """Handle cell selection.""" 73 | if self.cursor_row is not None: 74 | self.app.action_view_details() 75 | 76 | 77 | class StatsDisplay(Static): 78 | """Displays overall statistics.""" 79 | 80 | def __init__(self, url: str, comment_count: int, overall_score: float, overall_sentiment: str): 81 | super().__init__() 82 | self.url = url 83 | self.comment_count = comment_count 84 | self.overall_score = overall_score 85 | self.overall_sentiment = overall_sentiment 86 | 87 | def compose(self) -> ComposeResult: 88 | stat1 = Static(f"📊 Analysis Results for: {self.url}") 89 | stat2 = Static(f"💬 Total Comments: {self.comment_count}") 90 | stat3 = Static(f"📈 Overall Score: {self.overall_score:.2f}") 91 | stat4 = Static(f"🎭 Overall Sentiment: {self.overall_sentiment}") 92 | 93 | # Add stats-text class to all stats 94 | for stat in [stat1, stat2, stat3, stat4]: 95 | stat.add_class("stats-text") 96 | yield stat 97 | 98 | 99 | class TextualResultsView(App): 100 | """Main Textual app for displaying analysis results.""" 101 | 102 | CSS = TEXTUAL_CSS 103 | title = "reddacted" 104 | 105 | BINDINGS = [ 106 | Binding("q", "quit", "Quit", show=True), 107 | Binding("enter", "view_details", "View Details", show=True), 108 | Binding("e", "edit_comment", "Edit Comment", show=True), 109 | Binding("d", "delete_comment", "Delete Comment", show=True), 110 | ] 111 | 112 | def on_mount(self) -> None: 113 | self.title = "reddacted" # This controls the main header title 114 | self.sub_title = "Analysis Results" # Optional: Controls the subtitle 115 | 116 | def _get_selected_comment_id(self) -> Optional[str]: 117 | """Get the comment ID of the currently selected row.""" 118 | table = self.query_one(ResultsSummary) 119 | if table.cursor_row is not None: 120 | return table.get_row_at(table.cursor_row)[-1] 121 | return None 122 | 123 | def action_edit_comment(self) -> None: 124 | """Handle editing the selected comment.""" 125 | if comment_id := self._get_selected_comment_id(): 126 | self.push_screen(CommentActionScreen(comment_id, "edit", self.reddit_api, self.use_random_string)) 127 | 128 | def action_delete_comment(self) -> None: 129 | """Handle deleting the selected comment.""" 130 | if comment_id := self._get_selected_comment_id(): 131 | self.push_screen(CommentActionScreen(comment_id, "delete", self.reddit_api, self.use_random_string)) 132 | 133 | def on_action_completed(self, event: message.Message) -> None: 134 | """Handle completion of comment actions.""" 135 | table = self.query_one(ResultsSummary) 136 | 137 | # Find the row index and result for the affected comment 138 | for i, r in enumerate(self.results): 139 | if r.comment_id == event.comment_id: 140 | if event.action == "delete": 141 | # Remove from table and results 142 | table.remove_row(i) 143 | self.results.pop(i) 144 | elif event.action == "edit": 145 | # Update the result text based on random string status 146 | if hasattr(event, "use_random_string") and event.use_random_string: 147 | r.text = f"[Random UUID: {uuid.uuid4()}]" 148 | # Update cell in table 149 | 
table.update_cell(i, 2, Text(r.text, style="link blue")) 150 | else: 151 | # Standard message 152 | r.text = "This comment has been reddacted to preserve online privacy - see r/reddacted for more info" 153 | # Update cell in table with shortened version for display 154 | table.update_cell(i, 2, Text("r/reddacted", style="link blue")) 155 | break 156 | 157 | def __init__( 158 | self, 159 | url: str, 160 | comments: List[dict], 161 | results: List[AnalysisResult], 162 | overall_score: float, 163 | overall_sentiment: str, 164 | reddit_api: 'Reddit', # Added reddit_api 165 | use_random_string: bool = False, 166 | ): 167 | super().__init__() 168 | self.url = url 169 | self.comments = comments 170 | self.results = results 171 | self.overall_score = overall_score 172 | self.overall_sentiment = overall_sentiment 173 | self.reddit_api = reddit_api # Store the api instance 174 | self.use_random_string = use_random_string 175 | 176 | def compose(self) -> ComposeResult: 177 | """Create child widgets for the app.""" 178 | yield Header(show_clock=True) 179 | yield Container( 180 | StatsDisplay(self.url, len(self.comments), self.overall_score, self.overall_sentiment), 181 | ResultsSummary(self.results), 182 | ) 183 | yield Footer() 184 | 185 | def action_view_details(self) -> None: 186 | """Handle viewing details of selected row.""" 187 | if comment_id := self._get_selected_comment_id(): 188 | result = next((r for r in self.results if r.comment_id == comment_id), None) 189 | if result: 190 | self.push_screen(DetailsScreen(result, self.reddit_api, self.use_random_string)) # Pass reddit_api 191 | else: 192 | self.notify(f"No result found for comment ID: {comment_id}") 193 | else: 194 | self.notify("No comment ID found") 195 | 196 | 197 | def show_results( 198 | url: str, 199 | comments: List[dict], 200 | results: List[AnalysisResult], 201 | overall_score: float, 202 | overall_sentiment: str, 203 | reddit_api: 'Reddit', # Added reddit_api 204 | use_random_string: bool = False, 205 | ) -> None: 206 | """Display results using the Textual UI.""" 207 | app = TextualResultsView( 208 | url=url, 209 | comments=comments, # Pass original comments list 210 | results=results, 211 | overall_score=overall_score, 212 | overall_sentiment=overall_sentiment, 213 | reddit_api=reddit_api, # Pass reddit_api 214 | use_random_string=use_random_string, 215 | ) 216 | app.run() 217 | -------------------------------------------------------------------------------- /reddacted/ui/__init__.py: -------------------------------------------------------------------------------- 1 | """UI components for the reddacted package.""" 2 | 3 | from .details_screen import * -------------------------------------------------------------------------------- /reddacted/ui/comment_actions.py: -------------------------------------------------------------------------------- 1 | from textual.app import ComposeResult 2 | from textual.screen import Screen 3 | from textual.containers import Center, Vertical 4 | from textual.widgets import Button, Static, Label 5 | from textual.binding import Binding 6 | from textual import message 7 | 8 | from reddacted.api.reddit import Reddit 9 | from reddacted.styles import TEXTUAL_CSS 10 | 11 | 12 | class CommentActionScreen(Screen): 13 | """Screen for confirming and executing comment actions.""" 14 | 15 | BINDINGS = [ 16 | Binding("escape", "cancel", "Cancel", show=True), 17 | ] 18 | 19 | def __init__(self, comment_id: str, action: str, reddit_api: Reddit, use_random_string: bool = False): 20 | """Initialize the action 
screen. 21 | 22 | Args: 23 | comment_id: The ID of the comment to act on 24 | action: Either 'edit' or 'delete' 25 | reddit_api: The authenticated Reddit API instance. 26 | use_random_string: Whether to use a random UUID instead of standard message 27 | """ 28 | super().__init__() 29 | self.comment_id = comment_id 30 | self.action = action 31 | self.use_random_string = use_random_string # Keep this for logic within the screen 32 | self.api = reddit_api # Use the passed authenticated instance 33 | 34 | def compose(self) -> ComposeResult: 35 | """Create child widgets.""" 36 | action_text = "edit" if self.action == "edit" else "delete" 37 | # Show Reddit API status 38 | api_status = "Initialized" if self.api is not None else "Not Initialized" 39 | 40 | # Show random string status 41 | random_status = "Using random UUID" if self.use_random_string else "Using standard message" 42 | 43 | with Vertical(): 44 | with Center(): 45 | yield Label(f"Are you sure you want to {action_text} comment {self.comment_id}?") 46 | yield Label(f"Reddit API: {api_status}", classes="header-info", markup=False) 47 | yield Label(f"{random_status}", classes="header-info", markup=False) 48 | yield Button("Confirm", variant="error", id="confirm") 49 | yield Button("Cancel", variant="primary", id="cancel") 50 | yield Static("", id="status") 51 | 52 | def on_button_pressed(self, event: Button.Pressed) -> None: 53 | """Handle button presses.""" 54 | if event.button.id == "cancel": 55 | self.app.pop_screen() 56 | elif event.button.id == "confirm": 57 | self._execute_action() 58 | 59 | def action_cancel(self) -> None: 60 | """Handle escape key.""" 61 | self.app.pop_screen() 62 | 63 | def _execute_action(self) -> None: 64 | """Execute the requested action.""" 65 | status = self.query_one("#status", Static) 66 | try: 67 | if self.action == "edit": 68 | result = self.api.update_comments( 69 | [self.comment_id], 70 | use_random_string=self.use_random_string 71 | ) 72 | action_text = "edited" 73 | else: # delete 74 | result = self.api.delete_comments([self.comment_id]) 75 | action_text = "deleted" 76 | 77 | if result["success"] > 0: 78 | # Notify parent to refresh 79 | self.app.post_message(self.ActionCompleted( 80 | self.comment_id, 81 | self.action, 82 | use_random_string=self.use_random_string 83 | )) 84 | 85 | # Close the screen after a short delay to show success 86 | def close_screen(): 87 | self.app.pop_screen() 88 | 89 | self.set_timer(0.5, close_screen) 90 | status.update(f"✅ Successfully {action_text} comment") 91 | else: 92 | status.update(f"❌ Failed to {self.action} comment") 93 | except Exception as e: 94 | status.update(f"❌ Error: {str(e)}") 95 | 96 | class ActionCompleted(message.Message): 97 | """Message sent when action is completed successfully.""" 98 | 99 | def __init__(self, comment_id: str, action: str, use_random_string: bool = False): 100 | self.comment_id = comment_id 101 | self.action = action 102 | self.use_random_string = use_random_string 103 | super().__init__() 104 | 105 | @property 106 | def message_type(self) -> str: 107 | return "action_completed" 108 | -------------------------------------------------------------------------------- /reddacted/ui/details_screen.py: -------------------------------------------------------------------------------- 1 | from reddacted.api.reddit import Reddit 2 | from textual.app import ComposeResult 3 | from textual.screen import Screen 4 | from textual.containers import Vertical, ScrollableContainer, Horizontal 5 | from textual.widgets import Static, Label, Markdown, 
Button 6 | from textual.binding import Binding 7 | from textual import message 8 | 9 | from typing import List, Dict, Any, Optional 10 | 11 | from reddacted.utils.report import format_llm_detail 12 | from reddacted.ui.comment_actions import CommentActionScreen 13 | 14 | 15 | class DetailsScreen(Screen): 16 | """Screen for displaying detailed PII analysis for a comment.""" 17 | 18 | BINDINGS = [ 19 | Binding("escape", "go_back", "Return to Results", show=True), 20 | Binding("b", "go_back", "Back", show=True), 21 | Binding("e", "edit_comment", "Edit Comment", show=True), 22 | Binding("d", "delete_comment", "Delete Comment", show=True), 23 | ] 24 | 25 | def __init__(self, result, reddit_api: 'Reddit', use_random_string=False): 26 | """Initialize the details screen. 27 | 28 | Args: 29 | result: The AnalysisResult object containing the comment data 30 | reddit_api: The authenticated Reddit API instance. 31 | use_random_string: Whether to use random UUIDs instead of standard message 32 | """ 33 | super().__init__() 34 | self.result = result 35 | self.reddit_api = reddit_api 36 | self.use_random_string = use_random_string 37 | 38 | def compose(self) -> ComposeResult: 39 | """Create child widgets for the screen.""" 40 | yield Label("Comment Details", classes="details-title") 41 | 42 | with ScrollableContainer(classes="details-scroll"): 43 | # Header information section 44 | with Vertical(classes="header-info"): 45 | # Result ID 46 | yield Static(f"ID: {self.result.comment_id}", classes="details-id") 47 | 48 | # Risk score with appropriate coloring 49 | risk_class = self._get_risk_class() 50 | yield Static( 51 | f"Risk Score: {self.result.pii_risk_score:.0%}", 52 | classes=f"details-risk-{risk_class}", 53 | ) 54 | 55 | yield Static( 56 | f"Sentiment: {self.result.sentiment_emoji} {self.result.sentiment_score:.2f}", 57 | classes="details-sentiment", 58 | ) 59 | yield Static( 60 | f"Votes: ⬆️ {self.result.upvotes} ⬇️ {self.result.downvotes}", 61 | classes="details-votes", 62 | ) 63 | 64 | # Comment text section 65 | yield Label("Comment Text", classes="section-header") 66 | yield Markdown(self.result.text, classes="details-text") 67 | 68 | # Pattern-based PII section 69 | if self.result.pii_matches: 70 | yield Label("Pattern-based PII Detected", classes="section-header") 71 | with Vertical(classes="pii-matches-container"): 72 | for pii in self.result.pii_matches: 73 | yield Static( 74 | f"• {pii.type} (confidence: {pii.confidence:.2f})", 75 | classes="details-pii-item", 76 | ) 77 | 78 | # LLM analysis section 79 | if self.result.llm_findings: 80 | yield Label("LLM Privacy Analysis", classes="section-header") 81 | yield Static( 82 | f"Risk Score: {self.result.llm_risk_score:.2f}", 83 | classes="details-llm-risk", 84 | ) 85 | findings = self.result.llm_findings 86 | has_pii = findings.get("has_pii", False) 87 | yield Static( 88 | f"PII Detected: {'Yes' if has_pii else 'No'}", 89 | classes=f"details-has-pii-{'yes' if has_pii else 'no'}", 90 | ) 91 | if isinstance(findings, dict): 92 | if details_raw := findings.get("details"): 93 | yield Label("Findings:", classes="subsection-header") 94 | # Handle case where details might be a string instead of a list 95 | details_list = [] 96 | if isinstance(details_raw, str): 97 | # Split string by newlines and remove empty lines 98 | details_list = [d.strip() for d in details_raw.split('\n') if d.strip()] 99 | elif isinstance(details_raw, list): 100 | details_list = details_raw # Assume it's the correct list format 101 | else: 102 | # Log or handle unexpected type 
if necessary 103 | self.app.notify(f"Unexpected type for LLM findings details: {type(details_raw)}", severity="warning", title="LLM Data Warning") 104 | 105 | for detail in details_list: 106 | formatted_detail = format_llm_detail(detail, self.app) 107 | yield Static( 108 | "• " + formatted_detail, 109 | classes="details-llm-item" 110 | ) 111 | if reasoning := findings.get("reasoning"): 112 | yield Label("Reasoning:", classes="subsection-header") 113 | yield Markdown(reasoning, classes="details-reasoning") 114 | 115 | # Action buttons at the bottom 116 | with Horizontal(classes="details-actions"): 117 | yield Button("Back", variant="default", id="back-btn") 118 | yield Button("Reddact Comment", variant="primary", id="edit-btn") 119 | yield Button("Delete Comment", variant="error", id="delete-btn") 120 | 121 | def _get_risk_class(self) -> str: 122 | """Get risk class based on PII risk score.""" 123 | if self.result.pii_risk_score > 0.7: 124 | return "high" 125 | elif self.result.pii_risk_score > 0.4: 126 | return "medium" 127 | else: 128 | return "low" 129 | 130 | def on_button_pressed(self, event: Button.Pressed) -> None: 131 | """Handle button press events.""" 132 | button_id = event.button.id 133 | if button_id == "back-btn": 134 | self.action_go_back() 135 | elif button_id == "edit-btn": 136 | self.action_edit_comment() 137 | elif button_id == "delete-btn": 138 | self.action_delete_comment() 139 | 140 | def on_comment_action_screen_action_completed(self, event: message.Message) -> None: 141 | """Handle action_completed events from CommentActionScreen.""" 142 | if event.action == "edit": 143 | # Include random string status in notification if available 144 | random_status = " with random UUID" if hasattr(event, "use_random_string") and event.use_random_string else "" 145 | self.app.notify(f"Comment {self.result.comment_id} successfully edited{random_status}") 146 | else: 147 | self.app.notify(f"Comment {self.result.comment_id} successfully deleted") 148 | 149 | # Return to main screen by popping twice (action screen + details screen) 150 | self.app.pop_screen() # Remove CommentActionScreen 151 | self.app.pop_screen() # Remove DetailsScreen 152 | 153 | 154 | def action_edit_comment(self) -> None: 155 | """Handle editing the current comment.""" 156 | self.app.push_screen(CommentActionScreen(self.result.comment_id, "edit", self.reddit_api, self.use_random_string)) # Pass reddit_api 157 | 158 | def action_delete_comment(self) -> None: 159 | """Handle deleting the current comment.""" 160 | self.app.push_screen(CommentActionScreen(self.result.comment_id, "delete", self.reddit_api, self.use_random_string)) # Pass reddit_api 161 | 162 | def action_go_back(self) -> None: 163 | """Return to the results screen.""" 164 | self.app.pop_screen() 165 | 166 | class DetailActionComplete(message.Message): 167 | """Message sent when returning to main screen.""" 168 | 169 | def __init__(self, comment_id: str, action: str = None): 170 | self.comment_id = comment_id 171 | self.action = action 172 | super().__init__() 173 | -------------------------------------------------------------------------------- /reddacted/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions and helpers for the reddacted package 3 | """ 4 | -------------------------------------------------------------------------------- /reddacted/utils/analysis.py: -------------------------------------------------------------------------------- 1 | from dataclasses import 
dataclass 2 | from typing import List, Dict, Any, Optional 3 | 4 | 5 | @dataclass 6 | class AnalysisResult: 7 | """Holds the results of both sentiment and PII analysis.""" 8 | 9 | comment_id: str 10 | sentiment_score: float 11 | sentiment_emoji: str 12 | pii_risk_score: float 13 | pii_matches: List[Any] 14 | permalink: str 15 | text: str 16 | upvotes: int = 0 17 | downvotes: int = 0 18 | llm_risk_score: float = 0.0 19 | llm_findings: Optional[Dict[str, Any]] = None 20 | -------------------------------------------------------------------------------- /reddacted/utils/base.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Any, Optional 2 | from rich.text import Text 3 | 4 | from reddacted.utils.log_handler import get_logger, with_logging 5 | 6 | 7 | class BaseFormatter: 8 | """Base class for formatters with shared utilities.""" 9 | 10 | def __init__(self): 11 | # Logger not currently used in this base class 12 | pass 13 | 14 | def _get_risk_style(self, score: float) -> str: 15 | """Determines text style based on risk score.""" 16 | if score > 0.5: 17 | return "red" 18 | elif score > 0.2: 19 | return "yellow" 20 | else: 21 | return "green" 22 | 23 | def _format_status( 24 | self, enabled: bool, true_text: str = "Enabled", false_text: str = "Disabled" 25 | ) -> Text: 26 | """Formats a status text based on a boolean value.""" 27 | return Text(true_text if enabled else false_text, style="green" if enabled else "red") 28 | -------------------------------------------------------------------------------- /reddacted/utils/log_handler.py: -------------------------------------------------------------------------------- 1 | # reddacted/utils/log_handler.py 2 | import logging 3 | import sys 4 | import inspect 5 | import traceback 6 | from functools import wraps 7 | from typing import Callable, Any, Optional, Union, Dict, TypeVar, Type 8 | 9 | from rich.console import Console 10 | from rich.panel import Panel 11 | from rich.text import Text 12 | from rich.traceback import Traceback 13 | 14 | # --- Type Definitions --- 15 | T = TypeVar("T") 16 | LoggerType = logging.Logger 17 | LogLevel = Union[int, str] 18 | 19 | # --- Globals --- 20 | console = Console() 21 | 22 | # === Logging Setup and Configuration === 23 | 24 | def setup_logging(initial_level: LogLevel = logging.INFO) -> None: 25 | """Configure root logger with file and console handlers.""" 26 | root_logger = logging.getLogger() 27 | # Set root to DEBUG to capture everything, handlers control output level 28 | root_logger.setLevel(logging.DEBUG) 29 | 30 | # Prevent duplicate handlers if called multiple times 31 | if root_logger.hasHandlers(): 32 | root_logger.handlers.clear() 33 | 34 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s:%(lineno)d - %(message)s') 35 | 36 | # File Handler (writes to reddacted.log in current directory) 37 | try: 38 | file_handler = logging.FileHandler('reddacted.log', mode='a') 39 | file_handler.setLevel(initial_level) # Set initial level 40 | file_handler.setFormatter(formatter) 41 | root_logger.addHandler(file_handler) 42 | except Exception as e: 43 | # Fallback or notify if file logging fails 44 | sys.stderr.write(f"Error setting up file logger: {e}\n") 45 | 46 | # Console Handler (stderr) 47 | console_handler = logging.StreamHandler(sys.stderr) 48 | # Console handler is INFO unless global level is DEBUG 49 | console_level = logging.INFO if initial_level != logging.DEBUG else logging.DEBUG 50 | 
console_handler.setLevel(console_level) 51 | console_handler.setFormatter(formatter) 52 | root_logger.addHandler(console_handler) 53 | 54 | # Set initial level for httpx (less noisy) 55 | logging.getLogger("httpx").setLevel(logging.WARNING if initial_level > logging.DEBUG else logging.DEBUG) 56 | 57 | 58 | def set_global_logging_level(level: LogLevel) -> None: 59 | """Set the global logging level for root logger and handlers. 60 | 61 | Args: 62 | level: The logging level to set globally. Can be an integer level or string name. 63 | 64 | Note: 65 | This affects all existing loggers in the hierarchy. 66 | Some third-party loggers may be set to specific levels for noise reduction. 67 | """ 68 | root_logger = logging.getLogger() 69 | root_logger.setLevel(level) # Set root level first 70 | 71 | # Adjust handler levels 72 | for handler in root_logger.handlers: 73 | if isinstance(handler, logging.FileHandler): 74 | handler.setLevel(level) # File handler matches global level 75 | elif isinstance(handler, logging.StreamHandler): 76 | # Set console handler level directly to the requested global level 77 | handler.setLevel(level) 78 | 79 | # Adjust specific noisy loggers 80 | httpx_level = logging.WARNING if level > logging.DEBUG else logging.DEBUG 81 | logging.getLogger("httpx").setLevel(httpx_level) 82 | 83 | 84 | # === Logger Retrieval and Contextual Logging === 85 | 86 | def get_logger(name: str) -> LoggerType: 87 | """Get or create a logger with consistent formatting and contextual logging methods. 88 | 89 | Args: 90 | name: The name of the logger, typically __name__ 91 | 92 | Returns: 93 | A Logger instance with additional contextual logging methods 94 | 95 | Example: 96 | >>> logger = get_logger(__name__) # Level is now controlled globally 97 | >>> logger.info_with_context("Starting process") 98 | """ 99 | logger = logging.getLogger(name) 100 | # Level is inherited from root logger and its handlers 101 | 102 | # Check if methods already exist to avoid adding them multiple times 103 | if not hasattr(logger, "debug_with_context"): 104 | def make_log_method(log_level: int) -> Callable[[str, Optional[Callable]], None]: 105 | def log_method(msg: str, func: Optional[Callable] = None) -> None: 106 | log_with_context(logger, log_level, msg, func) 107 | 108 | return log_method 109 | 110 | # Add typed convenience methods 111 | setattr(logger, "debug_with_context", make_log_method(logging.DEBUG)) 112 | setattr(logger, "info_with_context", make_log_method(logging.INFO)) 113 | setattr(logger, "warning_with_context", make_log_method(logging.WARNING)) 114 | setattr(logger, "error_with_context", make_log_method(logging.ERROR)) 115 | setattr(logger, "critical_with_context", make_log_method(logging.CRITICAL)) 116 | 117 | return logger 118 | 119 | 120 | def get_log_context(func: Callable[..., Any], frame: Optional[Any] = None) -> str: 121 | """Get standardized logging context with file, function, and line number. 122 | 123 | Args: 124 | func: The function from which the log was called 125 | frame: Optional stack frame, will get caller's frame if None 126 | 127 | Returns: 128 | A formatted string with module, function and line information 129 | """ 130 | if frame is None: 131 | frame = sys._getframe(2) # Get caller's frame 132 | return f"[{func.__module__}:{func.__name__}:{frame.f_lineno}]" 133 | 134 | 135 | def log_with_context( 136 | logger: LoggerType, level: int, msg: str, func: Optional[Callable[..., Any]] = None 137 | ) -> None: 138 | """Log a message with standardized context information. 
139 | 140 | Args: 141 | logger: The logger instance to use 142 | level: The logging level for this message 143 | msg: The message to log 144 | func: Optional function to use for context, defaults to caller 145 | 146 | Note: 147 | This function automatically adds context information including: 148 | - Module name 149 | - Function name 150 | - Line number 151 | """ 152 | frame = sys._getframe(1) 153 | if func is None: 154 | # Try to get caller's context if func is not provided 155 | try: 156 | frame = sys._getframe(2) # Go one level deeper to get the caller of log_with_context 157 | func_name = frame.f_code.co_name 158 | module_name = frame.f_globals.get("__name__", "unknown_module") 159 | context = f"[{module_name}:{func_name}:{frame.f_lineno}]" 160 | except (ValueError, AttributeError): 161 | context = "[unknown_context]" # Fallback if frame inspection fails 162 | else: 163 | context = get_log_context(func, frame) 164 | logger.log(level, f"{context} {msg}") 165 | 166 | 167 | # === Exception Handling and Formatting === 168 | 169 | def format_exception(exc: Exception, include_trace: bool = False) -> str: 170 | """Format exception with source location and clean message 171 | 172 | Args: 173 | exc: The exception to format 174 | include_trace: Whether to include full traceback 175 | 176 | Returns: 177 | Formatted error message with location and optional trace 178 | 179 | Raises: 180 | TypeError: If exc is not an Exception instance 181 | """ 182 | if not isinstance(exc, Exception): 183 | raise TypeError("exc must be an Exception instance") 184 | 185 | # Get the exception chain 186 | exc_chain = [] 187 | current = exc 188 | while current: 189 | exc_chain.append(current) 190 | # Prefer __cause__ for explicit chaining, fallback to __context__ 191 | current = getattr(current, '__cause__', None) or getattr(current, '__context__', None) 192 | # Avoid infinite loops with self-referential contexts 193 | if current in exc_chain: 194 | break 195 | 196 | 197 | # Get traceback information 198 | tb = getattr(exc, "__traceback__", None) or sys.exc_info()[2] 199 | module_name = "unknown_module" 200 | func_name = "unknown_function" 201 | line_no = 0 202 | 203 | if tb: 204 | # Find the deepest relevant frame in the traceback 205 | relevant_tb = tb 206 | while relevant_tb.tb_next: 207 | relevant_tb = relevant_tb.tb_next 208 | 209 | try: 210 | frame = relevant_tb.tb_frame 211 | func_name = frame.f_code.co_name 212 | line_no = relevant_tb.tb_lineno 213 | module = inspect.getmodule(frame) 214 | module_name = module.__name__ if module else "unknown_module" 215 | except (AttributeError, ValueError): 216 | # Fallback if frame access fails 217 | pass # Keep defaults 218 | 219 | # Build the error message 220 | messages = [] 221 | for i, e in enumerate(reversed(exc_chain)): 222 | try: 223 | error_type = e.__class__.__name__ 224 | error_msg = str(e) 225 | except Exception: 226 | error_type = "UnknownError" 227 | error_msg = "Failed to format error message" 228 | 229 | if i == 0: # Original exception 230 | messages.append(f"[bold red]{error_type}[/]: {error_msg}") 231 | else: # Caused by / Context 232 | messages.append(f"[dim]Caused by: {error_type}: {error_msg}[/]") 233 | 234 | location = f"[dim]Location: {module_name}.{func_name}(), line {line_no}[/]" 235 | 236 | if include_trace and tb is not None: 237 | try: 238 | # Use rich Traceback for pretty printing 239 | rich_trace = Traceback.from_exception( 240 | exc_type=type(exc), 241 | exc_value=exc, 242 | traceback=tb, 243 | show_locals=False # Keep it concise by default 244 
| ) 245 | # Convert rich Traceback to string for return 246 | trace_str = "\n".join(str(line) for line in console.render_lines(rich_trace)) 247 | return "\n".join(messages + [location, "", trace_str]) 248 | except Exception as format_err: 249 | # Fallback if rich traceback formatting fails 250 | fallback_trace = "".join(traceback.format_exception(type(exc), exc, tb)) 251 | return "\n".join(messages + [location, "", f"Failed to format traceback with Rich: {format_err}\n{fallback_trace}"]) 252 | 253 | return "\n".join(messages + [location]) 254 | 255 | 256 | def handle_exception(exc: Exception, context: Optional[str] = None, debug: bool = False) -> None: 257 | """Logs and prints a formatted exception with optional context and debugging. 258 | 259 | Args: 260 | exc: The exception to handle 261 | context: Optional context about what was happening 262 | debug: Whether to include full traceback in logs and output 263 | """ 264 | # Use get_logger internally to ensure we have a logger instance 265 | internal_logger = get_logger(__name__) 266 | 267 | # Log the full exception details for debugging purposes 268 | # exc_info=True automatically includes traceback if available 269 | log_context = f"Error in {context or 'unknown context'}" 270 | internal_logger.error(log_context, exc_info=exc if debug else False) 271 | 272 | # Format the error message for console output 273 | error_msg = format_exception(exc, include_trace=debug) 274 | if context: 275 | # Prepend the user-provided context to the formatted message 276 | error_msg = f"[yellow]Context:[/yellow] {context}\n{error_msg}" 277 | 278 | # Print the formatted error to the console using Rich Panel 279 | console.print( 280 | Panel(Text.from_markup(error_msg), title="[bold red]Error[/]", border_style="red", expand=False) 281 | ) 282 | 283 | 284 | # === Decorator === 285 | 286 | def with_logging(logger: LoggerType) -> Callable[[Callable[..., T]], Callable[..., T]]: 287 | """Decorator that adds logging context and exception handling to methods. 
288 | 289 | Args: 290 | logger: The logger instance to use 291 | 292 | Returns: 293 | A decorator function that wraps the original function with logging 294 | 295 | Example: 296 | >>> logger = get_logger(__name__) 297 | >>> @with_logging(logger) 298 | >>> def process_data(data: dict) -> None: 299 | >>> # Function implementation 300 | """ 301 | 302 | def decorator(func: Callable[..., T]) -> Callable[..., T]: 303 | @wraps(func) 304 | def wrapper(*args: Any, **kwargs: Any) -> T: 305 | frame = sys._getframe(1) # Get caller's frame (wrapper's caller) 306 | context_str = get_log_context(func, frame) # Use the original func for context 307 | 308 | try: 309 | logger.debug(f"{context_str} Starting {func.__name__}") 310 | result = func(*args, **kwargs) 311 | logger.debug(f"{context_str} Completed {func.__name__}") 312 | return result 313 | except Exception as e: 314 | error_msg_context = f"Exception in {func.__name__}" 315 | # Call the local handle_exception function directly 316 | # Determine debug flag based on logger's effective level 317 | is_debug = logger.getEffectiveLevel() <= logging.DEBUG 318 | handle_exception(e, error_msg_context, debug=is_debug) 319 | raise # Re-raise the exception after handling 320 | 321 | return wrapper 322 | 323 | return decorator -------------------------------------------------------------------------------- /reddacted/utils/panels.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Dict, Any, Optional, Tuple 3 | from itertools import zip_longest 4 | 5 | from rich.panel import Panel 6 | from rich.columns import Columns 7 | from rich.console import Group 8 | from rich.text import Text 9 | from rich.table import Table 10 | 11 | from reddacted.utils.analysis import AnalysisResult 12 | from reddacted.utils.base import BaseFormatter 13 | from reddacted.utils.log_handler import get_logger, with_logging 14 | from reddacted.utils.tables import TableFormatter 15 | from reddacted.utils.report import format_llm_detail 16 | 17 | 18 | class PanelFormatter(BaseFormatter): 19 | """Handles creation and formatting of Rich panels for the Reddit comment analysis UI.""" 20 | 21 | def __init__(self): 22 | super().__init__() 23 | # Logger not currently used in this class 24 | self.table_formatter = TableFormatter() 25 | 26 | def create_features_panel( 27 | self, 28 | auth_enabled: bool, 29 | pii_enabled: bool, 30 | llm_config: Optional[Dict[str, Any]], 31 | pii_only: bool, 32 | limit: int, 33 | sort: str, 34 | use_random_string: bool = False, 35 | ) -> Panel: 36 | """Creates a panel displaying the features configuration.""" 37 | # Create a table with two columns 38 | features_table = Table( 39 | show_header=False, box=None, padding=(0, 2), collapse_padding=True, expand=True 40 | ) 41 | features_table.add_column("Left", ratio=1, justify="left") 42 | features_table.add_column("Right", ratio=1, justify="left") 43 | 44 | # Define all config items 45 | config_items = [ 46 | ("🔐 Authentication", self._format_status(auth_enabled)), 47 | ("🔍 PII Detection", self._format_status(pii_enabled)), 48 | ( 49 | "🤖 LLM Analysis", 50 | ( 51 | Text(llm_config["model"], style="green") # Display model name if available 52 | if llm_config and llm_config.get("model") 53 | else Text("Not Selected", style="yellow") # Indicate if URL/Key provided but no model 54 | if llm_config 55 | else self._format_status(False) # Disabled if no LLM config at all 56 | ), 57 | ), 58 | ("🎯 PII-Only Filter", self._format_status(pii_only, "Active", 
"Inactive")), 59 | ("🎲 Random String", self._format_status(use_random_string, "Enabled", "Disabled")), 60 | ("📊 Comment Limit", Text(f"{limit}" if limit else "Unlimited", style="cyan")), 61 | ("📑 Sort Preference", Text(f"{sort}" if sort else "New", style="cyan")), 62 | ] 63 | 64 | # Split items into two columns 65 | mid_point = (len(config_items) + 1) // 2 66 | left_items = config_items[:mid_point] 67 | right_items = config_items[mid_point:] 68 | 69 | # Create formatted text for each column 70 | for left, right in zip_longest(left_items, right_items, fillvalue=None): 71 | left_text = Text.assemble(f"{left[0]}: ", left[1]) if left else Text("") 72 | right_text = Text.assemble(f"{right[0]}: ", right[1]) if right else Text("") 73 | features_table.add_row(left_text, right_text) 74 | 75 | return Panel( 76 | features_table, 77 | title="[bold]Features[/]", 78 | border_style="blue", 79 | padding=(1, 1), 80 | expand=True, 81 | ) 82 | 83 | def create_auth_panel(self, config: Dict[str, Any]) -> Panel: 84 | """Creates a panel displaying the authentication status and values based on config and environment.""" 85 | auth_enabled = config.get("enable_auth", False) 86 | auth_texts = [] 87 | 88 | # Determine status based on config first, then environment 89 | username_config = config.get("reddit_username") 90 | client_id_config = config.get("reddit_client_id") 91 | username_env = os.environ.get("REDDIT_USERNAME") 92 | client_id_env = os.environ.get("REDDIT_CLIENT_ID") 93 | 94 | # --- Username Status --- 95 | username_value = None 96 | username_style = "red" 97 | username_source = "" 98 | 99 | if auth_enabled and username_config: 100 | username_value = username_config 101 | username_style = "green" 102 | username_source = " (Config)" 103 | elif username_env: 104 | username_value = username_env 105 | username_style = "blue" 106 | username_source = " (Env Var)" 107 | 108 | if username_value: 109 | auth_texts.append( 110 | Text.assemble("REDDIT_USERNAME: ", (username_value, username_style), username_source) 111 | ) 112 | else: 113 | auth_texts.append(Text("REDDIT_USERNAME: Not Set", style="red")) 114 | 115 | 116 | # --- Client ID Status --- 117 | client_id_value = None 118 | client_id_style = "red" 119 | client_id_source = "" 120 | 121 | if auth_enabled and client_id_config: 122 | client_id_value = client_id_config 123 | client_id_style = "green" 124 | client_id_source = " (Config)" 125 | elif client_id_env: 126 | client_id_value = client_id_env 127 | client_id_style = "blue" 128 | client_id_source = " (Env Var)" 129 | 130 | if client_id_value: 131 | # Display only first/last few chars of client_id for brevity/security if desired 132 | # display_client_id = f"{client_id_value[:4]}...{client_id_value[-4:]}" if len(client_id_value) > 8 else client_id_value 133 | display_client_id = client_id_value # Show full ID for now 134 | auth_texts.append( 135 | Text.assemble("REDDIT_CLIENT_ID: ", (display_client_id, client_id_style), client_id_source) 136 | ) 137 | else: 138 | auth_texts.append(Text("REDDIT_CLIENT_ID: Not Set", style="red")) 139 | 140 | # Note: We don't display password or secret for security 141 | 142 | return Panel(Group(*auth_texts), title="[bold]Auth Status[/]", border_style="yellow") 143 | 144 | def create_stats_panel( 145 | self, url: str, total_comments: int, score: float, sentiment: str 146 | ) -> Panel: 147 | """Creates a panel displaying the sentiment analysis summary.""" 148 | # Create metrics table 149 | metrics_table = Table(show_header=False, box=None, padding=(0, 2), collapse_padding=True) 
150 | metrics_table.add_column("Icon", justify="right", style="bold") 151 | metrics_table.add_column("Label", style="bold") 152 | metrics_table.add_column("Value", justify="left") 153 | 154 | # Add rows with proper spacing and alignment 155 | metrics_table.add_row( 156 | "🔍", 157 | "Analysis for:", 158 | ( 159 | f"[link=https://reddit.com/u/{url}]{url}[/]" 160 | if url.startswith("u/") 161 | else f"[cyan]{url}[/]" 162 | ), 163 | ) 164 | metrics_table.add_row("📊", "Comments analyzed:", f"[cyan bold]{total_comments:>4}[/]") 165 | metrics_table.add_row( 166 | "🎭", "Overall Sentiment:", f"[cyan bold]{score:>6.2f}[/] {sentiment}" 167 | ) 168 | 169 | return Panel( 170 | metrics_table, 171 | title="[bold]Sentiment Analysis Summary[/]", 172 | border_style="blue", 173 | padding=(1, 1), 174 | ) 175 | 176 | def create_comment_panel(self, result: AnalysisResult, index: int) -> Panel: 177 | """Creates a panel for a single comment.""" 178 | sub_panels = [self.create_basic_info_panel(result)] 179 | if result.pii_matches: 180 | sub_panels.append(self.create_pii_panel(result)) 181 | if result.llm_findings: 182 | sub_panels.append(self.create_llm_panel(result)) 183 | return Panel(Columns(sub_panels), title=f"[bold]Comment {index}[/]", border_style="cyan") 184 | 185 | def create_basic_info_panel(self, result: AnalysisResult) -> Panel: 186 | """Creates a panel displaying basic comment information.""" 187 | # Create metrics table 188 | metrics_table = Table(show_header=False, box=None, padding=(0, 2), collapse_padding=True) 189 | metrics_table.add_column("Icon", justify="right", style="bold") 190 | metrics_table.add_column("Label", style="bold") 191 | metrics_table.add_column("Value", justify="left") 192 | 193 | # Risk score styling 194 | risk_score_style = "red bold" if result.pii_risk_score > 0.5 else "green bold" 195 | 196 | # Add rows with proper spacing and alignment 197 | metrics_table.add_row( 198 | "🎭", 199 | "Sentiment:", 200 | f"[cyan bold]{result.sentiment_score:>6.2f}[/] {result.sentiment_emoji}", 201 | ) 202 | metrics_table.add_row( 203 | "🔒", "Privacy Risk:", f"[{risk_score_style}]{result.pii_risk_score:>6.2f}[/]" 204 | ) 205 | # Format votes based on whether they're positive or negative 206 | vote_display = ( 207 | f"[green]⬆️ {result.upvotes:>3}[/]" 208 | if result.upvotes > result.downvotes 209 | else ( 210 | f"[red]⬇️ {result.downvotes:>3}[/]" 211 | if result.downvotes > result.upvotes 212 | else f"[dim]0[/]" 213 | ) 214 | ) 215 | 216 | # Combine comment text and metrics 217 | basic_info = Group( 218 | Text(result.text, style="white"), Text("─" * 50, style="dim"), metrics_table 219 | ) 220 | 221 | return Panel(basic_info, title="[bold]Basic Info[/]", border_style="blue", padding=(1, 1)) 222 | 223 | def create_pii_panel(self, result: AnalysisResult) -> Panel: 224 | """Creates a panel displaying pattern-based PII matches.""" 225 | pii_contents = [ 226 | Text(f"• {pii.type} (confidence: {pii.confidence:.2f})", style="cyan") 227 | for pii in result.pii_matches 228 | ] 229 | return Panel( 230 | Group(*pii_contents), title="[bold]Pattern-based PII Detected[/]", border_style="yellow" 231 | ) 232 | 233 | def create_llm_panel(self, result: AnalysisResult) -> Panel: 234 | """Creates a panel displaying LLM analysis findings.""" 235 | # Create metrics table similar to basic info panel 236 | metrics_table = Table(show_header=False, box=None, padding=(0, 2), collapse_padding=True) 237 | metrics_table.add_column("Icon", justify="right", style="bold") 238 | metrics_table.add_column("Label", style="bold") 239 
| metrics_table.add_column("Value", justify="left") 240 | 241 | if isinstance(result.llm_findings, dict) and "error" in result.llm_findings: 242 | error_group = self.create_llm_error_content(result.llm_findings["error"]) 243 | return Panel(error_group, title="[bold]LLM Analysis[/]", border_style="red") 244 | 245 | # Risk score styling 246 | risk_style = "red bold" if result.llm_risk_score > 0.5 else "green bold" 247 | pii_style = "red bold" if result.llm_findings.get("has_pii", False) else "green bold" 248 | 249 | # Add main metrics rows 250 | metrics_table.add_row( 251 | "🎯", "Risk Score:", f"[{risk_style}]{result.llm_risk_score:>6.2f}[/]" 252 | ) 253 | metrics_table.add_row( 254 | "🔍", 255 | "PII Detected:", 256 | f"[{pii_style}]{'Yes' if result.llm_findings.get('has_pii') else 'No':>6}[/]", 257 | ) 258 | 259 | # Create content groups 260 | content_groups = [metrics_table] 261 | 262 | # Add findings if present 263 | if details := result.llm_findings.get("details"): 264 | content_groups.extend( 265 | [ 266 | Text("\n📋 Findings:", style="bold"), 267 | *[Text(f" • {format_llm_detail(detail)}", style="cyan") for detail in details], 268 | ] 269 | ) 270 | 271 | # Add risk factors if present 272 | if risk_factors := result.llm_findings.get("risk_factors"): 273 | content_groups.extend( 274 | [ 275 | Text("\n⚠️ Risk Factors:", style="bold"), 276 | *[Text(f" • {factor}", style="yellow") for factor in risk_factors], 277 | ] 278 | ) 279 | 280 | return Panel( 281 | Group(*content_groups), 282 | title="[bold]LLM Analysis[/]", 283 | border_style="magenta", 284 | padding=(1, 1), 285 | ) 286 | 287 | def create_llm_error_content(self, error_msg: str) -> Group: 288 | """Creates content for LLM analysis errors.""" 289 | error_table = Table(show_header=False, box=None, padding=(0, 2)) 290 | error_table.add_column(style="red") 291 | error_table.add_row("❌ LLM Analysis Failed") 292 | error_table.add_row(f"Error: {error_msg}") 293 | error_table.add_row( 294 | "Please check your OpenAI API key and ensure you have sufficient credits." 
295 | ) 296 | return Group(error_table) 297 | 298 | def create_summary_panel(self, summary_table: Table) -> Panel: 299 | """Creates a panel displaying the action summary.""" 300 | return Panel( 301 | summary_table, title="[bold]Output Review[/]", border_style="green", padding=(1, 4) 302 | ) 303 | 304 | def create_action_panel(self, filtered_results: List[AnalysisResult], use_random_string: bool = False) -> Panel: 305 | """Creates a panel displaying actions for high-risk comments.""" 306 | high_risk_comments = [ 307 | r 308 | for r in filtered_results 309 | if r.pii_risk_score > 0.5 or (r.llm_findings and r.llm_findings.get("has_pii", False)) 310 | ] 311 | comment_ids = [r.comment_id for r in high_risk_comments] 312 | if comment_ids: 313 | action_text = Group( 314 | Text("Ready-to-use commands for high-risk comments:", style="bold yellow"), 315 | Text( 316 | f"Delete comments:\nreddacted delete {' '.join(comment_ids)}", 317 | style="italic red", 318 | ), 319 | Text( 320 | f"\nReddact (edit) comments:" + 321 | (f"\nreddacted update {' '.join(comment_ids)} --use-random-string" if use_random_string else 322 | f"\nreddacted update {' '.join(comment_ids)}"), 323 | style="italic blue", 324 | ), 325 | ) 326 | else: 327 | action_text = Text("No high-risk comments found.", style="green") 328 | return Panel(action_text, border_style="yellow", title="[bold]Actions[/]") 329 | 330 | def create_completion_panel( 331 | self, 332 | filename: str, 333 | total_comments: int, 334 | total_pii_comments: int, 335 | total_llm_pii_comments: int, 336 | ) -> Panel: 337 | """Creates a panel for the completion message with file info.""" 338 | return Panel( 339 | Text.assemble( 340 | ("📄 Report saved to ", "bold blue"), 341 | (f"{filename}\n", "bold yellow"), 342 | ("🗒️ Total comments: ", "bold blue"), 343 | (f"{total_comments}\n", "bold cyan"), 344 | ("🔐 PII detected in: ", "bold blue"), 345 | (f"{total_pii_comments} ", "bold red"), 346 | (f"({total_pii_comments/total_comments:.1%})\n", "dim"), 347 | ("🤖 LLM findings in: ", "bold blue"), 348 | (f"{total_llm_pii_comments} ", "bold magenta"), 349 | (f"({total_llm_pii_comments/total_comments:.1%})", "dim"), 350 | ), 351 | title="[bold green]Analysis Complete[/]", 352 | border_style="green", 353 | padding=(1, 4), 354 | ) 355 | -------------------------------------------------------------------------------- /reddacted/utils/progress.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn 4 | 5 | from reddacted.utils.log_handler import get_logger, with_logging 6 | from reddacted.utils.base import BaseFormatter 7 | 8 | logger = get_logger(__name__) 9 | 10 | class ProgressManager(BaseFormatter): 11 | """Manages progress bars and indicators.""" 12 | 13 | def __init__(self): 14 | super().__init__() 15 | self._progress: Optional[Progress] = None 16 | 17 | @with_logging(logger) 18 | def create_progress(self) -> Progress: 19 | """Creates a unified progress context manager.""" 20 | if not hasattr(self, "_progress") or self._progress is None: 21 | self._progress = Progress( 22 | SpinnerColumn(spinner_name="dots"), 23 | TextColumn("[bold blue]{task.description}"), 24 | TimeElapsedColumn(), 25 | transient=True, 26 | ) 27 | return self._progress 28 | -------------------------------------------------------------------------------- /reddacted/utils/report.py: -------------------------------------------------------------------------------- 1 
| """ 2 | Report Generator Module 3 | 4 | This module provides functions to generate analysis reports. 5 | It extracts the report header and output file generation logic from ResultsScreen, 6 | allowing for a single-call report generation. 7 | """ 8 | 9 | from typing import List, Dict, Any, Optional, TYPE_CHECKING 10 | 11 | if TYPE_CHECKING: 12 | from dataclasses import dataclass 13 | 14 | @dataclass 15 | class AnalysisResult: 16 | """Type hint for analysis result structure""" 17 | 18 | comment_id: str 19 | sentiment_score: float 20 | sentiment_emoji: str 21 | pii_risk_score: float 22 | pii_matches: List[Any] 23 | text: str 24 | upvotes: int 25 | downvotes: int 26 | llm_risk_score: float 27 | llm_findings: Optional[Dict[str, Any]] 28 | 29 | 30 | def write_report_header( 31 | target, url: str, overall_score: float, overall_sentiment: str, num_comments: int 32 | ) -> None: 33 | """ 34 | Writes the header section of the analysis report. 35 | """ 36 | target.write(f"# Analysis Report for '{url}'\n\n") 37 | target.write(f"- **Overall Sentiment Score**: {overall_score:.2f}\n") 38 | target.write(f"- **Overall Sentiment**: {overall_sentiment}\n") 39 | target.write(f"- **Comments Analyzed**: {num_comments}\n\n") 40 | target.write("---\n\n") 41 | 42 | 43 | __all__ = [ 44 | "generate_analysis_report", 45 | "should_show_result", 46 | "format_llm_detail", 47 | ] 48 | 49 | 50 | def should_show_result(result: "AnalysisResult", pii_only: bool = False) -> bool: 51 | """ 52 | Determines if a result should be shown based on PII detection settings. 53 | """ 54 | if not pii_only: 55 | return True 56 | has_pattern_pii = result.pii_risk_score > 0.0 57 | has_llm_pii = ( 58 | result.llm_findings is not None 59 | and isinstance(result.llm_findings, dict) 60 | and result.llm_findings.get("has_pii", False) 61 | and result.llm_findings.get("confidence", 0.0) > 0.0 62 | ) 63 | return has_pattern_pii or has_llm_pii 64 | 65 | 66 | def format_llm_detail(detail: Any, app=None) -> str: 67 | """Formats LLM detail information.""" 68 | if isinstance(detail, dict): 69 | formatted = ( 70 | f"{detail.get('type', 'Finding')}: {detail.get('example', 'N/A')}" 71 | or f"{detail.get('finding', 'N/A')}: {detail.get('reasoning', '')}" 72 | ) 73 | return formatted.replace('\n', ' ') # Replace newlines with spaces 74 | return str(detail) 75 | 76 | 77 | def write_comment_details(target, result: "AnalysisResult", index: int) -> None: 78 | """ 79 | Writes detailed analysis for a single comment. 
80 | """ 81 | target.write(f"## Comment {index}\n\n") 82 | target.write(f"**Text**: {result.text}\n\n") 83 | target.write(f"- Sentiment Score: `{result.sentiment_score:.2f}` {result.sentiment_emoji}\n") 84 | target.write(f"- PII Risk Score: `{result.pii_risk_score:.2f}`\n") 85 | target.write(f"- Votes: ⬆️ `{result.upvotes}` ⬇️ `{result.downvotes}`\n") 86 | target.write(f"- Comment ID: `{result.comment_id}`\n\n") 87 | if result.pii_matches: 88 | target.write("### Pattern-based PII Detected\n") 89 | for pii in result.pii_matches: 90 | target.write(f"- **{pii.type}** (confidence: {pii.confidence:.2f})\n") 91 | target.write("\n") 92 | if result.llm_findings: 93 | target.write("### LLM Privacy Analysis\n") 94 | target.write(f"- **Risk Score**: `{result.llm_risk_score:.2f}`\n") 95 | if isinstance(result.llm_findings, dict): 96 | target.write( 97 | f"- **PII Detected**: {'Yes' if result.llm_findings.get('has_pii') else 'No'}\n" 98 | ) 99 | if details := result.llm_findings.get("details"): 100 | target.write("\n#### Findings\n") 101 | for detail in details: 102 | target.write(f"- {format_llm_detail(detail)}\n") 103 | if reasoning := result.llm_findings.get("reasoning"): 104 | target.write(f"\n#### Reasoning\n{reasoning}\n") 105 | target.write("\n") 106 | target.write("---\n\n") 107 | 108 | 109 | def write_summary_section( 110 | target, 111 | total_comments: int, 112 | sentiment_scores: List[float], 113 | max_risk_score: float, 114 | riskiest_comment: str, 115 | total_pii_comments: int = 0, 116 | total_llm_pii_comments: int = 0, 117 | ) -> None: 118 | """ 119 | Writes the summary section of the analysis report. 120 | """ 121 | average_sentiment = sum(sentiment_scores) / len(sentiment_scores) if sentiment_scores else 0.0 122 | target.write("\n# Summary\n\n") 123 | target.write(f"- Total Comments Analyzed: {total_comments}\n") 124 | target.write( 125 | f"- Comments with PII Detected: {total_pii_comments} ({total_pii_comments/total_comments:.1%})\n" 126 | ) 127 | target.write( 128 | f"- Comments with LLM Privacy Risks: {total_llm_pii_comments} ({total_llm_pii_comments/total_comments:.1%})\n" 129 | ) 130 | target.write(f"- Average Sentiment Score: {average_sentiment:.2f}\n") 131 | target.write(f"- Highest PII Risk Score: {max_risk_score:.2f}\n") 132 | if riskiest_comment: 133 | target.write(f"- Riskiest Comment Preview: '{riskiest_comment}'\n") 134 | target.write("✅ Analysis complete\n") 135 | 136 | 137 | def generate_analysis_report( 138 | filename: str, 139 | comments: List[Dict[str, Any]], 140 | url: str, 141 | results: List["AnalysisResult"], 142 | overall_score: float, 143 | overall_sentiment: str, 144 | pii_only: bool = False, 145 | ) -> Dict[str, int]: 146 | """ 147 | Generates an analysis report by writing the header, comment details, and summary. 148 | 149 | This function encapsulates the report generation logic previously embedded 150 | in the ResultsScreen, thereby reducing file bloat. 
151 | 152 | Returns: 153 | Dict containing statistics about the analysis (total_pii_comments, total_llm_pii_comments) 154 | """ 155 | try: 156 | sentiment_scores: List[float] = [] 157 | max_risk_score = 0.0 158 | riskiest_comment = "" 159 | total_pii_comments = 0 160 | total_llm_pii_comments = 0 161 | 162 | with open(filename, "w") as target: 163 | write_report_header(target, url, overall_score, overall_sentiment, len(comments)) 164 | 165 | for idx, result in enumerate(results, 1): 166 | if not should_show_result(result, pii_only): 167 | continue 168 | write_comment_details(target, result, idx) 169 | 170 | # Update statistics 171 | sentiment_scores.append(result.sentiment_score) 172 | if result.pii_risk_score > 0: 173 | total_pii_comments += 1 174 | if result.llm_risk_score > 0 or ( 175 | result.llm_findings and result.llm_findings.get("has_pii", False) 176 | ): 177 | total_llm_pii_comments += 1 178 | 179 | if result.pii_risk_score > max_risk_score: 180 | max_risk_score = result.pii_risk_score 181 | riskiest_comment = ( 182 | (result.text[:100] + "...") if len(result.text) > 100 else result.text 183 | ) 184 | 185 | write_summary_section( 186 | target, 187 | len(comments), 188 | sentiment_scores, 189 | max_risk_score, 190 | riskiest_comment, 191 | total_pii_comments, 192 | total_llm_pii_comments, 193 | ) 194 | 195 | print(f"Report generated successfully at {filename}") 196 | return { 197 | "total_pii_comments": total_pii_comments, 198 | "total_llm_pii_comments": total_llm_pii_comments, 199 | } 200 | except Exception as e: 201 | print(f"Error generating report: {e}") 202 | raise 203 | -------------------------------------------------------------------------------- /reddacted/utils/tables.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | from itertools import zip_longest 3 | 4 | from rich.table import Table 5 | from rich.text import Text 6 | 7 | from reddacted.utils.base import BaseFormatter 8 | from reddacted.utils.analysis import AnalysisResult 9 | 10 | 11 | class TableFormatter(BaseFormatter): 12 | """Handles creation and formatting of tables.""" 13 | 14 | def generate_summary_table(self, filtered_results: List[AnalysisResult]) -> Table: 15 | """Generates a summary table with selection indicators.""" 16 | table = Table(header_style="bold magenta", box=None, padding=(0, 1), collapse_padding=True) 17 | table.add_column("Risk", justify="center", style="bold", width=10) 18 | table.add_column("Sentiment", justify="center", width=15) 19 | table.add_column("Comment Preview", justify="center", width=75) 20 | table.add_column("Votes", justify="center", width=10) 21 | table.add_column("ID", justify="center", width=10) 22 | 23 | for result in filtered_results: 24 | risk_style = self._get_risk_style(result.pii_risk_score) 25 | risk_text = Text(f"{result.pii_risk_score:.0%}", style=risk_style) 26 | permalink = f"https://reddit.com{result.permalink}" 27 | preview = (result.text[:67] + "...") if len(result.text) > 70 else result.text 28 | preview = f"[link={permalink}]{preview}[/link]" 29 | 30 | vote_display = ( 31 | f"[green]⬆️ {result.upvotes:>3}[/]" 32 | if result.upvotes > result.downvotes 33 | else ( 34 | f"[red]⬇️ {result.downvotes:>3}[/]" 35 | if result.downvotes > result.upvotes 36 | else f"[dim]0[/]" 37 | ) 38 | ) 39 | 40 | table.add_row( 41 | risk_text, 42 | Text(f"{result.sentiment_emoji} {result.sentiment_score:.2f}"), 43 | preview, 44 | vote_display, 45 | result.comment_id, 46 | ) 47 | 48 | return table 
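# Example usage for generate_summary_table — a minimal sketch, assuming results produced
# elsewhere by the analysis pipeline; the AnalysisResult field values below are hypothetical
# and chosen only to illustrate how the summary table renders:
#
#   from rich.console import Console
#   from reddacted.utils.analysis import AnalysisResult
#   from reddacted.utils.tables import TableFormatter
#
#   sample = AnalysisResult(
#       comment_id="abc123",                                  # hypothetical comment ID
#       sentiment_score=0.42,
#       sentiment_emoji="🙂",
#       pii_risk_score=0.85,
#       pii_matches=[],
#       permalink="/r/example/comments/xyz/example_post/abc123/",
#       text="Example comment text shown in the preview column",
#       upvotes=12,
#       downvotes=2,
#   )
#   Console().print(TableFormatter().generate_summary_table([sample]))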
49 | 50 | def create_features_table( 51 | self, 52 | auth_enabled: bool, 53 | pii_enabled: bool, 54 | llm_config: Optional[Dict[str, Any]], 55 | pii_only: bool, 56 | limit: int, 57 | sort: str, 58 | ) -> Table: 59 | """Creates a table displaying the features configuration.""" 60 | features_table = Table( 61 | show_header=False, box=None, padding=(0, 2), collapse_padding=True, expand=True 62 | ) 63 | features_table.add_column("Left", ratio=1, justify="left") 64 | features_table.add_column("Right", ratio=1, justify="left") 65 | 66 | # Define all config items 67 | config_items = [ 68 | ("🔐 Authentication", self._format_status(auth_enabled)), 69 | ("🔍 PII Detection", self._format_status(pii_enabled)), 70 | ( 71 | "🤖 LLM Analysis", 72 | ( 73 | Text(llm_config["model"], style="green") 74 | if llm_config 75 | else self._format_status(False) 76 | ), 77 | ), 78 | ("🎯 PII-Only Filter", self._format_status(pii_only, "Active", "Inactive")), 79 | ("📊 Comment Limit", Text(f"{limit}" if limit else "Unlimited", style="cyan")), 80 | ("📑 Sort Preference", Text(f"{sort}" if sort else "New", style="cyan")), 81 | ] 82 | 83 | # Split items into two columns 84 | mid_point = (len(config_items) + 1) // 2 85 | left_items = config_items[:mid_point] 86 | right_items = config_items[mid_point:] 87 | 88 | # Create formatted text for each column 89 | for left, right in zip_longest(left_items, right_items, fillvalue=None): 90 | left_text = Text.assemble(f"{left[0]}: ", left[1]) if left else Text("") 91 | right_text = Text.assemble(f"{right[0]}: ", right[1]) if right else Text("") 92 | features_table.add_row(left_text, right_text) 93 | 94 | return features_table 95 | -------------------------------------------------------------------------------- /reddacted/version.py: -------------------------------------------------------------------------------- 1 | """Version information.""" 2 | 3 | __version__ = "0.2.5" 4 | version = __version__ 5 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Configuration - Version comes from version.py 5 | VERSION_FILE="reddacted/version.py" 6 | GITHUB_USER="taylorwilsdon" 7 | REPO="reddacted" 8 | UPDATE_DEPS_ONLY=false 9 | 10 | # Parse arguments 11 | while [[ "$#" -gt 0 ]]; do 12 | case $1 in 13 | --update-deps-only) UPDATE_DEPS_ONLY=true ;; 14 | *) echo "Unknown parameter: $1"; exit 1 ;; 15 | esac 16 | shift 17 | done 18 | 19 | # Colors for output 20 | GREEN='\033[0;32m' 21 | YELLOW='\033[1;33m' 22 | NC='\033[0m' # No Color 23 | 24 | # Extract version from version.py file 25 | VERSION=$(grep -o '__version__ = "[^"]*"' "$VERSION_FILE" | cut -d'"' -f2) 26 | echo -e "${YELLOW}Starting release process for reddacted v${VERSION}${NC}" 27 | 28 | # 1. Check for required tools 29 | if ! command -v jq &> /dev/null; then 30 | echo -e "${YELLOW}jq not found. Please install it to update dependencies.${NC}" 31 | echo -e "${YELLOW}On macOS: brew install jq${NC}" 32 | exit 1 33 | fi 34 | 35 | if [ "$UPDATE_DEPS_ONLY" = false ]; then 36 | # 2. Ensure we're on the main branch 37 | git checkout main 38 | # Skip git pull if no upstream is configured 39 | git rev-parse --abbrev-ref @{upstream} >/dev/null 2>&1 && git pull || echo "No upstream branch configured, skipping pull" 40 | 41 | # 3. Clean build artifacts 42 | echo -e "${YELLOW}Cleaning previous build artifacts...${NC}" 43 | rm -rf dist/ build/ *.egg-info/ 44 | 45 | # 4. 
Build the package with UV (both sdist and wheel) 46 | echo -e "${YELLOW}Building package with UV...${NC}" 47 | uv build --sdist --wheel || { 48 | echo -e "${YELLOW}Failed to build package${NC}" 49 | exit 1 50 | } 51 | 52 | # 5. Create and push git tag 53 | echo -e "${YELLOW}Creating and pushing git tag v${VERSION}...${NC}" 54 | # Improved tag handling - check both local and remote tags 55 | LOCAL_TAG_EXISTS=$(git tag -l "v${VERSION}") 56 | REMOTE_TAG_EXISTS=$(git ls-remote --tags origin "refs/tags/v${VERSION}" | wc -l) 57 | 58 | if [ -n "$LOCAL_TAG_EXISTS" ]; then 59 | echo -e "${YELLOW}Local tag v${VERSION} already exists${NC}" 60 | else 61 | git tag -a "v${VERSION}" -m "Release v${VERSION}" 62 | echo -e "${YELLOW}Created local tag v${VERSION}${NC}" 63 | fi 64 | 65 | # Only push if tag doesn't exist on remote 66 | if [ "$REMOTE_TAG_EXISTS" -eq 0 ]; then 67 | echo -e "${YELLOW}Pushing tag to remote...${NC}" 68 | git push origin "v${VERSION}" || echo "Failed to push tag, continuing anyway" 69 | else 70 | echo -e "${YELLOW}Remote tag v${VERSION} already exists, skipping push${NC}" 71 | fi 72 | 73 | # 6. Create GitHub release 74 | echo -e "${YELLOW}Creating GitHub release...${NC}" 75 | # Check if gh command is available 76 | if ! command -v gh &> /dev/null; then 77 | echo -e "${YELLOW}GitHub CLI not found. Please install it to create releases.${NC}" 78 | echo -e "${YELLOW}Skipping GitHub release creation.${NC}" 79 | else 80 | # Check if release already exists 81 | if gh release view "v${VERSION}" &>/dev/null; then 82 | echo -e "${YELLOW}Release v${VERSION} already exists, skipping creation${NC}" 83 | else 84 | gh release create "v${VERSION}" \ 85 | --title "reddacted v${VERSION}" \ 86 | --notes "Release v${VERSION}" \ 87 | ./dist/* 88 | fi 89 | fi 90 | 91 | # 7. Download the tarball to calculate SHA 92 | echo -e "${YELLOW}Downloading tarball to calculate SHA...${NC}" 93 | TARBALL_PATH="/tmp/${REPO}-${VERSION}.tar.gz" 94 | if curl -sL --fail "https://github.com/${GITHUB_USER}/${REPO}/archive/refs/tags/v${VERSION}.tar.gz" -o "${TARBALL_PATH}"; then 95 | SHA=$(shasum -a 256 "${TARBALL_PATH}" | cut -d ' ' -f 1) 96 | 97 | # Generate new Homebrew formula 98 | echo -e "${YELLOW}Generating new Homebrew formula...${NC}" 99 | if ! python3 scripts/homebrew_formula_generator.py "${VERSION}"; then 100 | echo -e "${YELLOW}Failed to generate Homebrew formula${NC}" 101 | exit 1 102 | fi 103 | else 104 | echo -e "${YELLOW}Failed to download tarball, skipping SHA calculation and Homebrew formula update${NC}" 105 | fi 106 | 107 | # 8. Publish to PyPI if desired 108 | read -p "Do you want to publish to PyPI? (y/n) " -n 1 -r 109 | echo 110 | if [[ $REPLY =~ ^[Yy]$ ]] 111 | then 112 | echo -e "${YELLOW}Publishing to PyPI...${NC}" 113 | if ! uv publish; then 114 | echo -e "${YELLOW}Failed to publish to PyPI${NC}" 115 | exit 1 116 | fi 117 | fi 118 | fi 119 | 120 | # Ensure scripts directory exists and formula generator is executable 121 | if [ ! -d "scripts" ]; then 122 | echo -e "${YELLOW}Creating scripts directory...${NC}" 123 | mkdir -p scripts 124 | fi 125 | 126 | if [ ! -x "scripts/homebrew_formula_generator.py" ]; then 127 | echo -e "${YELLOW}Making formula generator executable...${NC}" 128 | chmod +x scripts/homebrew_formula_generator.py 129 | fi 130 | 131 | # 10. Instructions for Homebrew tap 132 | echo -e "${GREEN}Release v${VERSION} completed!${NC}" 133 | echo -e "${GREEN}To publish to Homebrew:${NC}" 134 | echo -e "1. Create a tap repository: github.com/${GITHUB_USER}/homebrew-tap" 135 | echo -e "2. 
Copy homebrew/reddacted.rb to your tap repository" 136 | echo -e "3. Users can then install with: brew install ${GITHUB_USER}/tap/reddacted" 137 | 138 | echo -e "${GREEN}Done!${NC}" 139 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pytest>=7.4.0 2 | black>=23.7.0 3 | mypy>=1.5.1 4 | ruff>=0.0.291 5 | build==1.2.2.post1 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | praw==7.7.1 3 | nltk==3.8.1 4 | requests==2.31.0 5 | six==1.16.0 6 | httpx==0.27.2 7 | openai==1.3.7 8 | rich==13.7.0 9 | textual==2.1.2 10 | # Development dependencies 11 | flake8==7.0.0 12 | build==1.0.3 13 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorwilsdon/reddacted/3efa6363aeebbf62befc73b7e5bcb7e5523738d2/tests/__init__.py -------------------------------------------------------------------------------- /tests/templates/reddit/article.json: -------------------------------------------------------------------------------- 1 | [{"kind": "Listing", "data": {"modhash": "", "dist": 1, "children": [{"kind": "t3", "data": {"approved_at_utc": null, "subreddit": "doge", "selftext": "", "user_reports": [], "saved": false, "mod_reason_title": null, "gilded": 0, "clicked": false, "title": "I drew this doge in 2013 during my first years of grad school. Students loved it.", "link_flair_richtext": [], "subreddit_name_prefixed": "r/doge", "hidden": false, "pwls": 6, "link_flair_css_class": null, "downs": 0, "thumbnail_height": 122, "top_awarded_type": null, "parent_whitelist_status": "all_ads", "hide_score": false, "name": "t3_l7zp94", "quarantine": false, "link_flair_text_color": "dark", "upvote_ratio": 0.98, "author_flair_background_color": null, "subreddit_type": "public", "ups": 128, "total_awards_received": 4, "media_embed": {"content": "<iframe class=\"embedly-embed\" src=\"https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX%2Fembed%3Fpub%3Dtrue%26ref%3Dhttps%253A%252F%252Fembed.ly%26w%3D900&display_name=Imgur&url=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX&image=https%3A%2F%2Fi.imgur.com%2FzCLUa41.jpg%3Ffb&key=2aa3c4d5f3de4f5b9120b660ad850dc9&type=text%2Fhtml&schema=imgur\" width=\"600\" height=\"567\" scrolling=\"no\" title=\"Imgur embed\" frameborder=\"0\" allow=\"autoplay; fullscreen\" allowfullscreen=\"true\"></iframe>", "width": 600, "scrolling": false, "height": 567}, "thumbnail_width": 140, "author_flair_template_id": null, "is_original_content": false, "author_fullname": "t2_bkqxw", "secure_media": {"oembed": {"provider_url": "http://imgur.com", "description": "Post with 0 views.", "title": "Imgur", "url": "https://imgur.com/a/X5hateX", "thumbnail_width": 600, "height": 567, "width": 600, "html": "<iframe class=\"embedly-embed\" src=\"https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX%2Fembed%3Fpub%3Dtrue%26ref%3Dhttps%253A%252F%252Fembed.ly%26w%3D900&display_name=Imgur&url=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX&image=https%3A%2F%2Fi.imgur.com%2FzCLUa41.jpg%3Ffb&key=2aa3c4d5f3de4f5b9120b660ad850dc9&type=text%2Fhtml&schema=imgur\" width=\"600\" height=\"567\" scrolling=\"no\" 
title=\"Imgur embed\" frameborder=\"0\" allow=\"autoplay; fullscreen\" allowfullscreen=\"true\"></iframe>", "version": "1.0", "provider_name": "Imgur", "thumbnail_url": "https://i.imgur.com/zCLUa41.jpg?fb", "type": "rich", "thumbnail_height": 315}, "type": "imgur.com"}, "is_reddit_media_domain": false, "is_meta": false, "category": null, "secure_media_embed": {"content": "<iframe class=\"embedly-embed\" src=\"https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX%2Fembed%3Fpub%3Dtrue%26ref%3Dhttps%253A%252F%252Fembed.ly%26w%3D900&display_name=Imgur&url=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX&image=https%3A%2F%2Fi.imgur.com%2FzCLUa41.jpg%3Ffb&key=2aa3c4d5f3de4f5b9120b660ad850dc9&type=text%2Fhtml&schema=imgur\" width=\"600\" height=\"567\" scrolling=\"no\" title=\"Imgur embed\" frameborder=\"0\" allow=\"autoplay; fullscreen\" allowfullscreen=\"true\"></iframe>", "width": 600, "scrolling": false, "media_domain_url": "https://www.redditmedia.com/mediaembed/l7zp94", "height": 567}, "link_flair_text": null, "can_mod_post": false, "score": 128, "approved_by": null, "is_created_from_ads_ui": false, "author_premium": false, "thumbnail": "https://b.thumbs.redditmedia.com/UH7uKNkjzWKyMsHqX1RULNQCo3LwW0M_-sZI0UjWG9I.jpg", "edited": false, "author_flair_css_class": null, "author_flair_richtext": [], "gildings": {"gid_1": 2}, "post_hint": "link", "content_categories": null, "is_self": false, "mod_note": null, "created": 1611972924.0, "link_flair_type": "text", "wls": 6, "removed_by_category": null, "banned_by": null, "author_flair_type": "text", "domain": "imgur.com", "allow_live_comments": false, "selftext_html": null, "likes": null, "suggested_sort": null, "banned_at_utc": null, "url_overridden_by_dest": "https://imgur.com/a/X5hateX", "view_count": null, "archived": false, "no_follow": false, "is_crosspostable": false, "pinned": false, "over_18": false, "preview": {"images": [{"source": {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?auto=webp&s=3928f490a160694f22f82f1628cadc2d50a9c5fb", "width": 2048, "height": 1798}, "resolutions": [{"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=108&crop=smart&auto=webp&s=880cfc8e4584d8070913af5aeb23adb1434ba516", "width": 108, "height": 94}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=216&crop=smart&auto=webp&s=e23d538fc6094fda00842acaa044b5ea2e5ac24a", "width": 216, "height": 189}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=320&crop=smart&auto=webp&s=afc5f79f3d8311fe385d53d2f3d65ea104054b8b", "width": 320, "height": 280}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=640&crop=smart&auto=webp&s=608c7ab721bb21583c7a507488e5169b60596fcd", "width": 640, "height": 561}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=960&crop=smart&auto=webp&s=9667eb9d3567734bfe54e20f6b51b964987a2d89", "width": 960, "height": 842}, {"url": "https://external-preview.redd.it/FQxPVy7RvIoViU-NIG1pGBw7Aix_ceh02S-1CDidJVM.jpg?width=1080&crop=smart&auto=webp&s=3a283c033e53a68c8260f2ee92036f632d5f7fc5", "width": 1080, "height": 948}], "variants": {}, "id": "kHHZ4X60_wNKr1dAD5CDwgWkoxS63LJ1qKaB_CzUJnE"}], "enabled": false}, "all_awardings": [{"giver_coin_reward": null, "subreddit_id": null, "is_new": false, "days_of_drip_extension": 0, "coin_price": 125, "id": 
"award_5f123e3d-4f48-42f4-9c11-e98b566d5897", "penny_donate": null, "coin_reward": 0, "icon_url": "https://i.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png", "days_of_premium": 0, "icon_height": 2048, "tiers_by_required_awardings": null, "resized_icons": [{"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=16&height=16&auto=webp&s=92932f465d58e4c16b12b6eac4ca07d27e3d11c0", "width": 16, "height": 16}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=32&height=32&auto=webp&s=d11484a208d68a318bf9d4fcf371171a1cb6a7ef", "width": 32, "height": 32}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=48&height=48&auto=webp&s=febdf28b6f39f7da7eb1365325b85e0bb49a9f63", "width": 48, "height": 48}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=64&height=64&auto=webp&s=b4406a2d88bf86fa3dc8a45aacf7e0c7bdccc4fb", "width": 64, "height": 64}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=128&height=128&auto=webp&s=19555b13e3e196b62eeb9160d1ac1d1b372dcb0b", "width": 128, "height": 128}], "icon_width": 2048, "static_icon_width": 2048, "start_date": null, "is_enabled": true, "awardings_required_to_grant_benefits": null, "description": "When you come across a feel-good thing.", "end_date": null, "subreddit_coin_reward": 0, "count": 1, "static_icon_height": 2048, "name": "Wholesome", "resized_static_icons": [{"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=16&height=16&auto=webp&s=92932f465d58e4c16b12b6eac4ca07d27e3d11c0", "width": 16, "height": 16}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=32&height=32&auto=webp&s=d11484a208d68a318bf9d4fcf371171a1cb6a7ef", "width": 32, "height": 32}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=48&height=48&auto=webp&s=febdf28b6f39f7da7eb1365325b85e0bb49a9f63", "width": 48, "height": 48}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=64&height=64&auto=webp&s=b4406a2d88bf86fa3dc8a45aacf7e0c7bdccc4fb", "width": 64, "height": 64}, {"url": "https://preview.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png?width=128&height=128&auto=webp&s=19555b13e3e196b62eeb9160d1ac1d1b372dcb0b", "width": 128, "height": 128}], "icon_format": null, "award_sub_type": "GLOBAL", "penny_price": null, "award_type": "global", "static_icon_url": "https://i.redd.it/award_images/t5_22cerq/5izbv4fn0md41_Wholesome.png"}, {"giver_coin_reward": null, "subreddit_id": null, "is_new": false, "days_of_drip_extension": 0, "coin_price": 100, "id": "gid_1", "penny_donate": null, "coin_reward": 0, "icon_url": "https://www.redditstatic.com/gold/awards/icon/silver_512.png", "days_of_premium": 0, "icon_height": 512, "tiers_by_required_awardings": null, "resized_icons": [{"url": "https://www.redditstatic.com/gold/awards/icon/silver_16.png", "width": 16, "height": 16}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_32.png", "width": 32, "height": 32}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_48.png", "width": 48, "height": 48}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_64.png", "width": 64, "height": 64}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_128.png", "width": 128, "height": 128}], "icon_width": 512, "static_icon_width": 512, "start_date": null, 
"is_enabled": true, "awardings_required_to_grant_benefits": null, "description": "Shows the Silver Award... and that's it.", "end_date": null, "subreddit_coin_reward": 0, "count": 2, "static_icon_height": 512, "name": "Silver", "resized_static_icons": [{"url": "https://www.redditstatic.com/gold/awards/icon/silver_16.png", "width": 16, "height": 16}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_32.png", "width": 32, "height": 32}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_48.png", "width": 48, "height": 48}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_64.png", "width": 64, "height": 64}, {"url": "https://www.redditstatic.com/gold/awards/icon/silver_128.png", "width": 128, "height": 128}], "icon_format": null, "award_sub_type": "GLOBAL", "penny_price": null, "award_type": "global", "static_icon_url": "https://www.redditstatic.com/gold/awards/icon/silver_512.png"}, {"giver_coin_reward": 0, "subreddit_id": null, "is_new": false, "days_of_drip_extension": 0, "coin_price": 80, "id": "award_8352bdff-3e03-4189-8a08-82501dd8f835", "penny_donate": 0, "coin_reward": 0, "icon_url": "https://i.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png", "days_of_premium": 0, "icon_height": 2048, "tiers_by_required_awardings": null, "resized_icons": [{"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=16&height=16&auto=webp&s=73a23bf7f08b633508dedf457f2704c522b94a04", "width": 16, "height": 16}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=32&height=32&auto=webp&s=50f2f16e71d2929e3d7275060af3ad6b851dbfb1", "width": 32, "height": 32}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=48&height=48&auto=webp&s=ca487311563425e195699a4d7e4c57a98cbfde8b", "width": 48, "height": 48}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=64&height=64&auto=webp&s=7b4eedcffb1c09a826e7837532c52979760f1d2b", "width": 64, "height": 64}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/ks45ij6w05f61_oldHugz.png?width=128&height=128&auto=webp&s=e4d5ab237eb71a9f02bb3bf9ad5ee43741918d6c", "width": 128, "height": 128}], "icon_width": 2048, "static_icon_width": 2048, "start_date": null, "is_enabled": true, "awardings_required_to_grant_benefits": null, "description": "Everything is better with a good hug", "end_date": null, "subreddit_coin_reward": 0, "count": 1, "static_icon_height": 2048, "name": "Hugz", "resized_static_icons": [{"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=16&height=16&auto=webp&s=69997ace3ef4ffc099b81d774c2c8f1530602875", "width": 16, "height": 16}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=32&height=32&auto=webp&s=e9519d1999ef9dce5c8a9f59369cb92f52d95319", "width": 32, "height": 32}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=48&height=48&auto=webp&s=f076c6434fb2d2f9075991810fd845c40fa73fc6", "width": 48, "height": 48}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=64&height=64&auto=webp&s=85527145e0c4b754306a30df29e584fd16187636", "width": 64, "height": 64}, {"url": "https://preview.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png?width=128&height=128&auto=webp&s=b8843cdf82c3b741d7af057c14076dcd2621e811", "width": 128, "height": 128}], "icon_format": "PNG", "award_sub_type": "GLOBAL", "penny_price": 0, 
"award_type": "global", "static_icon_url": "https://i.redd.it/award_images/t5_q0gj4/fpm0r5ryq1361_PolarHugs.png"}], "awarders": [], "media_only": false, "can_gild": false, "spoiler": false, "locked": false, "author_flair_text": null, "treatment_tags": [], "visited": false, "removed_by": null, "num_reports": null, "distinguished": null, "subreddit_id": "t5_2vzn6", "mod_reason_by": null, "removal_reason": null, "link_flair_background_color": "", "id": "l7zp94", "is_robot_indexable": true, "num_duplicates": 1, "report_reasons": null, "author": "alexandermikh", "discussion_type": null, "num_comments": 5, "send_replies": true, "media": {"oembed": {"provider_url": "http://imgur.com", "description": "Post with 0 views.", "title": "Imgur", "url": "https://imgur.com/a/X5hateX", "thumbnail_width": 600, "height": 567, "width": 600, "html": "<iframe class=\"embedly-embed\" src=\"https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX%2Fembed%3Fpub%3Dtrue%26ref%3Dhttps%253A%252F%252Fembed.ly%26w%3D900&display_name=Imgur&url=https%3A%2F%2Fimgur.com%2Fa%2FX5hateX&image=https%3A%2F%2Fi.imgur.com%2FzCLUa41.jpg%3Ffb&key=2aa3c4d5f3de4f5b9120b660ad850dc9&type=text%2Fhtml&schema=imgur\" width=\"600\" height=\"567\" scrolling=\"no\" title=\"Imgur embed\" frameborder=\"0\" allow=\"autoplay; fullscreen\" allowfullscreen=\"true\"></iframe>", "version": "1.0", "provider_name": "Imgur", "thumbnail_url": "https://i.imgur.com/zCLUa41.jpg?fb", "type": "rich", "thumbnail_height": 315}, "type": "imgur.com"}, "contest_mode": false, "author_patreon_flair": false, "author_flair_text_color": null, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/", "whitelist_status": "all_ads", "stickied": false, "url": "https://imgur.com/a/X5hateX", "subreddit_subscribers": 73538, "created_utc": 1611944124.0, "num_crossposts": 0, "mod_reports": [], "is_video": false}}], "after": null, "before": null}}, {"kind": "Listing", "data": {"modhash": "", "dist": null, "children": [{"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 4, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": "", "user_reports": [], "saved": false, "id": "glai61b", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": false, "author": "L103131", "can_mod_post": false, "send_replies": true, "parent_id": "t3_l7zp94", "score": 4, "author_fullname": "t2_u7ppvf1", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "body": "Looks sick!", "edited": false, "downs": 0, "author_flair_css_class": null, "is_submitter": false, "collapsed": false, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><p>Looks sick!</p>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": null, "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/glai61b/", "num_reports": null, "locked": false, "name": "t1_glai61b", "created": 1611983461.0, "subreddit": "doge", "author_flair_text": null, "treatment_tags": [], "created_utc": 1611954661.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, 
"depth": 0, "author_flair_background_color": null, "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, "distinguished": null}}, {"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 1, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": "", "user_reports": [], "saved": false, "id": "glc7p50", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": true, "author": "ManBets", "can_mod_post": false, "send_replies": true, "parent_id": "t3_l7zp94", "score": 1, "author_fullname": "t2_a1nnt5hp", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "body": "#DOGE HOLD IT", "edited": false, "downs": 0, "author_flair_css_class": null, "is_submitter": false, "collapsed": false, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><h1>DOGE HOLD IT</h1>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": null, "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/glc7p50/", "num_reports": null, "locked": false, "name": "t1_glc7p50", "created": 1612013001.0, "subreddit": "doge", "author_flair_text": null, "treatment_tags": [], "created_utc": 1611984201.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, "depth": 0, "author_flair_background_color": null, "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, "distinguished": null}}, {"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 1, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": "", "user_reports": [], "saved": false, "id": "gladezi", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": true, "author": "anormalusername1", "can_mod_post": false, "send_replies": true, "parent_id": "t3_l7zp94", "score": 1, "author_fullname": "t2_i86lt", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "body": "10/10 very art such picasso wow", "edited": false, "downs": 0, "author_flair_css_class": "mod", "is_submitter": false, "collapsed": false, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><p>10/10 very art such picasso wow</p>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": "dark", "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gladezi/", "num_reports": null, "locked": false, "name": "t1_gladezi", "created": 1611981922.0, "subreddit": "doge", "author_flair_text": "wow such mod", "treatment_tags": [], "created_utc": 1611953122.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, "depth": 0, "author_flair_background_color": "", "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, 
"distinguished": null}}, {"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 2, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": {"kind": "Listing", "data": {"modhash": "", "dist": null, "children": [{"kind": "t1", "data": {"total_awards_received": 0, "approved_at_utc": null, "comment_type": null, "awarders": [], "mod_reason_by": null, "banned_by": null, "ups": 1, "author_flair_type": "text", "removal_reason": null, "link_id": "t3_l7zp94", "author_flair_template_id": null, "likes": null, "replies": "", "user_reports": [], "saved": false, "id": "gsdp7lq", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": true, "author": "alexandermikh", "can_mod_post": false, "send_replies": true, "parent_id": "t1_gsdozky", "score": 1, "author_fullname": "t2_bkqxw", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "collapsed": false, "body": "Thanks <3", "edited": false, "author_flair_css_class": null, "is_submitter": true, "downs": 0, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><p>Thanks &lt;3</p>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": null, "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gsdp7lq/", "num_reports": null, "locked": false, "name": "t1_gsdp7lq", "created": 1616845579.0, "subreddit": "doge", "author_flair_text": null, "treatment_tags": [], "created_utc": 1616816779.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, "depth": 1, "author_flair_background_color": null, "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, "distinguished": null}}], "after": null, "before": null}}, "user_reports": [], "saved": false, "id": "gsdozky", "banned_at_utc": null, "mod_reason_title": null, "gilded": 0, "archived": false, "no_follow": true, "author": "The_disasterpeice", "can_mod_post": false, "send_replies": true, "parent_id": "t3_l7zp94", "score": 2, "author_fullname": "t2_93ke41tz", "report_reasons": null, "approved_by": null, "all_awardings": [], "subreddit_id": "t5_2vzn6", "body": "Much drawing, very sketch. Ps I gave you the silver award.", "edited": false, "downs": 0, "author_flair_css_class": null, "is_submitter": false, "collapsed": false, "author_flair_richtext": [], "author_patreon_flair": false, "body_html": "<div class=\"md\"><p>Much drawing, very sketch. 
Ps I gave you the silver award.</p>\n</div>", "gildings": {}, "collapsed_reason": null, "associated_award": null, "stickied": false, "author_premium": false, "subreddit_type": "public", "can_gild": true, "top_awarded_type": null, "author_flair_text_color": null, "score_hidden": false, "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gsdozky/", "num_reports": null, "locked": false, "name": "t1_gsdozky", "created": 1616845451.0, "subreddit": "doge", "author_flair_text": null, "treatment_tags": [], "created_utc": 1616816651.0, "subreddit_name_prefixed": "r/doge", "controversiality": 0, "depth": 0, "author_flair_background_color": null, "collapsed_because_crowd_control": null, "mod_reports": [], "mod_note": null, "distinguished": null}}], "after": null, "before": null}}] -------------------------------------------------------------------------------- /tests/test_llm.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pytest 3 | from unittest.mock import MagicMock, patch, AsyncMock 4 | 5 | from reddacted.llm_detector import LLMDetector 6 | 7 | import asyncio 8 | from typing import Dict, Any, List 9 | 10 | SAMPLE_RESPONSE = { 11 | "has_pii": True, 12 | "confidence": 0.85, 13 | "details": ["Mentions specific location 'Miami Springs'"], 14 | "reasoning": "Location mention could help identify author's residence", 15 | "risk_factors": ["geographical specificity", "local slang reference"], 16 | } 17 | 18 | TEST_CASES = [ 19 | { 20 | "text": "My phone number is 555-0123", 21 | "response": { 22 | "has_pii": True, 23 | "confidence": 0.95, 24 | "details": ["Contains phone number"], 25 | "risk_factors": ["contact_info"], 26 | "reasoning": "Phone number present", 27 | }, 28 | }, 29 | { 30 | "text": "I live at 123 Main St, Springfield", 31 | "response": { 32 | "has_pii": True, 33 | "confidence": 0.90, 34 | "details": ["Contains address"], 35 | "risk_factors": ["location"], 36 | "reasoning": "Street address present", 37 | }, 38 | }, 39 | { 40 | "text": "Just a regular comment about cats", 41 | "response": { 42 | "has_pii": False, 43 | "confidence": 0.1, 44 | "details": [], 45 | "risk_factors": [], 46 | "reasoning": "No PII detected", 47 | }, 48 | }, 49 | ] 50 | 51 | 52 | @pytest.fixture 53 | def mock_responses() -> List[Dict[str, Any]]: 54 | """Fixture providing a list of test responses""" 55 | return [case["response"] for case in TEST_CASES] 56 | 57 | 58 | @pytest.fixture 59 | def mock_texts() -> List[str]: 60 | """Fixture providing a list of test texts""" 61 | return [case["text"] for case in TEST_CASES] 62 | 63 | 64 | @pytest.fixture 65 | def mock_api_error(): 66 | """Fixture providing a mock API error""" 67 | return Exception("API Error: Rate limit exceeded") 68 | 69 | 70 | @pytest.fixture 71 | def mock_openai(): 72 | """Fixture to provide mocked OpenAI client""" 73 | with patch("openai.AsyncOpenAI") as mock: 74 | mock_client = MagicMock() 75 | mock_client.chat = MagicMock() 76 | mock.return_value = mock_client 77 | yield mock 78 | 79 | 80 | @pytest.fixture 81 | def mock_completion(): 82 | """Fixture to provide mocked completion response""" 83 | completion = MagicMock() 84 | message = MagicMock() 85 | message.content = json.dumps(SAMPLE_RESPONSE) 86 | choice = MagicMock() 87 | choice.message = message 88 | completion.choices = [choice] 89 | return completion 90 | 91 | 92 | class TestLLMDetector: 93 | """Test suite for LLMDetector class""" 94 | 95 | @pytest.fixture(autouse=True) 96 | def setup_method(self): 
97 | """Setup method run before each test""" 98 | self.detector = LLMDetector(api_key="sk-test") 99 | 100 | @pytest.mark.asyncio 101 | async def test_analyze_text_success(self, mock_openai, mock_completion): 102 | """Test successful PII analysis with valid response""" 103 | mock_openai.return_value.chat.completions.create = AsyncMock(return_value=mock_completion) 104 | 105 | detector = LLMDetector(api_key="sk-test") 106 | risk_score, details = await detector.analyze_text( 107 | "RaunchyRaccoon that looks a lot like Miami Springs!" 108 | ) 109 | 110 | assert risk_score == 0.85 111 | assert details["details"] == SAMPLE_RESPONSE["details"] 112 | assert details["risk_factors"] == SAMPLE_RESPONSE["risk_factors"] 113 | mock_openai.assert_called_once_with(api_key="sk-test") 114 | 115 | @pytest.mark.asyncio 116 | async def test_analyze_invalid_key(self, mock_openai): 117 | """Test authentication error handling""" 118 | mock_openai.side_effect = Exception("Invalid API key") 119 | 120 | risk_score, details = await self.detector.analyze_text("Sample text") 121 | 122 | assert risk_score == 0.0 123 | assert "error" in details 124 | assert "Invalid API key" in details["error"] 125 | 126 | @pytest.mark.asyncio 127 | async def test_rate_limit_handling(self, mock_openai, mock_api_error): 128 | """Test handling of rate limit errors""" 129 | mock_openai.side_effect = mock_api_error 130 | 131 | risk_score, details = await self.detector.analyze_text("Test text") 132 | 133 | assert risk_score == 0.0 134 | assert "error" in details 135 | assert "Rate limit" in details["error"] 136 | 137 | @pytest.mark.asyncio 138 | async def test_empty_text_handling(self): 139 | """Test handling of empty text input""" 140 | risk_score, details = await self.detector.analyze_text("") 141 | 142 | assert risk_score == 0.0 143 | assert "error" in details 144 | assert isinstance(details["error"], str) 145 | 146 | @pytest.mark.asyncio 147 | async def test_long_text_handling(self): 148 | """Test handling of very long text input""" 149 | # Create text that exceeds token limit 150 | long_text = "test " * 5000 151 | 152 | risk_score, details = await self.detector.analyze_text(long_text) 153 | 154 | assert risk_score == 0.0 155 | assert "error" in details 156 | 157 | @pytest.mark.asyncio 158 | async def test_batch_concurrent_processing(self, mock_openai, mock_responses, mock_texts): 159 | """Test concurrent processing of batch texts""" 160 | mock_completions = [] 161 | for response in mock_responses: 162 | completion = MagicMock() 163 | message = MagicMock() 164 | message.content = json.dumps(response) 165 | choice = MagicMock() 166 | choice.message = message 167 | completion.choices = [choice] 168 | mock_completions.append(completion) 169 | 170 | mock_openai.return_value.chat.completions.create = AsyncMock(side_effect=mock_completions) 171 | 172 | results = await self.detector.analyze_batch(mock_texts) 173 | 174 | assert len(results) == len(mock_texts) 175 | assert all(isinstance(score, float) for score, _ in results) 176 | assert all(isinstance(detail, dict) for _, detail in results) 177 | 178 | @pytest.mark.asyncio 179 | async def test_batch_error_handling(self, mock_openai, mock_texts, mock_api_error): 180 | """Test error handling in batch processing""" 181 | mock_openai.return_value.chat.completions.create = AsyncMock(side_effect=mock_api_error) 182 | 183 | results = await self.detector.analyze_batch(mock_texts) 184 | 185 | assert len(results) == len(mock_texts) 186 | assert all(score == 0.0 for score, _ in results) 187 | assert 
all("error" in detail for _, detail in results) 188 | 189 | @pytest.mark.asyncio 190 | async def test_analyze_batch(self, mock_openai): 191 | """Test batch processing of multiple texts""" 192 | # Configure different mock responses for each text 193 | responses = [ 194 | { 195 | "has_pii": True, 196 | "confidence": 0.9, 197 | "details": ["Contains location"], 198 | "risk_factors": ["location"], 199 | }, 200 | { 201 | "has_pii": True, 202 | "confidence": 0.8, 203 | "details": ["Contains phone number"], 204 | "risk_factors": ["contact"], 205 | }, 206 | {"has_pii": False, "confidence": 0.0, "details": [], "risk_factors": []}, 207 | ] 208 | 209 | async def mock_completion(*args, **kwargs): 210 | # Get the input text from the API call 211 | messages = kwargs.get("messages", []) 212 | text_index = len(mock_completion.call_count) 213 | mock_completion.call_count.append(1) # Track number of calls 214 | 215 | # Create mock response 216 | mock_msg = MagicMock() 217 | mock_msg.content = json.dumps(responses[text_index]) 218 | mock_choice = MagicMock() 219 | mock_choice.message = mock_msg 220 | mock_resp = MagicMock() 221 | mock_resp.choices = [mock_choice] 222 | return mock_resp 223 | 224 | # Initialize call counter 225 | mock_completion.call_count = [] 226 | mock_openai.return_value.chat.completions.create = AsyncMock(side_effect=mock_completion) 227 | 228 | detector = LLMDetector(api_key="sk-test") 229 | texts = ["123 Main St, New York", "Call me at 555-0123", "Just a regular text"] 230 | 231 | results = await detector.analyze_batch(texts) 232 | 233 | # Verify results 234 | assert len(results) == len(texts) 235 | 236 | # Check first result (location) 237 | assert results[0][0] == 0.9 238 | assert results[0][1]["risk_factors"] == ["location"] 239 | 240 | # Check second result (phone) 241 | assert results[1][0] == 0.8 242 | assert results[1][1]["risk_factors"] == ["contact"] 243 | 244 | # Check third result (clean) 245 | assert results[2][0] == 0.0 246 | assert results[2][1]["risk_factors"] == [] 247 | 248 | # Verify API setup 249 | mock_openai.assert_called_once_with(api_key="sk-test") 250 | 251 | @pytest.mark.asyncio 252 | async def test_invalid_json_response(self, mock_openai): 253 | """Test handling of malformed LLM response""" 254 | # Create mock with invalid JSON response 255 | mock_completion = MagicMock() 256 | message = MagicMock() 257 | message.content = "Not valid JSON" 258 | mock_completion.choices = [MagicMock(message=message)] 259 | mock_openai.return_value.chat.completions.create = AsyncMock(return_value=mock_completion) 260 | 261 | risk_score, details = await self.detector.analyze_text("Sample text") 262 | 263 | assert risk_score == 0.0 264 | assert "error" in details 265 | assert "Expecting value" in details["error"] 266 | -------------------------------------------------------------------------------- /tests/test_scraper.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import unittest 3 | import json 4 | 5 | from unittest.mock import MagicMock 6 | 7 | from reddacted.api import scraper 8 | 9 | 10 | class ScraperPostiveTestCases(unittest.TestCase): 11 | 12 | def setUp(self): 13 | super(ScraperPostiveTestCases, self).setUp() 14 | self.maxDiff = None 15 | 16 | def tearDown(self): 17 | super(ScraperPostiveTestCases, self).tearDown() 18 | 19 | def test_parse_user(self): 20 | # Mocking the Request for obtaining json from Reddit 21 | user_data = "" 22 | with open("tests/templates/reddit/user.json") as user_file: 23 | user_data = 
json.loads(user_file.read()) 24 | valid_user_response = MagicMock(name="mock_response") 25 | valid_user_response.json = MagicMock(return_value=user_data) 26 | valid_user_response.status_code = 200 27 | requests.get = MagicMock(return_value=valid_user_response) 28 | 29 | expected = [ 30 | { 31 | "text": "Sure is! Appended it to my tweet.", 32 | "upvotes": 1, 33 | "downvotes": 0, 34 | "id": "fnq55o3", 35 | "permalink": "/r/WhitePeopleTwitter/comments/g35yge/the_battle_cry_of_a_generation/fnq55o3/", 36 | }, 37 | { 38 | "text": "Anyone not treating the animals properly, should simply be banned after a warning.", 39 | "upvotes": 2, 40 | "downvotes": 0, 41 | "id": "eyvcagh", 42 | "permalink": "/r/AmItheAsshole/comments/cyt2nl/aita_for_allowing_two_teenagers_to_be_spit_on_by/eyvcagh/", 43 | }, 44 | { 45 | "text": "Is it safe to shower using head and shoulders once per day?", 46 | "upvotes": 4, 47 | "downvotes": 0, 48 | "id": "eyvbx4k", 49 | "permalink": "/r/science/comments/cyx8s4/teen_went_blind_after_eating_only_pringles_fries/eyvbx4k/", 50 | }, 51 | { 52 | "text": "Its crazy how this flows so well in my mind", 53 | "upvotes": 2, 54 | "downvotes": 0, 55 | "id": "ex2ovgj", 56 | "permalink": "/r/memes/comments/cr86z6/dr_phil_review_this/ex2ovgj/", 57 | }, 58 | { 59 | "text": "Cuban's love the name fifi for girl dogs. Source: Me a Cuban American.", 60 | "upvotes": 1, 61 | "downvotes": 0, 62 | "id": "ewx8paz", 63 | "permalink": "/r/AskReddit/comments/cqdjg6/nonamericans_does_your_culture_have_oldfashioned/ewx8paz/", 64 | }, 65 | { 66 | "text": "You can simply follow the deployment guide for Ingress-NGINX, if that is the controller you are wanting to use. See [https://github.com/kubernetes/ingress-nginx/blob/master/docs/deploy/index.md](https://github.com/kubernetes/ingress-nginx/blob/master/docs/deploy/index.md) When you create your ingress resource, you can specify the host as [www.example.com](https://www.example.com) and in your /etc/hosts you can put that URL as the clusterIP. Then send a curl using the \\`Host\\` header to verify. I made a tutorial a while back, using minikube, but the example should still work on an AWS cluster. See [https://medium.com/@awkwardferny/getting-started-with-kubernetes-ingress-nginx-on-minikube-d75e58f52b6c](https://medium.com/@awkwardferny/getting-started-with-kubernetes-ingress-nginx-on-minikube-d75e58f52b6c) Also if you still have questions, you can always post on [http://slack.k8s.io/](http://slack.k8s.io/) on the #ingress-nginx channel.", 67 | "upvotes": 2, 68 | "downvotes": 0, 69 | "id": "ehsepvh", 70 | "permalink": "/r/kubernetes/comments/awvv0h/how_to_create_an_ingress_controller_on_cluster/ehsepvh/", 71 | }, 72 | { 73 | "text": "Hey u/Jokkamo Seems like the syntax is off in the template. I created a blog about templating : [https://medium.com/@awkwardferny/golang-templating-made-easy-4d69d663c558](https://medium.com/@awkwardferny/golang-templating-made-easy-4d69d663c558). Hope it helps you!! You could also create a template function to examine currentTitle. 
", 74 | "upvotes": 1, 75 | "downvotes": 0, 76 | "id": "ee4r5v2", 77 | "permalink": "/r/golang/comments/afxhvk/how_can_i_check_where_a_variable_defined_in_html/ee4r5v2/", 78 | }, 79 | { 80 | "text": "I guess that's a good one to add lol.", 81 | "upvotes": 2, 82 | "downvotes": 0, 83 | "id": "e5ezchx", 84 | "permalink": "/r/programming/comments/9d1fh5/bad_software_development_patterns_and_how_to_fix/e5ezchx/", 85 | }, 86 | { 87 | "text": "RaunchyRaccoon that looks a lot like Miami Springs!", 88 | "upvotes": 1, 89 | "downvotes": 0, 90 | "id": "dmvmihx", 91 | "permalink": "/r/HumansBeingBros/comments/6zgfvk/our_neighborhood_got_battered_by_irma_many/dmvmihx/", 92 | }, 93 | { 94 | "text": "If you can't find water anywhere, I thought of a solution. Simply buy some cheap sodas/tea and drain the soda away and fill it up with tap-water! Will at least keep you with some water.", 95 | "upvotes": 2, 96 | "downvotes": 0, 97 | "id": "dmnmuve", 98 | "permalink": "/r/Miami/comments/6ydvec/hurricane_irma_megathread_2_97/dmnmuve/", 99 | }, 100 | { 101 | "text": "You ever been in a storm? https://www.youtube.com/watch?v=Pr7Y0kZ67o0", 102 | "upvotes": 1, 103 | "downvotes": 0, 104 | "id": "dld5va0", 105 | "permalink": "/r/worldnews/comments/6sfvxd/trump_if_north_korea_escalates_nuclear_threat/dld5va0/", 106 | }, 107 | { 108 | "text": "Officer Joseph.", 109 | "upvotes": 1, 110 | "downvotes": 0, 111 | "id": "dggdqs0", 112 | "permalink": "/r/funny/comments/6664cj/look_whos_taking_the_picture/dggdqs0/", 113 | }, 114 | ] 115 | 116 | sc = scraper.Scraper() 117 | result = sc.parse_user("awkwardferny") 118 | 119 | self.assertEqual(expected, result) 120 | 121 | def test_parse_listing(self): 122 | # Mocking the Request for obtaining json from Reddit 123 | article_data = "" 124 | with open("tests/templates/reddit/article.json") as article_file: 125 | article_data = json.loads(article_file.read()) 126 | valid_article_response = MagicMock(name="mock_response") 127 | valid_article_response.json = MagicMock(return_value=article_data) 128 | valid_article_response.status_code = 200 129 | requests.get = MagicMock(return_value=valid_article_response) 130 | 131 | expected = [ 132 | { 133 | "text": "Looks sick!", 134 | "upvotes": 4, 135 | "downvotes": 0, 136 | "id": "glai61b", 137 | "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/glai61b/", 138 | }, 139 | { 140 | "text": "#DOGE HOLD IT", 141 | "upvotes": 1, 142 | "downvotes": 0, 143 | "id": "glc7p50", 144 | "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/glc7p50/", 145 | }, 146 | { 147 | "text": "10/10 very art such picasso wow", 148 | "upvotes": 1, 149 | "downvotes": 0, 150 | "id": "gladezi", 151 | "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gladezi/", 152 | }, 153 | { 154 | "text": "Much drawing, very sketch. 
Ps I gave you the silver award.", 155 | "upvotes": 2, 156 | "downvotes": 0, 157 | "id": "gsdozky", 158 | "permalink": "/r/doge/comments/l7zp94/i_drew_this_doge_in_2013_during_my_first_years_of/gsdozky/", 159 | }, 160 | ] 161 | 162 | sc = scraper.Scraper() 163 | result = sc.parse_listing("doge", "l7zp94") 164 | 165 | self.assertEqual(expected, result) 166 | -------------------------------------------------------------------------------- /tests/test_sentiment.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class SentimentPositiveTestCases(unittest.TestCase): 5 | 6 | def setUp(self): 7 | super(SentimentPositiveTestCases, self).setUp() 8 | self.maxDiff = None 9 | 10 | def tearDown(self): 11 | super(SentimentPositiveTestCases, self).tearDown() 12 | --------------------------------------------------------------------------------
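Editor's note: tests/test_sentiment.py above is an empty scaffold with no assertions yet. The sketch below shows one direction it could grow in, in the same unittest style as the other suites. It is an assumption on my part, not confirmed by the code shown here, that reddacted's sentiment scoring is backed by NLTK's VADER analyzer; the example therefore exercises VADER directly, and the sample texts and thresholds are illustrative only.

# Hypothetical sketch only -- not part of the repository.
# Assumes a VADER-based sentiment pipeline, which this excerpt does not confirm.
import unittest

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer


class SentimentPositiveTestCases(unittest.TestCase):

    def setUp(self):
        super(SentimentPositiveTestCases, self).setUp()
        self.maxDiff = None
        # Make sure the VADER lexicon is available before building the analyzer.
        nltk.download("vader_lexicon", quiet=True)
        self.analyzer = SentimentIntensityAnalyzer()

    def test_positive_comment_scores_positive(self):
        # A clearly upbeat comment should produce a positive compound score.
        scores = self.analyzer.polarity_scores("This is wonderful, I love it!")
        self.assertGreater(scores["compound"], 0.0)

    def test_negative_comment_scores_negative(self):
        # A clearly hostile comment should produce a negative compound score.
        scores = self.analyzer.polarity_scores("This is terrible and I hate it.")
        self.assertLess(scores["compound"], 0.0)

If the project's own sentiment module wraps VADER (again, an assumption), the same two cases could be rewritten against that wrapper instead of the raw analyzer, keeping the compound-score sign checks as the assertions.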