├── __init__.py ├── pytest.ini ├── scripts ├── package.json └── comment-pr-findings.js ├── claudecode ├── audit.py ├── evals │ ├── __init__.py │ ├── README.md │ └── run_eval.py ├── __init__.py ├── requirements.txt ├── constants.py ├── logger.py ├── json_parser.py ├── test_integration.py ├── prompts.py ├── test_json_parser.py ├── test_helper_functions.py ├── test_prompts.py ├── test_github_action_audit.py ├── test_eval_engine.py ├── test_github_client.py ├── findings_filter.py ├── test_claude_runner.py ├── test_findings_conversion.py └── test_hard_exclusion_rules.py ├── .gitignore ├── .github └── workflows │ ├── sast.yml │ └── test-claudecode.yml ├── examples ├── custom-security-scan-instructions.txt └── custom-false-positive-filtering.txt ├── LICENSE ├── docs ├── custom-filtering-instructions.md └── custom-security-scan-instructions.md ├── README.md ├── .claude └── commands │ └── security-review.md └── action.yml /__init__.py: -------------------------------------------------------------------------------- 1 | # SAST package -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests 3 | python_files = test_*.py 4 | python_classes = Test* 5 | python_functions = test_* 6 | addopts = -v --tb=short -------------------------------------------------------------------------------- /scripts/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@anthropic-ai/sast-scripts", 3 | "version": "1.0.0", 4 | "description": "Scripts for Anthropic SAST Action", 5 | "scripts": { 6 | "test": "bun test", 7 | "test:watch": "bun test --watch" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /claudecode/audit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | 
Main entry point for ClaudeCode audit tool. 4 | This provides a cleaner interface for running the audit. 5 | """ 6 | 7 | from claudecode.github_action_audit import main 8 | 9 | if __name__ == "__main__": 10 | main() -------------------------------------------------------------------------------- /claudecode/evals/__init__.py: -------------------------------------------------------------------------------- 1 | """Evaluation tool for SAST.""" 2 | 3 | from .eval_engine import EvalCase, EvalResult, EvaluationEngine, run_single_evaluation 4 | 5 | __all__ = [ 6 | 'EvalCase', 7 | 'EvalResult', 8 | 'EvaluationEngine', 9 | 'run_single_evaluation', 10 | ] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Cache directories 2 | .cache/ 3 | 4 | # Python 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | *.pyc 9 | 10 | # Output files 11 | *.csv 12 | *.json 13 | security_report.* 14 | 15 | # Virtual environments 16 | venv/ 17 | env/ 18 | .venv/ 19 | 20 | # Debug files 21 | claudecode/claudecode-prompt.txt 22 | eval_results/ 23 | -------------------------------------------------------------------------------- /claudecode/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | ClaudeCode - AI-Powered PR Security Audit Tool 3 | 4 | A standalone security audit tool that uses Claude Code for comprehensive 5 | security analysis of GitHub pull requests. 
6 | """ 7 | 8 | __version__ = "1.0.0" 9 | __author__ = "Anthropic Security Team" 10 | 11 | # Import main components for easier access 12 | from claudecode.github_action_audit import ( 13 | GitHubActionClient, 14 | SimpleClaudeRunner, 15 | main 16 | ) 17 | 18 | __all__ = [ 19 | "GitHubActionClient", 20 | "SimpleClaudeRunner", 21 | "main" 22 | ] -------------------------------------------------------------------------------- /.github/workflows/sast.yml: -------------------------------------------------------------------------------- 1 | name: SAST 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | workflow_dispatch: 9 | 10 | jobs: 11 | security-review: 12 | runs-on: ubuntu-24.04 13 | permissions: 14 | contents: read 15 | pull-requests: write 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - uses: ./ # Points directly to action.yml 20 | with: 21 | comment-pr: true 22 | upload-results: true 23 | exclude-directories: "tests/vulnerable" 24 | claude-api-key: ${{ secrets.CLAUDE_API_KEY }} 25 | run-every-commit: true -------------------------------------------------------------------------------- /claudecode/requirements.txt: -------------------------------------------------------------------------------- 1 | # Requirements for claudecode - Claude Code PR Security Audit Tool 2 | # Core dependencies for the GitHub Action version 3 | 4 | # GitHub API client 5 | PyGithub>=1.59.0 6 | 7 | # HTTP requests for GitHub API 8 | requests>=2.28.0 9 | 10 | # JSON parsing utilities (no additional deps - uses stdlib) 11 | # prompts.py (no additional deps - uses stdlib) 12 | # findings_filter.py (uses re, built-in) 13 | 14 | # Anthropic SDK for Claude API-based false positive filtering 15 | anthropic>=0.39.0 16 | 17 | # Note: Claude CLI tool must be installed separately 18 | # The claude command-line tool is required for security analysis -------------------------------------------------------------------------------- /claudecode/constants.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Constants and configuration values for ClaudeCode. 3 | """ 4 | 5 | import os 6 | 7 | # API Configuration 8 | DEFAULT_CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL') or 'claude-opus-4-1-20250805' 9 | DEFAULT_TIMEOUT_SECONDS = 180 # 3 minutes 10 | DEFAULT_MAX_RETRIES = 3 11 | RATE_LIMIT_BACKOFF_MAX = 30 # Maximum backoff time for rate limits 12 | 13 | # Token Limits 14 | PROMPT_TOKEN_LIMIT = 16384 # 16k tokens max for claude-opus-4 15 | 16 | # Exit Codes 17 | EXIT_SUCCESS = 0 18 | EXIT_GENERAL_ERROR = 1 19 | EXIT_CONFIGURATION_ERROR = 2 20 | 21 | # Subprocess Configuration 22 | SUBPROCESS_TIMEOUT = 1200 # 20 minutes for Claude Code execution 23 | 24 | -------------------------------------------------------------------------------- /examples/custom-security-scan-instructions.txt: -------------------------------------------------------------------------------- 1 | **Compliance-Specific Checks:** 2 | - GDPR Article 17 "Right to Erasure" implementation gaps 3 | - HIPAA PHI encryption at rest violations 4 | - PCI DSS credit card data retention beyond allowed periods 5 | - SOC2 audit trail tampering or deletion capabilities 6 | - CCPA data portability API vulnerabilities 7 | 8 | **Financial Services Security:** 9 | - Transaction replay attacks in payment processing 10 | - Double-spending vulnerabilities in ledger systems 11 | - Interest calculation manipulation through timing attacks 12 | - Regulatory reporting data tampering 13 | - Know Your Customer (KYC) bypass mechanisms 14 | 15 | **E-commerce Specific:** 16 | - Shopping cart manipulation for price changes 17 | - Inventory race conditions allowing overselling 18 | - Coupon/discount stacking exploits 19 | - Affiliate tracking manipulation 20 | - Review system authentication bypass 21 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Anthropic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /.github/workflows/test-claudecode.yml: -------------------------------------------------------------------------------- 1 | name: Test ClaudeCode Integration 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | workflow_dispatch: 9 | 10 | permissions: 11 | contents: read 12 | pull-requests: read 13 | actions: read 14 | checks: read 15 | 16 | jobs: 17 | test-claudecode: 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - name: Set up Python 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: '3.10' 27 | 28 | - name: Set up Node.js 29 | uses: actions/setup-node@v4 30 | with: 31 | node-version: '20' 32 | 33 | - name: Install Claude CLI 34 | run: | 35 | npm install -g @anthropic-ai/claude-code 36 | 37 | - name: Install dependencies 38 | run: | 39 | pip install pytest pytest-cov 40 | pip install -r claudecode/requirements.txt 41 | 42 | - name: Run ClaudeCode unit tests 43 | run: | 44 | export PYTHONPATH="${PYTHONPATH}:${PWD}" 45 | pytest claudecode -v --cov=claudecode --cov-report=term-missing 46 | 47 | - name: Install Bun 48 | uses: oven-sh/setup-bun@v2 49 | with: 50 | bun-version: latest 51 | 52 | - name: Install script dependencies 53 | run: | 54 | cd scripts 55 | bun install 56 | 57 | - name: Run comment script tests 58 | run: | 59 | cd scripts 60 | bun test -------------------------------------------------------------------------------- /claudecode/logger.py: -------------------------------------------------------------------------------- 1 | """Logging configuration for ClaudeCode.""" 2 | 3 | import logging 4 | import sys 5 | import os 6 | 7 | 8 | def get_logger(name: str) -> logging.Logger: 9 | """Get a configured logger that outputs to stderr. 
10 | 11 | Args: 12 | name: The name of the logger (usually __name__) 13 | 14 | Returns: 15 | Configured logger instance 16 | """ 17 | logger = logging.getLogger(name) 18 | 19 | # Only configure if not already configured 20 | if not logger.handlers: 21 | handler = logging.StreamHandler(sys.stderr) 22 | 23 | # Get repo and PR number from environment for prefix 24 | repo_name = os.environ.get('GITHUB_REPOSITORY', '') 25 | pr_number = os.environ.get('PR_NUMBER', '') 26 | 27 | # Build prefix 28 | if repo_name and pr_number: 29 | prefix = f"[{repo_name}#{pr_number}]" 30 | elif repo_name: 31 | prefix = f"[{repo_name}]" 32 | elif pr_number: 33 | prefix = f"[PR#{pr_number}]" 34 | else: 35 | prefix = "" 36 | 37 | # Include prefix in format if available 38 | if prefix: 39 | format_str = f'{prefix} [%(name)s] %(message)s' 40 | else: 41 | format_str = '[%(name)s] %(message)s' 42 | 43 | formatter = logging.Formatter(format_str) 44 | handler.setFormatter(formatter) 45 | logger.addHandler(handler) 46 | logger.setLevel(logging.INFO) 47 | 48 | return logger -------------------------------------------------------------------------------- /claudecode/evals/README.md: -------------------------------------------------------------------------------- 1 | # SAST Evaluation Tool 2 | 3 | This directory contains a tool for evaluating the SAST (Static Application Security Testing) tool on individual GitHub pull requests. 4 | 5 | ## Overview 6 | 7 | The evaluation tool allows you to run the Claude Code Security Reviewer on any GitHub PR to analyze its security findings. 
This is useful for: 8 | - Testing the tool on specific PRs 9 | - Evaluating performance and accuracy 10 | - Debugging security analysis issues 11 | 12 | ## Requirements 13 | 14 | - Python 3.9+ 15 | - Git 2.20+ (for worktree support) 16 | - GitHub CLI (`gh`) for API access 17 | - Environment variables: 18 | - `ANTHROPIC_API_KEY`: Required for Claude API access 19 | - `GITHUB_TOKEN`: Recommended for GitHub API rate limits 20 | 21 | ## Usage 22 | 23 | Run an evaluation on a single PR: 24 | 25 | ```bash 26 | python -m claudecode.evals.run_eval example/repo#123 --verbose 27 | ``` 28 | 29 | ### Command-line Options 30 | 31 | - PR specification: Required positional argument in format `owner/repo#pr_number` 32 | - `--output-dir PATH`: Directory for results (default: `./eval_results`) 33 | - `--work-dir PATH`: Directory where git repositories will be cloned and stored (default: `~/code/audit`) 34 | - `--verbose`: Enable verbose logging to see detailed progress 35 | 36 | ## Output 37 | 38 | The evaluation generates a JSON file in the output directory with: 39 | - Success/failure status 40 | - Runtime metrics 41 | - Security findings count 42 | - Detailed findings with file, line, severity, and descriptions 43 | 44 | Example output file: `pr_example_repo_123.json` 45 | 46 | ## Architecture 47 | 48 | The evaluation tool uses git worktrees for efficient repository management: 49 | 1. Clones the repository once as a base 50 | 2. Creates lightweight worktrees for each PR evaluation 51 | 3. Automatically handles cleanup of worktrees 52 | 4. Runs the SAST audit in the PR-specific worktree -------------------------------------------------------------------------------- /examples/custom-false-positive-filtering.txt: -------------------------------------------------------------------------------- 1 | HARD EXCLUSIONS - Automatically exclude findings matching these patterns: 2 | 1. All DOS/resource exhaustion - we have k8s resource limits and autoscaling 3 | 2. 
Missing rate limiting - handled by our API gateway 4 | 3. Tabnabbing vulnerabilities - acceptable risk per our threat model 5 | 4. Test files (ending in _test.go, _test.js, or in __tests__ directories) 6 | 5. Documentation files (*.md, *.rst) 7 | 6. Configuration files that are not exposed to users (internal configs) 8 | 7. Memory safety in Rust, Go, or managed languages 9 | 8. GraphQL introspection queries - we intentionally expose schema in dev 10 | 9. Missing CSRF protection - we use stateless JWT auth exclusively 11 | 10. Timing attacks on non-cryptographic operations 12 | 11. Regex DoS in input validation (we have request timeouts) 13 | 12. Missing security headers in internal services (only public-facing services need them) 14 | 15 | SIGNAL QUALITY CRITERIA - For remaining findings, assess: 16 | 1. Can an unauthenticated external attacker exploit this? 17 | 2. Is there actual data exfiltration or system compromise potential? 18 | 3. Is this exploitable in our production Kubernetes environment? 19 | 4. Does this bypass our API gateway security controls? 20 | 21 | PRECEDENTS - 22 | 1. We use AWS Cognito for all authentication - auth bypass must defeat Cognito 23 | 2. All APIs require valid JWT tokens validated at the gateway level 24 | 3. SQL injection is only valid if using raw queries (we use Prisma ORM everywhere) 25 | 4. All internal services communicate over mTLS within the k8s cluster 26 | 5. Secrets are in AWS Secrets Manager or k8s secrets, never in code 27 | 6. We allow verbose error messages in dev/staging (not production) 28 | 7. File uploads go directly to S3 with presigned URLs (no local file handling) 29 | 8. All user input is considered untrusted and validated on the backend 30 | 9. Frontend validation is only for UX, not security 31 | 10. We use CSP headers and strict Content-Type validation 32 | 11. CORS is configured per-service based on actual needs 33 | 12. 
All webhooks use HMAC signature verification -------------------------------------------------------------------------------- /docs/custom-filtering-instructions.md: -------------------------------------------------------------------------------- 1 | # Custom False Positive Filtering Instructions 2 | 3 | The Claude Code Security Reviewer Action supports custom false positive filtering instructions, allowing you to tailor the security analysis to your specific environment and requirements. 4 | 5 | ## Overview 6 | 7 | By default, the SAST action includes a comprehensive set of exclusions and criteria for filtering out false positives. However, every organization has unique security requirements, technology stacks, and threat models. The `false-positive-filtering-instructions` input allows you to provide your own custom criteria. 8 | 9 | ## Usage 10 | 11 | 1. Create a text file containing your custom filtering instructions (e.g., `.github/false-positive-filtering.txt`) 12 | 2. Reference it in your workflow: 13 | 14 | ```yaml 15 | - uses: anthropics/claude-code-security-review@main 16 | with: 17 | false-positive-filtering-instructions: .github/false-positive-filtering.txt 18 | ``` 19 | 20 | ## File Format 21 | 22 | The file should contain plain text with three main sections: 23 | 24 | ### 1. HARD EXCLUSIONS 25 | List patterns that should be automatically excluded from findings. 26 | 27 | ### 2. SIGNAL QUALITY CRITERIA 28 | Questions to assess whether a finding represents a real vulnerability. 29 | 30 | ### 3. PRECEDENTS 31 | Specific guidance for common security patterns in your environment. 32 | 33 | ## Example 34 | 35 | See [examples/custom-false-positive-filtering.txt](../examples/custom-false-positive-filtering.txt) for a complete example tailored to a modern cloud-native application. 36 | 37 | ## Default Instructions 38 | 39 | If no custom file is provided, the action uses default instructions tuned to work well for most applications. 
40 | 41 | ## Best Practices 42 | 43 | 1. **Start with defaults**: Begin with the default instructions and modify based on false positives you encounter 44 | 2. **Be specific**: Include details about your security architecture (e.g., "We use AWS Cognito for all authentication") 45 | 3. **Document assumptions**: Explain why certain patterns are excluded (e.g., "k8s resource limits prevent DOS") 46 | 4. **Version control**: Track changes to your filtering instructions alongside your code 47 | 5. **Team review**: Have your security team review and approve the filtering instructions 48 | 49 | ## Common Customizations 50 | 51 | - **Technology-specific exclusions**: Exclude findings that don't apply to your tech stack 52 | - **Infrastructure assumptions**: Document security controls at the infrastructure level 53 | - **Compliance requirements**: Adjust criteria based on your compliance needs 54 | - **Development practices**: Reflect your team's security practices and tooling -------------------------------------------------------------------------------- /claudecode/json_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Utilities for parsing JSON from text output.""" 3 | 4 | import json 5 | import re 6 | import logging 7 | 8 | # Configure logging 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def extract_json_from_text(text): 13 | """ 14 | Extract JSON object from text, looking in various formats and locations. 
15 | 16 | Args: 17 | text: The text that may contain JSON 18 | 19 | Returns: 20 | dict: Parsed JSON object if found, None otherwise 21 | """ 22 | try: 23 | # First, try to extract JSON from markdown code blocks (with or without language tag) 24 | json_matches = [ 25 | re.search(r'```json\s*(.*?)\s*```', text, re.DOTALL), 26 | re.search(r'```\s*(\{.*?\})\s*```', text, re.DOTALL) 27 | ] 28 | 29 | for json_match in json_matches: 30 | if json_match: 31 | try: 32 | return json.loads(json_match.group(1)) 33 | except json.JSONDecodeError: 34 | continue 35 | 36 | # If no JSON found in code blocks, try to find JSON objects anywhere in the text 37 | # Find all potential JSON objects (looking for balanced braces) 38 | brace_count = 0 39 | json_start = -1 40 | for i, char in enumerate(text): 41 | if char == '{': 42 | if brace_count == 0: 43 | json_start = i 44 | brace_count += 1 45 | elif char == '}': 46 | brace_count -= 1 47 | if brace_count == 0 and json_start != -1: 48 | # Found a complete JSON object 49 | potential_json = text[json_start:i+1] 50 | try: 51 | return json.loads(potential_json) 52 | except json.JSONDecodeError: 53 | # This wasn't valid JSON, continue looking 54 | continue 55 | except Exception: 56 | pass 57 | 58 | return None 59 | 60 | 61 | def parse_json_with_fallbacks(text, error_context=""): 62 | """ 63 | Parse JSON from text with multiple fallback strategies and error handling. 
64 | 65 | Args: 66 | text: The text to parse 67 | error_context: Context string for error messages 68 | 69 | Returns: 70 | tuple: (success, result) where result is either the parsed JSON dict or error info 71 | """ 72 | try: 73 | # First, try direct JSON parsing 74 | return True, json.loads(text) 75 | except json.JSONDecodeError: 76 | pass 77 | 78 | # Try extracting JSON from text 79 | extracted_json = extract_json_from_text(text) 80 | if extracted_json: 81 | return True, extracted_json 82 | 83 | # If all parsing failed, return error info 84 | error_msg = "Failed to parse JSON" 85 | if error_context: 86 | error_msg = f"{error_context}: {error_msg}" 87 | 88 | logger.error(f"{error_msg}. Raw output: {repr(text)}") 89 | return False, {"error": f"Invalid JSON response -- raw output: {repr(text)}"} -------------------------------------------------------------------------------- /docs/custom-security-scan-instructions.md: -------------------------------------------------------------------------------- 1 | # Custom Security Scan Instructions 2 | 3 | The Claude Code Security Reviewer Action supports custom security scan instructions, allowing you to add organization-specific vulnerability categories to the security audit. 4 | 5 | ## Overview 6 | 7 | The default security scan covers common vulnerability categories like SQL injection, XSS, authentication issues, etc. However, organizations often have specific security concerns based on their: 8 | - Technology stack (GraphQL, gRPC, specific cloud providers) 9 | - Compliance requirements (GDPR, HIPAA, PCI DSS) 10 | - Industry-specific vulnerabilities (financial services, healthcare) 11 | - Custom frameworks and libraries 12 | 13 | The `custom-security-scan-instructions` input allows you to extend the security categories that Claude checks for. 14 | 15 | ## Usage 16 | 17 | 1. Create a text file containing your custom security categories (e.g., `.github/custom-security-categories.txt`) 18 | 2. 
Reference it in your workflow: 19 | 20 | ```yaml 21 | - uses: anthropics/claude-code-security-review@main 22 | with: 23 | custom-security-scan-instructions: .github/custom-security-categories.txt 24 | ``` 25 | 26 | ## File Format 27 | 28 | The file should contain additional security categories in the same format as the default categories. Each category should: 29 | - Start with a descriptive header in bold (using `**Category Name:**`) 30 | - List specific vulnerabilities or patterns to check for 31 | - Use clear, actionable descriptions 32 | 33 | ### Example Structure: 34 | ``` 35 | **Category Name:** 36 | - Specific vulnerability or pattern to check 37 | - Another specific issue to look for 38 | - Detailed description of what constitutes this vulnerability 39 | 40 | **Another Category:** 41 | - More specific checks 42 | - Additional patterns to identify 43 | ``` 44 | 45 | ## Examples 46 | 47 | ### Industry-Specific Example 48 | See [examples/organization-specific-scan-instructions.txt](../examples/custom-security-scan-instructions.txt) for an example set of instructions that customize Claude Code to look for industry-specific security weaknesses including: 49 | - Compliance checks (GDPR, HIPAA, PCI DSS) 50 | - Financial services security 51 | - E-commerce specific issues 52 | 53 | ## How It Works 54 | 55 | Your custom instructions are appended to the security audit prompt after the default "Data Exposure" category. This means: 56 | 1. All default categories are still checked 57 | 2. Your custom categories extend (not replace) the default scan 58 | 3. The same HIGH/MEDIUM/LOW severity guidelines apply 59 | 60 | ## Best Practices 61 | 62 | 1. **Be Specific**: Provide clear descriptions of what constitutes each vulnerability 63 | 2. **Include Context**: Explain why something is a vulnerability in your environment 64 | 3. **Provide Examples**: Where possible, describe specific attack scenarios 65 | 4. 
**Avoid Duplicates**: Check the default categories to avoid redundancy 66 | 5. **Keep It Focused**: Only add categories relevant to your codebase 67 | 68 | ## Default Categories Reference 69 | 70 | The default scan already includes: 71 | - Input Validation (SQL injection, command injection, XXE, etc.) 72 | - Authentication & Authorization 73 | - Crypto & Secrets Management 74 | - Injection & Code Execution 75 | - Data Exposure 76 | 77 | Your custom categories should complement these, not duplicate them. 78 | 79 | ## Tips for Writing Effective Categories 80 | 81 | 1. **Technology-Specific**: Add checks for your specific tech stack 82 | ``` 83 | **GraphQL Security:** 84 | - Query depth attacks allowing unbounded recursion 85 | - Field-level authorization bypass 86 | - Introspection data leakage in production 87 | ``` 88 | 89 | 2. **Compliance-Focused**: Add regulatory requirements 90 | ``` 91 | **GDPR Compliance:** 92 | - Personal data processing without consent mechanisms 93 | - Missing data retention limits 94 | - Lack of data portability APIs 95 | ``` 96 | 97 | 3. **Business Logic**: Add domain-specific vulnerabilities 98 | ``` 99 | **Payment Processing:** 100 | - Transaction replay vulnerabilities 101 | - Currency conversion manipulation 102 | - Refund process bypass 103 | ``` -------------------------------------------------------------------------------- /claudecode/test_integration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Pytest tests for SAST integration components. 
4 | """ 5 | 6 | import pytest 7 | import json 8 | 9 | class TestClaudeCodeAudit: 10 | """Test the main audit functionality.""" 11 | 12 | @pytest.fixture 13 | def mock_env(self, monkeypatch): 14 | """Set up mock environment variables.""" 15 | monkeypatch.setenv('GITHUB_REPOSITORY', 'test/repo') 16 | monkeypatch.setenv('PR_NUMBER', '123') 17 | monkeypatch.setenv('GITHUB_TOKEN', 'mock-token') 18 | monkeypatch.setenv('ANTHROPIC_API_KEY', 'mock-api-key') 19 | 20 | def test_missing_environment_variables(self, monkeypatch, capsys): 21 | """Test behavior with missing environment variables.""" 22 | from claudecode import github_action_audit 23 | 24 | # Test missing GITHUB_REPOSITORY 25 | monkeypatch.delenv('GITHUB_REPOSITORY', raising=False) 26 | with pytest.raises(SystemExit) as exc_info: 27 | github_action_audit.main() 28 | assert exc_info.value.code == 2 # EXIT_CONFIGURATION_ERROR 29 | captured = capsys.readouterr() 30 | output = json.loads(captured.out) 31 | assert 'GITHUB_REPOSITORY' in output['error'] 32 | 33 | # Test missing PR_NUMBER 34 | monkeypatch.setenv('GITHUB_REPOSITORY', 'test/repo') 35 | monkeypatch.delenv('PR_NUMBER', raising=False) 36 | with pytest.raises(SystemExit) as exc_info: 37 | github_action_audit.main() 38 | assert exc_info.value.code == 2 # EXIT_CONFIGURATION_ERROR 39 | captured = capsys.readouterr() 40 | output = json.loads(captured.out) 41 | assert 'PR_NUMBER' in output['error'] 42 | 43 | def test_invalid_pr_number(self, monkeypatch, capsys): 44 | """Test behavior with invalid PR number.""" 45 | from claudecode import github_action_audit 46 | 47 | monkeypatch.setenv('GITHUB_REPOSITORY', 'test/repo') 48 | monkeypatch.setenv('PR_NUMBER', 'invalid') 49 | monkeypatch.setenv('GITHUB_TOKEN', 'mock-token') 50 | 51 | with pytest.raises(SystemExit) as exc_info: 52 | github_action_audit.main() 53 | assert exc_info.value.code == 2 # EXIT_CONFIGURATION_ERROR 54 | captured = capsys.readouterr() 55 | output = json.loads(captured.out) 56 | assert 'Invalid 
PR_NUMBER' in output['error'] 57 | 58 | 59 | class TestEnvironmentSetup: 60 | """Test environment setup and configuration.""" 61 | 62 | def test_anthropic_api_key_handling(self, monkeypatch): 63 | """Test handling of Anthropic API key.""" 64 | from claudecode.github_action_audit import SimpleClaudeRunner 65 | 66 | runner = SimpleClaudeRunner() 67 | 68 | # Test with API key set 69 | monkeypatch.setenv('ANTHROPIC_API_KEY', 'test-key') 70 | valid, error = runner.validate_claude_available() 71 | # Note: This will fail if claude CLI is not installed, which is OK 72 | if not valid and 'not installed' in error: 73 | pytest.fail("Claude CLI not installed") 74 | 75 | # Test without API key 76 | monkeypatch.delenv('ANTHROPIC_API_KEY', raising=False) 77 | valid, error = runner.validate_claude_available() 78 | if 'not installed' not in error: 79 | assert not valid 80 | assert 'ANTHROPIC_API_KEY' in error 81 | 82 | 83 | class TestFilteringIntegration: 84 | """Test the filtering system integration.""" 85 | 86 | def test_full_filter_with_llm_disabled(self): 87 | """Test FindingsFilter with LLM filtering disabled.""" 88 | from claudecode.findings_filter import FindingsFilter 89 | 90 | # Create filter with LLM disabled 91 | filter_instance = FindingsFilter( 92 | use_hard_exclusions=True, 93 | use_claude_filtering=False 94 | ) 95 | 96 | test_findings = [ 97 | {'description': 'SQL injection vulnerability', 'severity': 'HIGH'}, 98 | {'description': 'Missing rate limiting', 'severity': 'MEDIUM'}, 99 | ] 100 | 101 | success, results, stats = filter_instance.filter_findings(test_findings) 102 | 103 | assert success is True 104 | assert stats.total_findings == 2 105 | assert stats.kept_findings == 1 # Only SQL injection 106 | assert stats.hard_excluded == 1 # Rate limiting 107 | assert stats.claude_excluded == 0 # No Claude filtering 108 | -------------------------------------------------------------------------------- /claudecode/evals/run_eval.py: 
#!/usr/bin/env python3
"""CLI for running SAST evaluation on a single PR."""

import argparse
import os
import sys
import json
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
from dataclasses import dataclass, asdict

# Import the minimal required functionality


@dataclass
class EvalCase:
    """Single evaluation test case."""
    repo_name: str  # "owner/repo" as accepted by the GitHub API
    pr_number: int
    description: str = ""


@dataclass
class EvalResult:
    """Result of a single evaluation."""
    repo_name: str
    pr_number: int
    description: str

    # Evaluation results
    success: bool
    runtime_seconds: float
    findings_count: int
    detected_vulnerabilities: bool

    # Optional fields
    error_message: str = ""
    findings_summary: Optional[List[Dict[str, Any]]] = None
    full_findings: Optional[List[Dict[str, Any]]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return asdict(self)


def parse_pr_spec(spec: str) -> Tuple[str, int]:
    """Parse a PR spec like 'owner/repo#123' into ('owner/repo', 123).

    Args:
        spec: PR specification in the form 'repo_owner/repo_name#pr_number'.

    Returns:
        Tuple of (repo full name, PR number as int).

    Raises:
        ValueError: If the spec is malformed (missing '#', non-numeric PR
            number, or repository not exactly 'owner/repo').
    """
    repo_part, sep, number_part = spec.partition('#')
    if not sep:
        raise ValueError("Missing '#pr_number' suffix")
    pr_number = int(number_part)  # raises ValueError for non-numeric input

    # Validate repository format: exactly one '/', both sides non-empty
    owner, slash, repo = repo_part.partition('/')
    if not slash or '/' in repo:
        raise ValueError("Repository must be in format 'owner/repo'")
    if not owner or not repo:
        raise ValueError("Repository owner and name cannot be empty")
    return repo_part, pr_number


def _print_result(result: EvalResult) -> None:
    """Pretty-print an EvalResult to stdout."""
    print("\n" + "=" * 60)
    print("EVALUATION RESULTS:")
    print(f"Success: {result.success}")
    print(f"Runtime: {result.runtime_seconds:.1f} seconds")
    print(f"Vulnerabilities detected: {result.detected_vulnerabilities}")
    print(f"Findings count: {result.findings_count}")

    if result.error_message:
        print(f"Error: {result.error_message}")

    if result.full_findings:
        print("\nFindings:")
        for finding in result.full_findings:
            print(f"  - [{finding.get('severity', 'UNKNOWN')}] {finding.get('file', 'unknown')}:{finding.get('line', '?')}")
            if 'category' in finding:
                print(f"    Category: {finding['category']}")
            if 'description' in finding:
                print(f"    Description: {finding['description']}")
            if 'exploit_scenario' in finding:
                print(f"    Exploit: {finding['exploit_scenario']}")
            if 'recommendation' in finding:
                print(f"    Fix: {finding['recommendation']}")
            if 'confidence' in finding:
                print(f"    Confidence: {finding['confidence']}")
            print()  # Empty line between findings
    elif result.findings_summary:
        # Fallback to summary if full findings not available
        print("\nFindings:")
        for finding in result.findings_summary:
            print(f"  - [{finding.get('severity', 'UNKNOWN')}] {finding.get('file', 'unknown')}:{finding.get('line', '?')}")
            if 'title' in finding and finding['title'] != 'Unknown':
                print(f"    {finding['title']}")
            if 'description' in finding and finding['description'] != 'Unknown':
                print(f"    {finding['description']}")


def main():
    """Main entry point for single PR SAST evaluation.

    Parses CLI arguments, validates the environment, runs the evaluation
    for one PR, prints a human-readable report, and writes the result as
    JSON into --output-dir. Exits 0 on success, 1 on failure.
    """
    parser = argparse.ArgumentParser(
        description="Run SAST security evaluation on a single GitHub PR",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "pr",
        type=str,
        help="PR to evaluate in format 'repo_owner/repo_name#pr_number' (e.g., 'example/repo#123')"
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        default="./eval_results",
        help="Directory for evaluation results"
    )

    parser.add_argument(
        "--work-dir",
        type=str,
        default=None,
        help="Directory for temporary repositories (defaults to ~/code/audit)"
    )

    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose logging"
    )

    args = parser.parse_args()

    # Set EVAL_MODE=1 automatically for evaluation runs
    os.environ['EVAL_MODE'] = '1'

    # Check for required environment variables
    if not os.environ.get('ANTHROPIC_API_KEY'):
        print("Error: ANTHROPIC_API_KEY environment variable is not set")
        print("Please set it before running the evaluation")
        sys.exit(1)

    # Parse the PR specification
    try:
        repo_part, pr_number = parse_pr_spec(args.pr)
    except ValueError as e:
        print(f"Error: Invalid PR format '{args.pr}': {e}")
        print("Expected format: 'repo_owner/repo_name#pr_number'")
        print("Example: 'example/repo#123'")
        sys.exit(1)

    print(f"\nEvaluating PR: {repo_part}#{pr_number}")
    print("-" * 60)

    # Create test case
    test_case = EvalCase(
        repo_name=repo_part,
        pr_number=pr_number,
        description=f"Evaluation for {repo_part}#{pr_number}"
    )

    # Imported lazily so argument/format errors don't require the engine
    from .eval_engine import run_single_evaluation

    # Run the evaluation
    result = run_single_evaluation(test_case, verbose=args.verbose, work_dir=args.work_dir)

    # Display results
    _print_result(result)

    # Save result to output directory
    output_path = Path(args.output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    result_file = output_path / f"pr_{repo_part.replace('/', '_')}_{pr_number}.json"

    with open(result_file, 'w') as f:
        json.dump(result.to_dict(), f, indent=2)

    print(f"\nResult saved to: {result_file}")

    # Exit with appropriate code
    sys.exit(0 if result.success else 1)


if __name__ == "__main__":
    main()
def get_security_audit_prompt(pr_data, pr_diff=None, include_diff=True, custom_scan_instructions=None):
    """Generate security audit prompt for Claude Code.

    The returned prompt instructs the model to output findings as JSON and
    nothing else; the embedded schema example must therefore be valid JSON.

    Args:
        pr_data: PR data dictionary from GitHub API
        pr_diff: Optional complete PR diff in unified format
        include_diff: Whether to include the diff in the prompt (default: True)
        custom_scan_instructions: Optional custom security categories to append

    Returns:
        Formatted prompt string
    """

    files_changed = "\n".join([f"- {f['filename']}" for f in pr_data['files']])

    # Add diff section if provided and include_diff is True
    diff_section = ""
    if pr_diff and include_diff:
        diff_section = f"""

PR DIFF CONTENT:
```
{pr_diff}
```

Review the complete diff above. This contains all code changes in the PR.
"""
    elif pr_diff and not include_diff:
        # Diff exists but was too large to inline; tell the model to explore instead
        diff_section = """

NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR.
"""

    # Add custom security categories if provided
    custom_categories_section = ""
    if custom_scan_instructions:
        custom_categories_section = f"\n{custom_scan_instructions}\n"

    return f"""
You are a senior security engineer conducting a focused security review of GitHub PR #{pr_data['number']}: "{pr_data['title']}"

CONTEXT:
- Repository: {pr_data.get('head', {}).get('repo', {}).get('full_name', 'unknown')}
- Author: {pr_data['user']}
- Files changed: {pr_data['changed_files']}
- Lines added: {pr_data['additions']}
- Lines deleted: {pr_data['deletions']}

Files modified:
{files_changed}{diff_section}

OBJECTIVE:
Perform a security-focused code review to identify HIGH-CONFIDENCE security vulnerabilities that could have real exploitation potential. This is not a general code review - focus ONLY on security implications newly added by this PR. Do not comment on existing security concerns.

CRITICAL INSTRUCTIONS:
1. MINIMIZE FALSE POSITIVES: Only flag issues where you're >80% confident of actual exploitability
2. AVOID NOISE: Skip theoretical issues, style concerns, or low-impact findings
3. FOCUS ON IMPACT: Prioritize vulnerabilities that could lead to unauthorized access, data breaches, or system compromise
4. EXCLUSIONS: Do NOT report the following issue types:
   - Denial of Service (DOS) vulnerabilities, even if they allow service disruption
   - Secrets or sensitive data stored on disk (these are handled by other processes)
   - Rate limiting or resource exhaustion issues

SECURITY CATEGORIES TO EXAMINE:

**Input Validation Vulnerabilities:**
- SQL injection via unsanitized user input
- Command injection in system calls or subprocesses
- XXE injection in XML parsing
- Template injection in templating engines
- NoSQL injection in database queries
- Path traversal in file operations

**Authentication & Authorization Issues:**
- Authentication bypass logic
- Privilege escalation paths
- Session management flaws
- JWT token vulnerabilities
- Authorization logic bypasses

**Crypto & Secrets Management:**
- Hardcoded API keys, passwords, or tokens
- Weak cryptographic algorithms or implementations
- Improper key storage or management
- Cryptographic randomness issues
- Certificate validation bypasses

**Injection & Code Execution:**
- Remote code execution via deserialization
- Pickle injection in Python
- YAML deserialization vulnerabilities
- Eval injection in dynamic code execution
- XSS vulnerabilities in web applications (reflected, stored, DOM-based)

**Data Exposure:**
- Sensitive data logging or storage
- PII handling violations
- API endpoint data leakage
- Debug information exposure
{custom_categories_section}
Additional notes:
- Even if something is only exploitable from the local network, it can still be a HIGH severity issue

ANALYSIS METHODOLOGY:

Phase 1 - Repository Context Research (Use file search tools):
- Identify existing security frameworks and libraries in use
- Look for established secure coding patterns in the codebase
- Examine existing sanitization and validation patterns
- Understand the project's security model and threat model

Phase 2 - Comparative Analysis:
- Compare new code changes against existing security patterns
- Identify deviations from established secure practices
- Look for inconsistent security implementations
- Flag code that introduces new attack surfaces

Phase 3 - Vulnerability Assessment:
- Examine each modified file for security implications
- Trace data flow from user inputs to sensitive operations
- Look for privilege boundaries being crossed unsafely
- Identify injection points and unsafe deserialization

REQUIRED OUTPUT FORMAT:

You MUST output your findings as structured JSON with this exact schema:

{{
  "findings": [
    {{
      "file": "path/to/file.py",
      "line": 42,
      "severity": "HIGH",
      "category": "sql_injection",
      "description": "User input passed to SQL query without parameterization",
      "exploit_scenario": "Attacker could extract database contents by manipulating the 'search' parameter with SQL injection payloads like '1; DROP TABLE users--'",
      "recommendation": "Replace string formatting with parameterized queries using SQLAlchemy or equivalent",
      "confidence": 0.95
    }}
  ],
  "analysis_summary": {{
    "files_reviewed": 8,
    "high_severity": 1,
    "medium_severity": 0,
    "low_severity": 0,
    "review_completed": true
  }}
}}

SEVERITY GUIDELINES:
- **HIGH**: Directly exploitable vulnerabilities leading to RCE, data breach, or authentication bypass
- **MEDIUM**: Vulnerabilities requiring specific conditions but with significant impact
- **LOW**: Defense-in-depth issues or lower-impact vulnerabilities

CONFIDENCE SCORING:
- 0.9-1.0: Certain exploit path identified, tested if possible
- 0.8-0.9: Clear vulnerability pattern with known exploitation methods
- 0.7-0.8: Suspicious pattern requiring specific conditions to exploit
- Below 0.7: Don't report (too speculative)

FINAL REMINDER:
Focus on HIGH and MEDIUM findings only. Better to miss some theoretical issues than flood the report with false positives. Each finding should be something a security engineer would confidently raise in a PR review.

IMPORTANT EXCLUSIONS - DO NOT REPORT:
- Denial of Service (DOS) vulnerabilities or resource exhaustion attacks
- Secrets/credentials stored on disk (these are managed separately)
- Rate limiting concerns or service overload scenarios. Services do not need to implement rate limiting.
- Memory consumption or CPU exhaustion issues.
- Lack of input validation on non-security-critical fields. If there isn't a proven problem from a lack of input validation, don't report it.

Begin your analysis now. Use the repository exploration tools to understand the codebase context, then analyze the PR changes for security implications.

Your final reply must contain the JSON and nothing else. You should not reply again after outputting the JSON.
"""
"""Unit tests for the json_parser module."""

import json
from typing import Any, Dict
from claudecode.json_parser import parse_json_with_fallbacks, extract_json_from_text


class TestJsonParser:
    """Test JSON parsing utilities.

    Conventions exercised below:
    - parse_json_with_fallbacks(text) returns a (success, payload) tuple;
      on failure the payload contains an "error" key.
    - extract_json_from_text(text) returns the first JSON object found in
      free text (or None); it does not extract top-level arrays.
    """

    def test_parse_valid_json(self):
        """Test parsing valid JSON string."""
        valid_json = '{"key": "value", "number": 42, "array": [1, 2, 3]}'
        success, result = parse_json_with_fallbacks(valid_json)

        assert success is True
        assert result == {"key": "value", "number": 42, "array": [1, 2, 3]}

    def test_parse_json_with_whitespace(self):
        """Test parsing JSON with extra whitespace."""
        json_with_spaces = '  \n  {"key": "value"}  \n  '
        success, result = parse_json_with_fallbacks(json_with_spaces)

        assert success is True
        assert result == {"key": "value"}

    def test_parse_empty_json_object(self):
        """Test parsing empty JSON object."""
        success, result = parse_json_with_fallbacks('{}')
        assert success is True
        assert result == {}

    def test_parse_empty_json_array(self):
        """Test parsing empty JSON array."""
        success, result = parse_json_with_fallbacks('[]')
        assert success is True
        assert result == []

    def test_parse_nested_json(self):
        """Test parsing nested JSON structures."""
        nested_json = '''
        {
            "level1": {
                "level2": {
                    "level3": ["a", "b", "c"]
                }
            }
        }
        '''
        success, result = parse_json_with_fallbacks(nested_json)

        assert success is True
        assert isinstance(result, dict)
        # Type narrowing for pyright
        result_dict: Dict[str, Any] = result
        assert result_dict["level1"]["level2"]["level3"] == ["a", "b", "c"]

    def test_parse_json_with_unicode(self):
        """Test parsing JSON with unicode characters."""
        unicode_json = '{"emoji": "🔒", "text": "Hello λ world"}'
        success, result = parse_json_with_fallbacks(unicode_json)

        assert success is True
        assert result["emoji"] == "🔒"
        assert result["text"] == "Hello λ world"

    def test_parse_json_with_escaped_characters(self):
        """Test parsing JSON with escaped characters."""
        # Double-escaped in the Python literal: the JSON text itself contains \\ and \"
        escaped_json = '{"path": "C:\\\\Users\\\\test", "quote": "\\"Hello\\""}'
        success, result = parse_json_with_fallbacks(escaped_json)

        assert success is True
        assert result["path"] == "C:\\Users\\test"
        assert result["quote"] == '"Hello"'

    def test_extract_json_from_text_with_backticks(self):
        """Test extracting JSON from markdown code blocks."""
        text_with_json = '''
        Here is some text before the JSON:

        ```json
        {"extracted": true, "value": 123}
        ```

        And some text after.
        '''
        result = extract_json_from_text(text_with_json)

        assert result == {"extracted": True, "value": 123}

    def test_extract_json_from_text_without_backticks(self):
        """Test extracting JSON from plain text."""
        text_with_json = '''
        Some text before
        {"plain": "json", "number": 456}
        Some text after
        '''
        result = extract_json_from_text(text_with_json)

        assert result == {"plain": "json", "number": 456}

    def test_extract_json_array_from_text(self):
        """Test extracting JSON array from text (currently not supported)."""
        text_with_array = '''
        Results:
        [{"id": 1}, {"id": 2}, {"id": 3}]
        Done.
        '''
        result = extract_json_from_text(text_with_array)

        # The function currently only extracts objects, not arrays
        # It should extract the first object it finds
        assert result == {"id": 1}

    def test_extract_json_with_multiple_blocks(self):
        """Test extracting JSON when multiple JSON blocks exist."""
        text_with_multiple = '''
        First block:
        {"first": true}

        Second block:
        {"second": true, "larger": "block"}
        '''
        # Should extract the first valid JSON block found
        # (either outcome is accepted; the extraction order is an implementation detail)
        result = extract_json_from_text(text_with_multiple)

        assert result == {"first": True} or result == {"second": True, "larger": "block"}

    def test_parse_invalid_json_returns_error(self):
        """Test parsing invalid JSON returns error."""
        invalid_jsons = [
            '{invalid json}',
            '{"unclosed": "string}',
            '{"trailing": "comma",}',
            '{unquoted: key}',
            'not json at all',
            ''
        ]

        for invalid in invalid_jsons:
            success, result = parse_json_with_fallbacks(invalid)
            assert success is False
            assert "error" in result

    def test_extract_json_from_text_no_json(self):
        """Test extracting JSON from text with no JSON returns None."""
        # None is included deliberately: the extractor must tolerate a missing input
        texts_without_json = [
            'This is just plain text',
            '```python\nprint("hello")\n```',
            '',
            None
        ]

        for text in texts_without_json:
            result = extract_json_from_text(text)
            assert result is None

    def test_parse_json_with_comments(self):
        """Test parsing JSON that might have comments (should fail)."""
        json_with_comments = '''
        {
            // This is a comment
            "key": "value"
        }
        '''
        success, result = parse_json_with_fallbacks(json_with_comments)
        assert success is False  # Standard JSON doesn't support comments
        assert "error" in result

    def test_extract_json_with_syntax_errors_in_text(self):
        """Test extracting JSON when there are syntax errors in surrounding text."""
        text = '''
        Here's some code with errors: print(

        But the JSON is valid:
        {"valid": "json", "number": 789}

        More broken code: }{][
        '''
        result = extract_json_from_text(text)

        assert result == {"valid": "json", "number": 789}

    def test_large_json_parsing(self):
        """Test parsing large JSON structures."""
        # 100 synthetic findings: large enough to stress the parser, small enough to stay fast
        large_json = {
            "findings": [
                {
                    "id": i,
                    "title": f"Finding {i}",
                    "description": f"Description for finding {i}",
                    "severity": "medium",
                    "file": f"/path/to/file{i}.py",
                    "line": i * 10
                }
                for i in range(100)
            ]
        }

        json_string = json.dumps(large_json)
        success, result = parse_json_with_fallbacks(json_string)

        assert success is True
        assert result == large_json
        assert len(result["findings"]) == 100

    def test_json_with_special_characters_in_strings(self):
        """Test JSON with special characters in string values."""
        special_json = {
            "newline": "line1\nline2",
            "tab": "before\tafter",
            "backslash": "path\\to\\file",
            "quotes": 'He said "Hello"',
            "unicode": "café ☕",
            "emoji": "🔒 Security 🛡️"
        }

        # Round-trip through json.dumps so escaping is produced by the stdlib
        json_string = json.dumps(special_json)
        success, result = parse_json_with_fallbacks(json_string)

        assert success is True
        assert result == special_json

    def test_extract_json_from_nested_code_blocks(self):
        """Test extracting JSON from nested code blocks."""
        text = '''
        Here's a code block within text:

        ```
        Some other code
        ```json
        {"nested": "json"}
        ```
        ```
        '''
        result = extract_json_from_text(text)

        # Should be able to extract the JSON
        assert result == {"nested": "json"}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Claude Code Security Reviewer 2 | 3 | An AI-powered security review GitHub Action using Claude to analyze code changes for security vulnerabilities. This action provides intelligent, context-aware security analysis for pull requests using Anthropic's Claude Code tool for deep semantic security analysis. See our blog post [here](https://www.anthropic.com/news/automate-security-reviews-with-claude-code) for more details. 4 | 5 | ## Features 6 | 7 | - **AI-Powered Analysis**: Uses Claude's advanced reasoning to detect security vulnerabilities with deep semantic understanding 8 | - **Diff-Aware Scanning**: For PRs, only analyzes changed files 9 | - **PR Comments**: Automatically comments on PRs with security findings 10 | - **Contextual Understanding**: Goes beyond pattern matching to understand code semantics 11 | - **Language Agnostic**: Works with any programming language 12 | - **False Positive Filtering**: Advanced filtering to reduce noise and focus on real vulnerabilities 13 | 14 | ## Quick Start 15 | 16 | Add this to your repository's `.github/workflows/security.yml`: 17 | 18 | ```yaml 19 | name: Security Review 20 | 21 | permissions: 22 | pull-requests: write # Needed for leaving PR comments 23 | contents: read 24 | 25 | on: 26 | pull_request: 27 | 28 | jobs: 29 | security: 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@v4 33 | with: 34 | ref: ${{ github.event.pull_request.head.sha || github.sha }} 35 | fetch-depth: 2 36 | 37 | - uses: anthropics/claude-code-security-review@main 38 | with: 39 | comment-pr: true 40 | claude-api-key: ${{ secrets.CLAUDE_API_KEY }} 41 | ``` 42 | 43 | ## Security Considerations 44 | 45 | This action is not hardened against prompt injection attacks and should only be used to review trusted PRs. 
We recommend [configuring your repository](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/enabling-features-for-your-repository/managing-github-actions-settings-for-a-repository#controlling-changes-from-forks-to-workflows-in-public-repositories) to use the "Require approval for all external contributors" option to ensure workflows only run after a maintainer has reviewed the PR. 46 | 47 | ## Configuration Options 48 | 49 | ### Action Inputs 50 | 51 | | Input | Description | Default | Required | 52 | |-------|-------------|---------|----------| 53 | | `claude-api-key` | Anthropic Claude API key for security analysis.
*Note*: This API key needs to be enabled for both the Claude API and Claude Code usage. | None | Yes | 54 | | `comment-pr` | Whether to comment on PRs with findings | `true` | No | 55 | | `upload-results` | Whether to upload results as artifacts | `true` | No | 56 | | `exclude-directories` | Comma-separated list of directories to exclude from scanning | None | No | 57 | | `claude-model` | Claude [model name](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-names) to use. Defaults to Opus 4.1. | `claude-opus-4-1-20250805` | No | 58 | | `claudecode-timeout` | Timeout for ClaudeCode analysis in minutes | `20` | No | 59 | | `run-every-commit` | Run ClaudeCode on every commit (skips cache check). Warning: May increase false positives on PRs with many commits. | `false` | No | 60 | | `false-positive-filtering-instructions` | Path to custom false positive filtering instructions text file | None | No | 61 | | `custom-security-scan-instructions` | Path to custom security scan instructions text file to append to audit prompt | None | No | 62 | 63 | ### Action Outputs 64 | 65 | | Output | Description | 66 | |--------|-------------| 67 | | `findings-count` | Total number of security findings | 68 | | `results-file` | Path to the results JSON file | 69 | 70 | ## How It Works 71 | 72 | ### Architecture 73 | 74 | ``` 75 | claudecode/ 76 | ├── github_action_audit.py # Main audit script for GitHub Actions 77 | ├── prompts.py # Security audit prompt templates 78 | ├── findings_filter.py # False positive filtering logic 79 | ├── claude_api_client.py # Claude API client for false positive filtering 80 | ├── json_parser.py # Robust JSON parsing utilities 81 | ├── requirements.txt # Python dependencies 82 | ├── test_*.py # Test suites 83 | └── evals/ # Eval tooling to test CC on arbitrary PRs 84 | ``` 85 | 86 | ### Workflow 87 | 88 | 1. **PR Analysis**: When a pull request is opened, Claude analyzes the diff to understand what changed 89 | 2. 
**Contextual Review**: Claude examines the code changes in context, understanding the purpose and potential security implications 90 | 3. **Finding Generation**: Security issues are identified with detailed explanations, severity ratings, and remediation guidance 91 | 4. **False Positive Filtering**: Advanced filtering removes low-impact or false positive prone findings to reduce noise 92 | 5. **PR Comments**: Findings are posted as review comments on the specific lines of code 93 | 94 | ## Security Analysis Capabilities 95 | 96 | ### Types of Vulnerabilities Detected 97 | 98 | - **Injection Attacks**: SQL injection, command injection, LDAP injection, XPath injection, NoSQL injection, XXE 99 | - **Authentication & Authorization**: Broken authentication, privilege escalation, insecure direct object references, bypass logic, session flaws 100 | - **Data Exposure**: Hardcoded secrets, sensitive data logging, information disclosure, PII handling violations 101 | - **Cryptographic Issues**: Weak algorithms, improper key management, insecure random number generation 102 | - **Input Validation**: Missing validation, improper sanitization, buffer overflows 103 | - **Business Logic Flaws**: Race conditions, time-of-check-time-of-use (TOCTOU) issues 104 | - **Configuration Security**: Insecure defaults, missing security headers, permissive CORS 105 | - **Supply Chain**: Vulnerable dependencies, typosquatting risks 106 | - **Code Execution**: RCE via deserialization, pickle injection, eval injection 107 | - **Cross-Site Scripting (XSS)**: Reflected, stored, and DOM-based XSS 108 | 109 | ### False Positive Filtering 110 | 111 | The tool automatically excludes a variety of low-impact and false positive prone findings to focus on high-impact vulnerabilities: 112 | - Denial of Service vulnerabilities 113 | - Rate limiting concerns 114 | - Memory/CPU exhaustion issues 115 | - Generic input validation without proven impact 116 | - Open redirect vulnerabilities 117 | 118 | The false 
positive filtering can also be tuned as needed for a given project's security goals. 119 | 120 | ### Benefits Over Traditional SAST 121 | 122 | - **Contextual Understanding**: Understands code semantics and intent, not just patterns 123 | - **Lower False Positives**: AI-powered analysis reduces noise by understanding when code is actually vulnerable 124 | - **Detailed Explanations**: Provides clear explanations of why something is a vulnerability and how to fix it 125 | - **Adaptive Learning**: Can be customized with organization-specific security requirements 126 | 127 | ## Installation & Setup 128 | 129 | ### GitHub Actions 130 | 131 | Follow the Quick Start guide above. The action handles all dependencies automatically. 132 | 133 | ### Local Development 134 | 135 | To run the security scanner locally against a specific PR, see the [evaluation framework documentation](claudecode/evals/README.md). 136 | 137 | 138 | 139 | ## Claude Code Integration: /security-review Command 140 | 141 | By default, Claude Code ships a `/security-review` [slash command](https://docs.anthropic.com/en/docs/claude-code/slash-commands) that provides the same security analysis capabilities as the GitHub Action workflow, but integrated directly into your Claude Code development environment. To use this, simply run `/security-review` to perform a comprehensive security review of all pending changes. 142 | 143 | ### Customizing the Command 144 | 145 | The default `/security-review` command is designed to work well in most cases, but it can also be customized based on your specific security needs. To do so: 146 | 147 | 1. Copy the [`security-review.md`](https://github.com/anthropics/claude-code-security-review/blob/main/.claude/commands/security-review.md?plain=1) file from this repository to your project's `.claude/commands/` folder. 148 | 2. Edit `security-review.md` to customize the security analysis. 
For example, you could add additional organization-specific directions to the false positive filtering instructions. 149 | 150 | ## Custom Scanning Configuration 151 | 152 | It is also possible to configure custom scanning and false positive filtering instructions, see the [`docs/`](docs/) folder for more details. 153 | 154 | ## Testing 155 | 156 | Run the test suite to validate functionality: 157 | 158 | ```bash 159 | cd claude-code-security-review 160 | # Run all tests 161 | pytest claudecode -v 162 | ``` 163 | 164 | ## Support 165 | 166 | For issues or questions: 167 | - Open an issue in this repository 168 | - Check the [GitHub Actions logs](https://docs.github.com/en/actions/monitoring-and-troubleshooting-workflows/viewing-workflow-run-history) for debugging information 169 | 170 | ## License 171 | 172 | MIT License - see [LICENSE](LICENSE) file for details. 173 | -------------------------------------------------------------------------------- /scripts/comment-pr-findings.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * Script to comment on PRs with security findings from ClaudeCode 5 | */ 6 | 7 | const fs = require('fs'); 8 | const { spawnSync } = require('child_process'); 9 | 10 | // Parse GitHub context from environment 11 | const context = { 12 | repo: { 13 | owner: process.env.GITHUB_REPOSITORY?.split('/')[0] || '', 14 | repo: process.env.GITHUB_REPOSITORY?.split('/')[1] || '' 15 | }, 16 | issue: { 17 | number: parseInt(process.env.GITHUB_EVENT_PATH ? JSON.parse(fs.readFileSync(process.env.GITHUB_EVENT_PATH, 'utf8')).pull_request?.number : '') || 0 18 | }, 19 | payload: { 20 | pull_request: process.env.GITHUB_EVENT_PATH ? 
/**
 * Call the GitHub REST API through the `gh` CLI.
 *
 * Arguments are passed to spawnSync as an array (never through a shell),
 * so endpoint/method values cannot be used for command injection.
 *
 * @param {string} endpoint - API path, e.g. `/repos/{owner}/{repo}/...`.
 * @param {string} [method='GET'] - HTTP method.
 * @param {object|null} [data=null] - JSON-serializable request body; when
 *   present it is piped to `gh --input -` via stdin.
 * @returns {*} Parsed JSON response body. NOTE(review): assumes the response
 *   body is valid JSON — an empty body would make JSON.parse throw; confirm
 *   callers only use JSON-returning endpoints.
 * @throws {Error} If the gh process cannot be spawned or exits non-zero.
 */
// GitHub API helper using gh CLI
function ghApi(endpoint, method = 'GET', data = null) {
  // Build arguments array safely to prevent command injection
  const args = ['api', endpoint, '--method', method];

  if (data) {
    args.push('--input', '-');
  }

  try {
    const result = spawnSync('gh', args, {
      encoding: 'utf8',
      input: data ? JSON.stringify(data) : undefined,
      stdio: ['pipe', 'pipe', 'pipe']
    });

    if (result.error) {
      throw new Error(`Failed to spawn gh process: ${result.error.message}`);
    }

    if (result.status !== 0) {
      console.error(`Error calling GitHub API: ${result.stderr}`);
      throw new Error(`gh process exited with code ${result.status}: ${result.stderr}`);
    }

    return JSON.parse(result.stdout);
  } catch (error) {
    // Log here too: callers may swallow the rethrown error (see reaction helpers)
    console.error(`Error calling GitHub API: ${error.message}`);
    throw error;
  }
}

/**
 * Add '+1' and '-1' reactions to a single PR comment.
 * Each reaction is best-effort: failures are logged and swallowed so one
 * failed reaction never aborts the overall commenting flow.
 *
 * Relies on the module-level `context` for repo owner/name.
 *
 * @param {number} commentId - ID of the comment to react to.
 * @param {boolean} [isReviewComment=true] - Review (inline) comments and
 *   issue comments use different reaction endpoints.
 */
// Helper function to add reactions to a comment
function addReactionsToComment(commentId, isReviewComment = true) {
  const reactions = ['+1', '-1']; // thumbs up and thumbs down
  const endpoint = isReviewComment
    ? `/repos/${context.repo.owner}/${context.repo.repo}/pulls/comments/${commentId}/reactions`
    : `/repos/${context.repo.owner}/${context.repo.repo}/issues/comments/${commentId}/reactions`;

  for (const reaction of reactions) {
    try {
      ghApi(endpoint, 'POST', { content: reaction });
      console.log(`Added ${reaction} reaction to comment ${commentId}`);
    } catch (error) {
      console.error(`Failed to add ${reaction} reaction to comment ${commentId}:`, error.message);
    }
  }
}

/**
 * Fetch all inline comments belonging to a review and add reactions to each.
 * Fetch failures are logged and swallowed (best-effort).
 *
 * Relies on the module-level `context` for repo owner/name and PR number.
 *
 * @param {number} reviewId - ID of the PR review whose comments get reactions.
 */
// Helper function to add reactions to all comments in a review
function addReactionsToReview(reviewId) {
  try {
    // Get all comments from the review
    const reviewComments = ghApi(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/reviews/${reviewId}/comments`);

    if (reviewComments && Array.isArray(reviewComments)) {
      for (const comment of reviewComments) {
        if (comment.id) {
          addReactionsToComment(comment.id, true);
        }
      }
    }
  } catch (error) {
    console.error(`Failed to get review comments for review ${reviewId}:`, error.message);
  }
}
Check if ClaudeCode comments should be silenced 120 | const silenceClaudeCodeComments = process.env.SILENCE_CLAUDECODE_COMMENTS === 'true'; 121 | 122 | if (silenceClaudeCodeComments) { 123 | console.log(`ClaudeCode comments silenced - excluding ${newFindings.length} findings from comments`); 124 | return; 125 | } 126 | 127 | 128 | // Process findings synchronously (gh cli doesn't support async well) 129 | for (const finding of newFindings) { 130 | const file = finding.file || finding.path; 131 | const line = finding.line || (finding.start && finding.start.line) || 1; 132 | const message = finding.description || (finding.extra && finding.extra.message) || 'Security vulnerability detected'; 133 | const severity = finding.severity || 'HIGH'; 134 | const category = finding.category || 'security_issue'; 135 | 136 | // Check if this file is part of the PR diff 137 | if (!fileMap[file]) { 138 | console.log(`File ${file} not in PR diff, skipping`); 139 | continue; 140 | } 141 | 142 | // Build the comment body 143 | let commentBody = `🤖 **Security Issue: ${message}**\n\n`; 144 | commentBody += `**Severity:** ${severity}\n`; 145 | commentBody += `**Category:** ${category}\n`; 146 | commentBody += `**Tool:** ClaudeCode AI Security Analysis\n`; 147 | 148 | // Add exploit scenario if available 149 | if (finding.exploit_scenario || (finding.extra && finding.extra.metadata && finding.extra.metadata.exploit_scenario)) { 150 | const exploitScenario = finding.exploit_scenario || finding.extra.metadata.exploit_scenario; 151 | commentBody += `\n**Exploit Scenario:** ${exploitScenario}\n`; 152 | } 153 | 154 | // Add recommendation if available 155 | if (finding.recommendation || (finding.extra && finding.extra.metadata && finding.extra.metadata.recommendation)) { 156 | const recommendation = finding.recommendation || finding.extra.metadata.recommendation; 157 | commentBody += `\n**Recommendation:** ${recommendation}\n`; 158 | } 159 | 160 | // Prepare the review comment 161 | const 
reviewComment = { 162 | path: file, 163 | line: line, 164 | side: 'RIGHT', 165 | body: commentBody 166 | }; 167 | 168 | reviewComments.push(reviewComment); 169 | } 170 | 171 | if (reviewComments.length === 0) { 172 | console.log('No findings to comment on PR diff'); 173 | return; 174 | } 175 | 176 | // Check for existing review comments to avoid duplicates 177 | const comments = ghApi(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/comments`); 178 | 179 | // Check if we've already commented on these findings 180 | const existingSecurityComments = comments.filter(comment => 181 | comment.user.type === 'Bot' && 182 | comment.body && comment.body.includes('🤖 **Security Issue:') 183 | ); 184 | 185 | if (existingSecurityComments.length > 0) { 186 | console.log(`Found ${existingSecurityComments.length} existing security comments, skipping to avoid duplicates`); 187 | return; 188 | } 189 | 190 | try { 191 | // Create a review with all the comments 192 | const reviewData = { 193 | commit_id: context.payload.pull_request.head.sha, 194 | event: 'COMMENT', 195 | comments: reviewComments 196 | }; 197 | 198 | const reviewResponse = ghApi(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/reviews`, 'POST', reviewData); 199 | 200 | console.log(`Created review with ${reviewComments.length} inline comments`); 201 | 202 | // Add reactions to the comments 203 | if (reviewResponse && reviewResponse.id) { 204 | addReactionsToReview(reviewResponse.id); 205 | } 206 | } catch (error) { 207 | console.error('Error creating review:', error); 208 | 209 | // Fallback: try to create individual comments if review fails 210 | // This might happen if line numbers are outside the diff context 211 | console.log('Attempting fallback with adjusted line numbers...'); 212 | 213 | for (const comment of reviewComments) { 214 | try { 215 | // Try to create comment with the original line 216 | const commentData = { 217 | path: comment.path, 218 
| line: comment.line, 219 | side: comment.side, 220 | body: comment.body, 221 | commit_id: context.payload.pull_request.head.sha 222 | }; 223 | 224 | const commentResponse = ghApi(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/comments`, 'POST', commentData); 225 | 226 | // Add reactions to the individual comment 227 | if (commentResponse && commentResponse.id) { 228 | addReactionsToComment(commentResponse.id, true); 229 | } 230 | } catch (lineError) { 231 | console.log(`Could not comment on ${comment.path}:${comment.line} - line might not be in diff context`); 232 | // If the specific line fails, try to get the file's patch and find a suitable line 233 | const fileInfo = fileMap[comment.path]; 234 | if (fileInfo && fileInfo.patch) { 235 | // This is a simplified approach - in production you'd want more sophisticated line mapping 236 | console.log(`File ${comment.path} has additions but line ${comment.line} is not in the diff`); 237 | } 238 | } 239 | } 240 | } 241 | } catch (error) { 242 | console.error('Failed to comment on PR:', error); 243 | process.exit(1); 244 | } 245 | } 246 | 247 | run(); -------------------------------------------------------------------------------- /claudecode/test_helper_functions.py: -------------------------------------------------------------------------------- 1 | """Unit tests for helper functions in github_action_audit module.""" 2 | 3 | import pytest 4 | import os 5 | from unittest.mock import patch, MagicMock 6 | 7 | 8 | from claudecode.github_action_audit import ( 9 | get_environment_config, 10 | initialize_clients, 11 | initialize_findings_filter, 12 | run_security_audit, 13 | apply_findings_filter, 14 | ConfigurationError, 15 | AuditError 16 | ) 17 | from claudecode.findings_filter import FindingsFilter 18 | 19 | 20 | class TestHelperFunctions: 21 | """Test helper functions in github_action_audit module.""" 22 | 23 | def test_get_environment_config_success(self): 24 | """Test successful 
environment configuration retrieval.""" 25 | with patch.dict(os.environ, { 26 | 'GITHUB_REPOSITORY': 'owner/repo', 27 | 'PR_NUMBER': '123' 28 | }): 29 | repo_name, pr_number = get_environment_config() 30 | 31 | assert repo_name == 'owner/repo' 32 | assert pr_number == 123 33 | 34 | def test_get_environment_config_missing_repository(self): 35 | """Test error when GITHUB_REPOSITORY is missing.""" 36 | with patch.dict(os.environ, {'PR_NUMBER': '123'}, clear=True): 37 | with pytest.raises(ConfigurationError) as exc_info: 38 | get_environment_config() 39 | 40 | assert "GITHUB_REPOSITORY environment variable required" in str(exc_info.value) 41 | 42 | def test_get_environment_config_missing_pr_number(self): 43 | """Test error when PR_NUMBER is missing.""" 44 | with patch.dict(os.environ, {'GITHUB_REPOSITORY': 'owner/repo'}, clear=True): 45 | with pytest.raises(ConfigurationError) as exc_info: 46 | get_environment_config() 47 | 48 | assert "PR_NUMBER environment variable required" in str(exc_info.value) 49 | 50 | def test_get_environment_config_invalid_pr_number(self): 51 | """Test error when PR_NUMBER is not a valid integer.""" 52 | with patch.dict(os.environ, { 53 | 'GITHUB_REPOSITORY': 'owner/repo', 54 | 'PR_NUMBER': 'not-a-number' 55 | }): 56 | with pytest.raises(ConfigurationError) as exc_info: 57 | get_environment_config() 58 | 59 | assert "Invalid PR_NUMBER" in str(exc_info.value) 60 | 61 | @patch('claudecode.github_action_audit.GitHubActionClient') 62 | @patch('claudecode.github_action_audit.SimpleClaudeRunner') 63 | def test_initialize_clients_success(self, mock_claude_runner, mock_github_client): 64 | """Test successful client initialization.""" 65 | mock_github_instance = MagicMock() 66 | mock_claude_instance = MagicMock() 67 | mock_github_client.return_value = mock_github_instance 68 | mock_claude_runner.return_value = mock_claude_instance 69 | 70 | github_client, claude_runner = initialize_clients() 71 | 72 | assert github_client == mock_github_instance 73 | 
assert claude_runner == mock_claude_instance 74 | mock_github_client.assert_called_once() 75 | mock_claude_runner.assert_called_once() 76 | 77 | @patch('claudecode.github_action_audit.GitHubActionClient') 78 | def test_initialize_clients_github_failure(self, mock_github_client): 79 | """Test error when GitHub client initialization fails.""" 80 | mock_github_client.side_effect = Exception("GitHub API error") 81 | 82 | with pytest.raises(ConfigurationError) as exc_info: 83 | initialize_clients() 84 | 85 | assert "Failed to initialize GitHub client" in str(exc_info.value) 86 | assert "GitHub API error" in str(exc_info.value) 87 | 88 | @patch('claudecode.github_action_audit.GitHubActionClient') 89 | @patch('claudecode.github_action_audit.SimpleClaudeRunner') 90 | def test_initialize_clients_claude_failure(self, mock_claude_runner, mock_github_client): 91 | """Test error when Claude runner initialization fails.""" 92 | mock_github_client.return_value = MagicMock() 93 | mock_claude_runner.side_effect = Exception("Claude init error") 94 | 95 | with pytest.raises(ConfigurationError) as exc_info: 96 | initialize_clients() 97 | 98 | assert "Failed to initialize Claude runner" in str(exc_info.value) 99 | assert "Claude init error" in str(exc_info.value) 100 | 101 | @patch('claudecode.github_action_audit.FindingsFilter') 102 | def test_initialize_findings_filter_with_claude(self, mock_filter): 103 | """Test initializing findings filter with Claude API enabled.""" 104 | mock_filter_instance = MagicMock() 105 | mock_filter.return_value = mock_filter_instance 106 | 107 | with patch.dict(os.environ, { 108 | 'ENABLE_CLAUDE_FILTERING': 'true', 109 | 'ANTHROPIC_API_KEY': 'test-key-123' 110 | }): 111 | result = initialize_findings_filter() 112 | 113 | assert result == mock_filter_instance 114 | mock_filter.assert_called_once_with( 115 | use_hard_exclusions=True, 116 | use_claude_filtering=True, 117 | api_key='test-key-123', 118 | custom_filtering_instructions=None 119 | ) 120 | 121 | 
@patch('claudecode.github_action_audit.FindingsFilter') 122 | def test_initialize_findings_filter_without_claude(self, mock_simple_filter): 123 | """Test initializing findings filter without Claude API.""" 124 | mock_filter_instance = MagicMock() 125 | mock_simple_filter.return_value = mock_filter_instance 126 | 127 | with patch.dict(os.environ, { 128 | 'ENABLE_CLAUDE_FILTERING': 'false' 129 | }, clear=True): 130 | result = initialize_findings_filter() 131 | 132 | assert result == mock_filter_instance 133 | mock_simple_filter.assert_called_once() 134 | 135 | @patch('claudecode.github_action_audit.FindingsFilter') 136 | def test_initialize_findings_filter_with_defaults(self, mock_simple_filter): 137 | """Test initializing findings filter with defaults.""" 138 | mock_filter_instance = MagicMock() 139 | mock_simple_filter.return_value = mock_filter_instance 140 | 141 | with patch.dict(os.environ, {}, clear=True): 142 | result = initialize_findings_filter() 143 | 144 | assert result == mock_filter_instance 145 | 146 | def test_run_security_audit_success(self): 147 | """Test successful security audit execution.""" 148 | mock_runner = MagicMock() 149 | mock_runner.run_security_audit.return_value = ( 150 | True, 151 | "", 152 | {"findings": [{"id": 1}], "analysis_summary": {}} 153 | ) 154 | 155 | result = run_security_audit(mock_runner, "test prompt") 156 | 157 | assert result == {"findings": [{"id": 1}], "analysis_summary": {}} 158 | mock_runner.run_security_audit.assert_called_once() 159 | 160 | def test_run_security_audit_failure(self): 161 | """Test security audit execution failure.""" 162 | mock_runner = MagicMock() 163 | mock_runner.run_security_audit.return_value = ( 164 | False, 165 | "Audit failed: timeout", 166 | {} 167 | ) 168 | 169 | with pytest.raises(AuditError) as exc_info: 170 | run_security_audit(mock_runner, "test prompt") 171 | 172 | assert "Security audit failed: Audit failed: timeout" in str(exc_info.value) 173 | 174 | def 
test_apply_findings_filter_with_findings_filter(self): 175 | """Test applying FindingsFilter to findings.""" 176 | mock_filter = MagicMock(spec=FindingsFilter) 177 | mock_filter.filter_findings.return_value = ( 178 | True, 179 | { 180 | 'filtered_findings': [{"id": 1}], 181 | 'excluded_findings': [{"id": 2}], 182 | 'analysis_summary': {'total': 2, 'kept': 1} 183 | }, 184 | MagicMock() # filter_stats 185 | ) 186 | 187 | original_findings = [{"id": 1}, {"id": 2}] 188 | pr_context = {"repo_name": "test/repo"} 189 | 190 | # Create mock github client 191 | mock_github_client = MagicMock() 192 | mock_github_client._is_excluded.return_value = False 193 | 194 | kept, excluded, summary = apply_findings_filter( 195 | mock_filter, original_findings, pr_context, mock_github_client 196 | ) 197 | 198 | assert kept == [{"id": 1}] 199 | assert excluded == [{"id": 2}] 200 | assert summary == {'total': 2, 'kept': 1, 'directory_excluded_count': 0} 201 | 202 | mock_filter.filter_findings.assert_called_once_with(original_findings, pr_context) 203 | 204 | def test_apply_findings_filter_with_simple_filter(self): 205 | """Test applying FindingsFilter to findings.""" 206 | mock_filter = MagicMock(spec=FindingsFilter) 207 | mock_filter.filter_findings.return_value = ( 208 | True, 209 | { 210 | 'filtered_findings': [{"id": 1}], 211 | 'excluded_findings': [{"id": 2}], 212 | 'analysis_summary': {} 213 | }, 214 | MagicMock() # filter_stats 215 | ) 216 | 217 | original_findings = [{"id": 1}, {"id": 2}] 218 | pr_context = {"repo_name": "test/repo"} 219 | 220 | # Create mock github client 221 | mock_github_client = MagicMock() 222 | mock_github_client._is_excluded.return_value = False 223 | 224 | kept, excluded, summary = apply_findings_filter( 225 | mock_filter, original_findings, pr_context, mock_github_client 226 | ) 227 | 228 | assert kept == [{"id": 1}] 229 | assert excluded == [{"id": 2}] 230 | assert summary == {'directory_excluded_count': 0} 231 | 232 | 
mock_filter.filter_findings.assert_called_once_with(original_findings, pr_context) 233 | 234 | def test_apply_findings_filter_failure(self): 235 | """Test handling of filter failure.""" 236 | mock_filter = MagicMock(spec=FindingsFilter) 237 | mock_filter.filter_findings.return_value = ( 238 | False, # filter failed 239 | {}, 240 | MagicMock() 241 | ) 242 | 243 | original_findings = [{"id": 1}, {"id": 2}] 244 | pr_context = {"repo_name": "test/repo"} 245 | 246 | # Create mock github client 247 | mock_github_client = MagicMock() 248 | mock_github_client._is_excluded.return_value = False 249 | 250 | kept, excluded, summary = apply_findings_filter( 251 | mock_filter, original_findings, pr_context, mock_github_client 252 | ) 253 | 254 | # On failure, should keep all findings 255 | assert kept == original_findings 256 | assert excluded == [] 257 | assert summary == {'directory_excluded_count': 0} -------------------------------------------------------------------------------- /.claude/commands/security-review.md: -------------------------------------------------------------------------------- 1 | --- 2 | allowed-tools: Bash(git diff:*), Bash(git status:*), Bash(git log:*), Bash(git show:*), Bash(git remote show:*), Read, Glob, Grep, LS, Task 3 | description: Complete a security review of the pending changes on the current branch 4 | --- 5 | 6 | You are a senior security engineer conducting a focused security review of the changes on this branch. 7 | 8 | GIT STATUS: 9 | 10 | ``` 11 | !`git status` 12 | ``` 13 | 14 | FILES MODIFIED: 15 | 16 | ``` 17 | !`git diff --name-only origin/HEAD...` 18 | ``` 19 | 20 | COMMITS: 21 | 22 | ``` 23 | !`git log --no-decorate origin/HEAD...` 24 | ``` 25 | 26 | DIFF CONTENT: 27 | 28 | ``` 29 | !`git diff --merge-base origin/HEAD` 30 | ``` 31 | 32 | Review the complete diff above. This contains all code changes in the PR. 
33 | 34 | 35 | OBJECTIVE: 36 | Perform a security-focused code review to identify HIGH-CONFIDENCE security vulnerabilities that could have real exploitation potential. This is not a general code review - focus ONLY on security implications newly added by this PR. Do not comment on existing security concerns. 37 | 38 | CRITICAL INSTRUCTIONS: 39 | 1. MINIMIZE FALSE POSITIVES: Only flag issues where you're >80% confident of actual exploitability 40 | 2. AVOID NOISE: Skip theoretical issues, style concerns, or low-impact findings 41 | 3. FOCUS ON IMPACT: Prioritize vulnerabilities that could lead to unauthorized access, data breaches, or system compromise 42 | 4. EXCLUSIONS: Do NOT report the following issue types: 43 | - Denial of Service (DOS) vulnerabilities, even if they allow service disruption 44 | - Secrets or sensitive data stored on disk (these are handled by other processes) 45 | - Rate limiting or resource exhaustion issues 46 | 47 | SECURITY CATEGORIES TO EXAMINE: 48 | 49 | **Input Validation Vulnerabilities:** 50 | - SQL injection via unsanitized user input 51 | - Command injection in system calls or subprocesses 52 | - XXE injection in XML parsing 53 | - Template injection in templating engines 54 | - NoSQL injection in database queries 55 | - Path traversal in file operations 56 | 57 | **Authentication & Authorization Issues:** 58 | - Authentication bypass logic 59 | - Privilege escalation paths 60 | - Session management flaws 61 | - JWT token vulnerabilities 62 | - Authorization logic bypasses 63 | 64 | **Crypto & Secrets Management:** 65 | - Hardcoded API keys, passwords, or tokens 66 | - Weak cryptographic algorithms or implementations 67 | - Improper key storage or management 68 | - Cryptographic randomness issues 69 | - Certificate validation bypasses 70 | 71 | **Injection & Code Execution:** 72 | - Remote code execution via deserialization 73 | - Pickle injection in Python 74 | - YAML deserialization vulnerabilities 75 | - Eval injection in dynamic
code execution 76 | - XSS vulnerabilities in web applications (reflected, stored, DOM-based) 77 | 78 | **Data Exposure:** 79 | - Sensitive data logging or storage 80 | - PII handling violations 81 | - API endpoint data leakage 82 | - Debug information exposure 83 | 84 | Additional notes: 85 | - Even if something is only exploitable from the local network, it can still be a HIGH severity issue 86 | 87 | ANALYSIS METHODOLOGY: 88 | 89 | Phase 1 - Repository Context Research (Use file search tools): 90 | - Identify existing security frameworks and libraries in use 91 | - Look for established secure coding patterns in the codebase 92 | - Examine existing sanitization and validation patterns 93 | - Understand the project's security model and threat model 94 | 95 | Phase 2 - Comparative Analysis: 96 | - Compare new code changes against existing security patterns 97 | - Identify deviations from established secure practices 98 | - Look for inconsistent security implementations 99 | - Flag code that introduces new attack surfaces 100 | 101 | Phase 3 - Vulnerability Assessment: 102 | - Examine each modified file for security implications 103 | - Trace data flow from user inputs to sensitive operations 104 | - Look for privilege boundaries being crossed unsafely 105 | - Identify injection points and unsafe deserialization 106 | 107 | REQUIRED OUTPUT FORMAT: 108 | 109 | You MUST output your findings in markdown. The markdown output should contain the file, line number, severity, category (e.g. `sql_injection` or `xss`), description, exploit scenario, and fix recommendation. 
110 | 111 | For example: 112 | 113 | # Vuln 1: XSS: `foo.py:42` 114 | 115 | * Severity: High 116 | * Description: User input from `username` parameter is directly interpolated into HTML without escaping, allowing reflected XSS attacks 117 | * Exploit Scenario: Attacker crafts URL like /bar?q= to execute JavaScript in victim's browser, enabling session hijacking or data theft 118 | * Recommendation: Use Flask's escape() function or Jinja2 templates with auto-escaping enabled for all user inputs rendered in HTML 119 | 120 | SEVERITY GUIDELINES: 121 | - **HIGH**: Directly exploitable vulnerabilities leading to RCE, data breach, or authentication bypass 122 | - **MEDIUM**: Vulnerabilities requiring specific conditions but with significant impact 123 | - **LOW**: Defense-in-depth issues or lower-impact vulnerabilities 124 | 125 | CONFIDENCE SCORING: 126 | - 0.9-1.0: Certain exploit path identified, tested if possible 127 | - 0.8-0.9: Clear vulnerability pattern with known exploitation methods 128 | - 0.7-0.8: Suspicious pattern requiring specific conditions to exploit 129 | - Below 0.7: Don't report (too speculative) 130 | 131 | FINAL REMINDER: 132 | Focus on HIGH and MEDIUM findings only. Better to miss some theoretical issues than flood the report with false positives. Each finding should be something a security engineer would confidently raise in a PR review. 133 | 134 | FALSE POSITIVE FILTERING: 135 | 136 | > You do not need to run commands to reproduce the vulnerability, just read the code to determine if it is a real vulnerability. Do not use the bash tool or write to any files. 137 | > 138 | > HARD EXCLUSIONS - Automatically exclude findings matching these patterns: 139 | > 1. Denial of Service (DOS) vulnerabilities or resource exhaustion attacks. 140 | > 2. Secrets or credentials stored on disk if they are otherwise secured. 141 | > 3. Rate limiting concerns or service overload scenarios. 142 | > 4. Memory consumption or CPU exhaustion issues. 143 | > 5. 
Lack of input validation on non-security-critical fields without proven security impact. 144 | > 6. Input sanitization concerns for GitHub Action workflows unless they are clearly triggerable via untrusted input. 145 | > 7. A lack of hardening measures. Code is not expected to implement all security best practices, only flag concrete vulnerabilities. 146 | > 8. Race conditions or timing attacks that are theoretical rather than practical issues. Only report a race condition if it is concretely problematic. 147 | > 9. Vulnerabilities related to outdated third-party libraries. These are managed separately and should not be reported here. 148 | > 10. Memory safety issues such as buffer overflows or use-after-free vulnerabilities are impossible in rust. Do not report memory safety issues in rust or any other memory safe languages. 149 | > 11. Files that are only unit tests or only used as part of running tests. 150 | > 12. Log spoofing concerns. Outputting un-sanitized user input to logs is not a vulnerability. 151 | > 13. SSRF vulnerabilities that only control the path. SSRF is only a concern if it can control the host or protocol. 152 | > 14. Including user-controlled content in AI system prompts is not a vulnerability. 153 | > 15. Regex injection. Injecting untrusted content into a regex is not a vulnerability. 154 | > 16. Regex DOS concerns. 155 | > 17. Insecure documentation. Do not report any findings in documentation files such as markdown files. 156 | > 18. A lack of audit logs is not a vulnerability. 157 | > 158 | > PRECEDENTS - 159 | > 1. Logging high value secrets in plaintext is a vulnerability. Logging URLs is assumed to be safe. 160 | > 2. UUIDs can be assumed to be unguessable and do not need to be validated. 161 | > 3. Environment variables and CLI flags are trusted values. Attackers are generally not able to modify them in a secure environment. Any attack that relies on controlling an environment variable is invalid. 162 | > 4.
Resource management issues such as memory or file descriptor leaks are not valid. 163 | > 5. Subtle or low impact web vulnerabilities such as tabnabbing, XS-Leaks, prototype pollution, and open redirects should not be reported unless they are extremely high confidence. 164 | > 6. React and Angular are generally secure against XSS. These frameworks do not need to sanitize or escape user input unless it is using dangerouslySetInnerHTML, bypassSecurityTrustHtml, or similar methods. Do not report XSS vulnerabilities in React or Angular components or tsx files unless they are using unsafe methods. 165 | > 7. Most vulnerabilities in github action workflows are not exploitable in practice. Before validating a github action workflow vulnerability ensure it is concrete and has a very specific attack path. 166 | > 8. A lack of permission checking or authentication in client-side JS/TS code is not a vulnerability. Client-side code is not trusted and does not need to implement these checks, they are handled on the server-side. The same applies to all flows that send untrusted data to the backend, the backend is responsible for validating and sanitizing all inputs. 167 | > 9. Only include MEDIUM findings if they are obvious and concrete issues. 168 | > 10. Most vulnerabilities in ipython notebooks (*.ipynb files) are not exploitable in practice. Before validating a notebook vulnerability ensure it is concrete and has a very specific attack path where untrusted input can trigger the vulnerability. 169 | > 11. Logging non-PII data is not a vulnerability even if the data may be sensitive. Only report logging vulnerabilities if they expose sensitive information such as secrets, passwords, or personally identifiable information (PII). 170 | > 12. Command injection vulnerabilities in shell scripts are generally not exploitable in practice since shell scripts generally do not run with untrusted user input. 
Only report command injection vulnerabilities in shell scripts if they are concrete and have a very specific attack path for untrusted input. 171 | > 172 | > SIGNAL QUALITY CRITERIA - For remaining findings, assess: 173 | > 1. Is there a concrete, exploitable vulnerability with a clear attack path? 174 | > 2. Does this represent a real security risk vs theoretical best practice? 175 | > 3. Are there specific code locations and reproduction steps? 176 | > 4. Would this finding be actionable for a security team? 177 | > 178 | > For each finding, assign a confidence score from 1-10: 179 | > - 1-3: Low confidence, likely false positive or noise 180 | > - 4-6: Medium confidence, needs investigation 181 | > - 7-10: High confidence, likely true vulnerability 182 | 183 | START ANALYSIS: 184 | 185 | Begin your analysis now. Do this in 3 steps: 186 | 187 | 1. Use a sub-task to identify vulnerabilities. Use the repository exploration tools to understand the codebase context, then analyze the PR changes for security implications. In the prompt for this sub-task, include all of the above. 188 | 2. Then for each vulnerability identified by the above sub-task, create a new sub-task to filter out false-positives. Launch these sub-tasks as parallel sub-tasks. In the prompt for these sub-tasks, include everything in the "FALSE POSITIVE FILTERING" instructions. 189 | 3. Filter out any vulnerabilities where the sub-task reported a confidence less than 8. 190 | 191 | Your final reply must contain the markdown report and nothing else. 
-------------------------------------------------------------------------------- /claudecode/test_prompts.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the prompts module.""" 2 | 3 | from claudecode.prompts import get_security_audit_prompt 4 | 5 | 6 | class TestPrompts: 7 | """Test prompt generation functions.""" 8 | 9 | def test_get_security_audit_prompt_basic(self): 10 | """Test basic security audit prompt generation.""" 11 | pr_data = { 12 | "number": 123, 13 | "title": "Add new feature", 14 | "body": "This PR adds a new feature to handle user input", 15 | "user": "testuser", 16 | "changed_files": 1, 17 | "additions": 10, 18 | "deletions": 5, 19 | "head": { 20 | "repo": { 21 | "full_name": "owner/repo" 22 | } 23 | }, 24 | "files": [ 25 | { 26 | "filename": "app.py", 27 | "status": "modified", 28 | "additions": 10, 29 | "deletions": 5 30 | } 31 | ] 32 | } 33 | 34 | pr_diff = """ 35 | diff --git a/app.py b/app.py 36 | @@ -1,5 +1,10 @@ 37 | def process_input(user_input): 38 | - return user_input 39 | + # Process the input 40 | + result = eval(user_input) # Potential security issue 41 | + return result 42 | """ 43 | 44 | prompt = get_security_audit_prompt(pr_data, pr_diff) 45 | 46 | # Check that prompt contains expected elements 47 | assert isinstance(prompt, str) 48 | assert len(prompt) > 0 49 | assert "123" in prompt # PR number 50 | assert "Add new feature" in prompt # PR title 51 | assert "testuser" in prompt # Author 52 | assert "app.py" in prompt # File name 53 | assert "eval(user_input)" in prompt # The actual diff content 54 | 55 | def test_get_security_audit_prompt_empty_body(self): 56 | """Test prompt generation with empty PR body.""" 57 | pr_data = { 58 | "number": 456, 59 | "title": "Quick fix", 60 | "body": None, # Empty body 61 | "user": "author", 62 | "changed_files": 0, 63 | "additions": 0, 64 | "deletions": 0, 65 | "head": { 66 | "repo": { 67 | "full_name": "owner/repo" 68 | } 69 | }, 70 | 
"files": [] 71 | } 72 | 73 | pr_diff = "diff --git a/test.js b/test.js" 74 | 75 | prompt = get_security_audit_prompt(pr_data, pr_diff) 76 | 77 | assert isinstance(prompt, str) 78 | assert "456" in prompt 79 | assert "Quick fix" in prompt 80 | assert "author" in prompt 81 | 82 | def test_get_security_audit_prompt_multiple_files(self): 83 | """Test prompt generation with multiple files.""" 84 | pr_data = { 85 | "number": 789, 86 | "title": "Security improvements", 87 | "body": "Fixing various security issues", 88 | "user": "security-team", 89 | "changed_files": 3, 90 | "additions": 70, 91 | "deletions": 110, 92 | "head": { 93 | "repo": { 94 | "full_name": "owner/repo" 95 | } 96 | }, 97 | "files": [ 98 | { 99 | "filename": "auth.py", 100 | "status": "modified", 101 | "additions": 20, 102 | "deletions": 10 103 | }, 104 | { 105 | "filename": "config.yaml", 106 | "status": "added", 107 | "additions": 50, 108 | "deletions": 0 109 | }, 110 | { 111 | "filename": "old_auth.py", 112 | "status": "deleted", 113 | "additions": 0, 114 | "deletions": 100 115 | } 116 | ] 117 | } 118 | 119 | pr_diff = """ 120 | diff --git a/auth.py b/auth.py 121 | @@ -1,10 +1,20 @@ 122 | +import secrets 123 | + 124 | diff --git a/config.yaml b/config.yaml 125 | @@ -0,0 +1,50 @@ 126 | +database: 127 | + password: "hardcoded_password" 128 | """ 129 | 130 | prompt = get_security_audit_prompt(pr_data, pr_diff) 131 | 132 | # Check all files are mentioned 133 | assert "auth.py" in prompt 134 | assert "config.yaml" in prompt 135 | assert "old_auth.py" in prompt 136 | 137 | # Check file statuses 138 | assert "modified" in prompt.lower() 139 | assert "added" in prompt.lower() 140 | assert "deleted" in prompt.lower() 141 | 142 | def test_get_security_audit_prompt_special_characters(self): 143 | """Test prompt generation with special characters.""" 144 | pr_data = { 145 | "number": 999, 146 | "title": "Fix SQL injection in user's profile", 147 | "body": "This fixes a SQL injection vulnerability in the 
`get_user()` function", 148 | "user": "user-with-dash", 149 | "changed_files": 1, 150 | "additions": 5, 151 | "deletions": 3, 152 | "head": { 153 | "repo": { 154 | "full_name": "owner/repo" 155 | } 156 | }, 157 | "files": [ 158 | { 159 | "filename": "src/db/queries.py", 160 | "status": "modified", 161 | "additions": 5, 162 | "deletions": 3 163 | } 164 | ] 165 | } 166 | 167 | pr_diff = """ 168 | diff --git a/src/db/queries.py b/src/db/queries.py 169 | @@ -10,3 +10,5 @@ 170 | - query = f"SELECT * FROM users WHERE id = {user_id}" 171 | + query = "SELECT * FROM users WHERE id = ?" 172 | + cursor.execute(query, (user_id,)) 173 | """ 174 | 175 | prompt = get_security_audit_prompt(pr_data, pr_diff) 176 | 177 | # Check special characters are preserved 178 | assert "user's" in prompt 179 | assert "user-with-dash" in prompt 180 | assert "src/db/queries.py" in prompt 181 | 182 | def test_get_security_audit_prompt_no_files(self): 183 | """Test prompt generation with no files (edge case).""" 184 | pr_data = { 185 | "number": 111, 186 | "title": "Documentation update", 187 | "body": "Just updating docs", 188 | "user": "doc-author", 189 | "changed_files": 0, 190 | "additions": 0, 191 | "deletions": 0, 192 | "head": { 193 | "repo": { 194 | "full_name": "owner/repo" 195 | } 196 | }, 197 | "files": [] # No files 198 | } 199 | 200 | pr_diff = "" # Empty diff 201 | 202 | prompt = get_security_audit_prompt(pr_data, pr_diff) 203 | 204 | assert isinstance(prompt, str) 205 | assert "111" in prompt 206 | assert "Documentation update" in prompt 207 | 208 | def test_get_security_audit_prompt_structure(self): 209 | """Test that prompt has expected structure.""" 210 | pr_data = { 211 | "number": 42, 212 | "title": "Test PR", 213 | "body": "Test description", 214 | "user": "testuser", 215 | "changed_files": 1, 216 | "additions": 1, 217 | "deletions": 1, 218 | "head": { 219 | "repo": { 220 | "full_name": "owner/repo" 221 | } 222 | }, 223 | "files": [ 224 | { 225 | "filename": "test.py", 226 | 
"status": "modified", 227 | "additions": 1, 228 | "deletions": 1 229 | } 230 | ] 231 | } 232 | 233 | pr_diff = "diff --git a/test.py b/test.py\n+print('test')" 234 | 235 | prompt = get_security_audit_prompt(pr_data, pr_diff) 236 | 237 | # Should contain sections for metadata and diff 238 | assert "PR #" in prompt or "Pull Request" in prompt 239 | assert "Title:" in prompt or pr_data["title"] in prompt 240 | assert "Author:" in prompt or pr_data["user"]["login"] in prompt 241 | assert "Files:" in prompt or "test.py" in prompt 242 | 243 | # Should contain the actual diff 244 | assert pr_diff in prompt or "print('test')" in prompt 245 | 246 | def test_get_security_audit_prompt_long_diff(self): 247 | """Test prompt generation with very long diff.""" 248 | pr_data = { 249 | "number": 12345, 250 | "title": "Major refactoring", 251 | "body": "Refactoring the entire codebase", 252 | "user": "refactor-bot", 253 | "changed_files": 10, 254 | "additions": 1000, 255 | "deletions": 500, 256 | "head": { 257 | "repo": { 258 | "full_name": "owner/repo" 259 | } 260 | }, 261 | "files": [ 262 | { 263 | "filename": f"file{i}.py", 264 | "status": "modified", 265 | "additions": 100, 266 | "deletions": 50 267 | } 268 | for i in range(10) 269 | ] 270 | } 271 | 272 | # Create a large diff 273 | pr_diff = "\n".join([ 274 | f"diff --git a/file{i}.py b/file{i}.py\n" + 275 | "\n".join([f"+line {j}" for j in range(50)]) 276 | for i in range(10) 277 | ]) 278 | 279 | prompt = get_security_audit_prompt(pr_data, pr_diff) 280 | 281 | # Should handle large diffs without error 282 | assert isinstance(prompt, str) 283 | assert len(prompt) > 1000 # Should be substantial 284 | assert "12345" in prompt 285 | assert "Major refactoring" in prompt 286 | 287 | def test_get_security_audit_prompt_unicode(self): 288 | """Test prompt generation with unicode characters.""" 289 | pr_data = { 290 | "number": 666, 291 | "title": "Add emoji support 🎉", 292 | "body": "This PR adds emoji rendering 🔒 🛡️", 293 | "user": 
"émoji-user", 294 | "changed_files": 1, 295 | "additions": 42, 296 | "deletions": 0, 297 | "head": { 298 | "repo": { 299 | "full_name": "owner/repo" 300 | } 301 | }, 302 | "files": [ 303 | { 304 | "filename": "émojis.py", 305 | "status": "added", 306 | "additions": 42, 307 | "deletions": 0 308 | } 309 | ] 310 | } 311 | 312 | pr_diff = """ 313 | diff --git a/émojis.py b/émojis.py 314 | +# 🔒 Security check 315 | +def check_input(text: str) -> bool: 316 | + return "🚨" not in text 317 | """ 318 | 319 | prompt = get_security_audit_prompt(pr_data, pr_diff) 320 | 321 | # Check unicode is preserved 322 | assert "🎉" in prompt # Title emoji 323 | assert "émoji-user" in prompt 324 | assert "émojis.py" in prompt 325 | assert "🚨" in prompt # From diff -------------------------------------------------------------------------------- /claudecode/test_github_action_audit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Pytest tests for GitHub Action audit script components. 
4 | """ 5 | 6 | 7 | class TestImports: 8 | """Test that all required modules can be imported.""" 9 | 10 | def test_main_module_import(self): 11 | """Test that the main module can be imported.""" 12 | from claudecode import github_action_audit 13 | assert hasattr(github_action_audit, 'GitHubActionClient') 14 | assert hasattr(github_action_audit, 'SimpleClaudeRunner') 15 | # SimpleFindingsFilter was removed 16 | assert hasattr(github_action_audit, 'main') 17 | 18 | def test_component_imports(self): 19 | """Test that all component modules can be imported.""" 20 | from claudecode.prompts import get_security_audit_prompt 21 | from claudecode.json_parser import parse_json_with_fallbacks, extract_json_from_text 22 | 23 | # Verify they're callable/usable 24 | assert callable(get_security_audit_prompt) 25 | assert callable(parse_json_with_fallbacks) 26 | assert callable(extract_json_from_text) 27 | 28 | 29 | class TestHardExclusionRules: 30 | """Test the HardExclusionRules patterns.""" 31 | 32 | def test_dos_patterns(self): 33 | """Test DOS pattern exclusions.""" 34 | from claudecode.findings_filter import HardExclusionRules 35 | 36 | dos_findings = [ 37 | {'description': 'Potential denial of service vulnerability'}, 38 | {'description': 'DOS attack through resource exhaustion'}, 39 | {'description': 'Infinite loop causing resource exhaustion'}, 40 | ] 41 | 42 | for finding in dos_findings: 43 | reason = HardExclusionRules.get_exclusion_reason(finding) 44 | assert reason is not None 45 | assert 'dos' in reason.lower() 46 | 47 | def test_rate_limiting_patterns(self): 48 | """Test rate limiting pattern exclusions.""" 49 | from claudecode.findings_filter import HardExclusionRules 50 | 51 | rate_limit_findings = [ 52 | {'description': 'Missing rate limiting on endpoint'}, 53 | {'description': 'No rate limit implemented for API'}, 54 | {'description': 'Implement rate limiting for this route'}, 55 | ] 56 | 57 | for finding in rate_limit_findings: 58 | reason = 
HardExclusionRules.get_exclusion_reason(finding) 59 | assert reason is not None 60 | assert 'rate limit' in reason.lower() 61 | 62 | def test_open_redirect_patterns(self): 63 | """Test open redirect pattern exclusions.""" 64 | from claudecode.findings_filter import HardExclusionRules 65 | 66 | redirect_findings = [ 67 | {'description': 'Open redirect vulnerability found'}, 68 | {'description': 'Unvalidated redirect in URL parameter'}, 69 | {'description': 'Redirect attack possible through user input'}, 70 | ] 71 | 72 | for finding in redirect_findings: 73 | reason = HardExclusionRules.get_exclusion_reason(finding) 74 | assert reason is not None 75 | assert 'open redirect' in reason.lower() 76 | 77 | def test_markdown_file_exclusion(self): 78 | """Test that findings in .md files are excluded.""" 79 | from claudecode.findings_filter import HardExclusionRules 80 | 81 | md_findings = [ 82 | {'file': 'README.md', 'description': 'SQL injection vulnerability'}, 83 | {'file': 'docs/security.md', 'description': 'Command injection found'}, 84 | {'file': 'CHANGELOG.MD', 'description': 'XSS vulnerability'}, # Test case insensitive 85 | {'file': 'path/to/file.Md', 'description': 'Path traversal'}, # Mixed case 86 | ] 87 | 88 | for finding in md_findings: 89 | reason = HardExclusionRules.get_exclusion_reason(finding) 90 | assert reason is not None 91 | assert 'markdown' in reason.lower() 92 | 93 | def test_non_markdown_files_not_excluded(self): 94 | """Test that findings in non-.md files are not excluded due to file extension.""" 95 | from claudecode.findings_filter import HardExclusionRules 96 | 97 | non_md_findings = [ 98 | {'file': 'main.py', 'description': 'SQL injection vulnerability'}, 99 | {'file': 'server.js', 'description': 'Command injection found'}, 100 | {'file': 'index.html', 'description': 'XSS vulnerability'}, 101 | {'file': 'config.yml', 'description': 'Hardcoded credentials'}, 102 | {'file': 'README.txt', 'description': 'Path traversal'}, 103 | {'file': 
'file.mdx', 'description': 'Security issue'}, # Not .md 104 | ] 105 | 106 | for finding in non_md_findings: 107 | reason = HardExclusionRules.get_exclusion_reason(finding) 108 | # Should not be excluded for being a markdown file 109 | # (might be excluded for other reasons like DOS patterns) 110 | if reason: 111 | assert 'markdown' not in reason.lower() 112 | 113 | def test_keeps_real_vulnerabilities(self): 114 | """Test that real vulnerabilities are not excluded.""" 115 | from claudecode.findings_filter import HardExclusionRules 116 | 117 | real_vulns = [ 118 | {'file': 'auth.py', 'description': 'SQL injection in user authentication'}, 119 | {'file': 'exec.js', 'description': 'Command injection through user input'}, 120 | {'file': 'comments.php', 'description': 'Cross-site scripting in comment field'}, 121 | {'file': 'upload.go', 'description': 'Path traversal in file upload'}, 122 | ] 123 | 124 | for finding in real_vulns: 125 | reason = HardExclusionRules.get_exclusion_reason(finding) 126 | assert reason is None 127 | 128 | 129 | class TestJSONParser: 130 | """Test JSON parsing utilities.""" 131 | 132 | def test_parse_valid_json(self): 133 | """Test parsing valid JSON.""" 134 | from claudecode.json_parser import parse_json_with_fallbacks 135 | 136 | valid_json = '{"test": "data", "number": 123}' 137 | success, result = parse_json_with_fallbacks(valid_json, "test") 138 | 139 | assert success is True 140 | assert result == {"test": "data", "number": 123} 141 | 142 | def test_parse_invalid_json(self): 143 | """Test parsing invalid JSON.""" 144 | from claudecode.json_parser import parse_json_with_fallbacks 145 | 146 | invalid_json = '{invalid json}' 147 | success, result = parse_json_with_fallbacks(invalid_json, "test") 148 | 149 | assert success is False 150 | assert 'error' in result 151 | assert 'Invalid JSON response' in result['error'] 152 | 153 | def test_extract_json_from_text(self): 154 | """Test extracting JSON from mixed text.""" 155 | from 
claudecode.json_parser import extract_json_from_text 156 | 157 | mixed_text = 'Some text before {"key": "value"} some text after' 158 | result = extract_json_from_text(mixed_text) 159 | 160 | assert result == {"key": "value"} 161 | 162 | def test_extract_json_from_text_no_json(self): 163 | """Test extracting JSON when none exists.""" 164 | from claudecode.json_parser import extract_json_from_text 165 | 166 | plain_text = 'This is just plain text with no JSON' 167 | result = extract_json_from_text(plain_text) 168 | 169 | assert result is None 170 | 171 | 172 | class TestPromptsModule: 173 | """Test the prompts module.""" 174 | 175 | def test_get_security_audit_prompt(self): 176 | """Test security audit prompt generation.""" 177 | from claudecode.prompts import get_security_audit_prompt 178 | 179 | pr_data = { 180 | 'number': 123, 181 | 'title': 'Test PR', 182 | 'body': 'Test description', 183 | 'user': 'testuser', 184 | 'changed_files': 1, 185 | 'additions': 10, 186 | 'deletions': 5, 187 | 'head': { 188 | 'repo': { 189 | 'full_name': 'owner/repo' 190 | } 191 | }, 192 | 'files': [ 193 | { 194 | 'filename': 'test.py', 195 | 'status': 'modified', 196 | 'additions': 10, 197 | 'deletions': 5, 198 | 'patch': '@@ -1,5 +1,10 @@\n+added line' 199 | } 200 | ] 201 | } 202 | 203 | pr_diff = "diff --git a/test.py b/test.py\n+added line" 204 | 205 | prompt = get_security_audit_prompt(pr_data, pr_diff) 206 | 207 | assert isinstance(prompt, str) 208 | assert 'security' in prompt.lower() 209 | assert 'PR #123' in prompt 210 | assert 'test.py' in prompt 211 | 212 | 213 | class TestDeploymentPRDetection: 214 | """Test deployment PR title pattern matching.""" 215 | 216 | def test_deployment_pr_patterns(self): 217 | """Test that deployment PR titles are correctly identified.""" 218 | import re 219 | 220 | deployment_pattern = r'^Deploy\s+[a-f0-9]{6,}\s+to\s+(production|staging|development|production-services)' 221 | 222 | # These should match 223 | deployment_titles = [ 224 | "Deploy 
53f395b0 to production-services", 225 | "Deploy af179b5b to production", 226 | "Deploy 1a3cb909 to production", 227 | "Deploy 49c09ea5 to production-services", 228 | "Deploy 8e7acc60 to production", 229 | "Deploy e0b1fe0b to production-services", 230 | "Deploy c53e6010 to production", 231 | "Deploy 42c4a061 to production", 232 | "Deploy 9de55976 to production-services", 233 | "deploy abcdef123456 to staging", # lowercase should work 234 | "DEPLOY ABCDEF01 TO DEVELOPMENT", # uppercase should work 235 | ] 236 | 237 | for title in deployment_titles: 238 | assert re.match(deployment_pattern, title, re.IGNORECASE), f"Failed to match deployment PR: {title}" 239 | 240 | def test_non_deployment_pr_patterns(self): 241 | """Test that non-deployment PR titles are not matched.""" 242 | import re 243 | 244 | deployment_pattern = r'^Deploy\s+[a-f0-9]{6,}\s+to\s+(production|staging|development|production-services)' 245 | 246 | # These should NOT match 247 | non_deployment_titles = [ 248 | "Add new feature", 249 | "Fix bug in deployment script", 250 | "Update deployment documentation", 251 | "Deploy new feature to production", # No commit hash 252 | "Deploy abc to production", # Too short hash 253 | "Deploy 12345g to production", # Non-hex character 254 | "Preparing deploy af179b5b to production", # Doesn't start with Deploy 255 | "Deploy af179b5b to testing", # Wrong environment 256 | "Deploy af179b5b", # Missing environment 257 | "af179b5b to production", # Missing Deploy prefix 258 | ] 259 | 260 | for title in non_deployment_titles: 261 | assert not re.match(deployment_pattern, title, re.IGNORECASE), f"Incorrectly matched non-deployment PR: {title}" 262 | 263 | -------------------------------------------------------------------------------- /claudecode/test_eval_engine.py: -------------------------------------------------------------------------------- 1 | """Tests for eval_engine module.""" 2 | 3 | import os 4 | from unittest.mock import Mock, patch 5 | import pytest 6 | 
import json

from claudecode.evals.eval_engine import (
    EvaluationEngine, EvalResult, EvalCase, run_single_evaluation
)


class TestEvalResult:
    """Test the EvalResult dataclass."""

    def test_eval_result_creation(self):
        """Test creating an EvalResult instance."""
        result = EvalResult(
            repo_name="test/repo",
            pr_number=123,
            description="Test PR",
            success=True,
            runtime_seconds=10.5,
            findings_count=2,
            detected_vulnerabilities=True
        )

        assert result.repo_name == "test/repo"
        assert result.pr_number == 123
        assert result.description == "Test PR"
        assert result.success is True
        assert result.runtime_seconds == 10.5
        assert result.findings_count == 2
        assert result.detected_vulnerabilities is True
        # Fields not passed above fall back to their dataclass defaults.
        assert result.error_message == ""
        assert result.findings_summary is None

    def test_eval_result_with_error(self):
        """Test creating an EvalResult with error."""
        result = EvalResult(
            repo_name="test/repo",
            pr_number=456,
            description="Failed PR",
            success=False,
            runtime_seconds=5.0,
            findings_count=0,
            detected_vulnerabilities=False,
            error_message="Failed to clone repository"
        )

        assert result.success is False
        assert result.error_message == "Failed to clone repository"
        assert result.findings_count == 0

    def test_eval_result_with_findings(self):
        """Test creating an EvalResult with findings."""
        findings = [
            {"file": "test.py", "line": 10, "severity": "HIGH"}
        ]
        result = EvalResult(
            repo_name="test/repo",
            pr_number=789,
            description="PR with findings",
            success=True,
            runtime_seconds=15.0,
            findings_count=1,
            detected_vulnerabilities=True,
            findings_summary=findings,
            full_findings=findings
        )

        assert result.findings_count == 1
        assert result.detected_vulnerabilities is True
        assert result.findings_summary is not None
        assert len(result.findings_summary) == 1

    def test_eval_result_to_dict(self):
        """Test converting EvalResult to dictionary."""
        result = EvalResult(
            repo_name="test/repo",
            pr_number=123,
            description="Test",
            success=True,
            runtime_seconds=10.0,
            findings_count=0,
            detected_vulnerabilities=False
        )

        result_dict = result.to_dict()
        assert result_dict['repo_name'] == "test/repo"
        assert result_dict['pr_number'] == 123
        assert result_dict['success'] is True


class TestEvalCase:
    """Test the EvalCase dataclass."""

    def test_eval_case_creation(self):
        """Test creating an EvalCase instance."""
        case = EvalCase(
            repo_name="test/repo",
            pr_number=123,
            description="Test case"
        )

        assert case.repo_name == "test/repo"
        assert case.pr_number == 123
        assert case.description == "Test case"


class TestEvaluationEngine:
    """Test the EvaluationEngine class."""

    def test_engine_initialization(self):
        """Test engine initialization with API key."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            engine = EvaluationEngine()

        assert engine.work_dir == os.path.expanduser("~/code/audit")
        assert engine.claude_api_key == 'test-key'

    def test_engine_initialization_no_api_key(self):
        """Test engine initialization without API key."""
        # clear=True empties the environment so the key is truly absent.
        with patch.dict(os.environ, {}, clear=True):
            with pytest.raises(ValueError, match="ANTHROPIC_API_KEY"):
                EvaluationEngine()

    def test_get_eval_branch_name(self):
        """Test branch name generation."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            engine = EvaluationEngine()

        case = EvalCase("owner/repo", 123)
        branch_name = engine._get_eval_branch_name(case)

        # Prefix is fixed; the suffix after it must be non-empty.
        assert branch_name.startswith("eval-pr-owner-repo-123-")
        assert len(branch_name) > len("eval-pr-owner-repo-123-")

    # NOTE: stacked @patch decorators apply bottom-up, so the innermost
    # decorator's mock is the first mock parameter after self.
    @patch('os.path.exists')
    @patch('subprocess.run')
    def test_clean_worktrees(self, mock_run, mock_exists):
        """Test worktree cleanup."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            # Mock git auth token call in __init__
            # side_effect order mirrors the exact subprocess.run call order.
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=0),  # prune
                Mock(returncode=0, stdout=""),  # list (empty)
                Mock(returncode=0, stdout=""),  # branch --list (empty)
            ]

            engine = EvaluationEngine()

            mock_exists.return_value = True  # repo_path exists

            engine._clean_worktrees("/repo/path", "eval-pr-test-123")

            # Should call run four times: gh auth token (in __init__), prune, list, branch --list
            assert mock_run.call_count == 4

    @patch('subprocess.run')
    @patch('os.path.exists')
    def test_setup_repository_clone(self, mock_exists, mock_run):
        """Test repository setup with cloning."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            mock_exists.return_value = False  # Repository doesn't exist
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=0),  # git clone
                Mock(returncode=0),  # git fetch
                Mock(returncode=0),  # git worktree add
            ]

            engine = EvaluationEngine()

            case = EvalCase("owner/repo", 123)
            success, worktree_path, error = engine._setup_repository(case)

            assert success is True
            assert worktree_path != ""
            assert error == ""

    @patch('subprocess.run')
    @patch('os.path.exists')
    def test_setup_repository_existing(self, mock_exists, mock_run):
        """Test repository setup with existing repository."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            # First call checks base_repo_path, second checks repo_path inside _clean_worktrees
            mock_exists.side_effect = [True, True]
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=0),  # worktree prune
                Mock(returncode=0, stdout=""),  # worktree list
                Mock(returncode=0, stdout=""),  # git branch --list
                Mock(returncode=0),  # git fetch
                Mock(returncode=0),  # git worktree add
            ]

            engine = EvaluationEngine()

            case = EvalCase("owner/repo", 123)
            success, worktree_path, error = engine._setup_repository(case)

            assert success is True
            assert error == ""

    @patch('subprocess.run')
    def test_run_sast_audit_success(self, mock_run):
        """Test successful SAST audit run."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            # Mock gh auth token call first, then the audit
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=0, stdout=json.dumps({
                    "findings": [
                        {"file": "test.py", "line": 10, "severity": "HIGH"}
                    ]
                }), stderr="")  # SAST audit
            ]

            engine = EvaluationEngine()

            case = EvalCase("owner/repo", 123)
            success, output, parsed, error = engine._run_sast_audit(case, "/repo/path")

            assert success is True
            assert parsed is not None
            assert len(parsed["findings"]) == 1
            assert error is None

    @patch('subprocess.run')
    def test_run_sast_audit_failure(self, mock_run):
        """Test failed SAST audit run."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=1, stdout="", stderr="Error running audit")  # SAST audit fails
            ]

            engine = EvaluationEngine()

            case = EvalCase("owner/repo", 123)
            success, output, parsed, error = engine._run_sast_audit(case, "/repo/path")

            assert success is False
            assert error is not None
            assert "Exit code 1" in error

    @patch.object(EvaluationEngine, '_setup_repository')
    @patch.object(EvaluationEngine, '_run_sast_audit')
    @patch.object(EvaluationEngine, '_cleanup_worktree')
    def test_run_evaluation_success(self, mock_cleanup, mock_audit, mock_setup):
        """Test successful evaluation run."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            engine = EvaluationEngine()

            mock_setup.return_value = (True, "/worktree/path", "")
            # _run_sast_audit returns (success, raw_output, parsed_json, error).
            mock_audit.return_value = (
                True,
                '{"findings": [{"file": "test.py", "line": 10}]}',
                {"findings": [{"file": "test.py", "line": 10}]},
                None
            )

            case = EvalCase("owner/repo", 123, "Test PR")
            result = engine.run_evaluation(case)

            assert result.success is True
            assert result.findings_count == 1
            assert result.detected_vulnerabilities is True
            assert result.findings_summary is not None
            assert len(result.findings_summary) == 1

            # Worktree must be cleaned up even on the success path.
            mock_cleanup.assert_called_once()

    @patch.object(EvaluationEngine, '_setup_repository')
    def test_run_evaluation_setup_failure(self, mock_setup):
        """Test evaluation with repository setup failure."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            engine = EvaluationEngine()

            mock_setup.return_value = (False, "", "Clone failed")

            case = EvalCase("owner/repo", 123, "Test PR")
            result = engine.run_evaluation(case)

            assert result.success is False
            assert result.findings_count == 0
            assert "Repository setup failed" in result.error_message


class TestHelperFunctions:
    """Test helper functions."""

    @patch.object(EvaluationEngine, 'run_evaluation')
    def test_run_single_evaluation(self, mock_run):
        """Test run_single_evaluation helper."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            mock_result = Mock(spec=EvalResult)
            mock_run.return_value = mock_result

            case = EvalCase("owner/repo", 123)
            result = run_single_evaluation(case, verbose=True)

            # The helper should delegate to the engine unchanged.
            assert result == mock_result
            mock_run.assert_called_once_with(case)


--------------------------------------------------------------------------------
/claudecode/test_github_client.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Unit tests for GitHubActionClient.
"""

import pytest
import os
from unittest.mock import Mock, patch

from claudecode.github_action_audit import GitHubActionClient


class TestGitHubActionClient:
    """Test GitHubActionClient functionality."""

    def test_init_requires_token(self):
        """Test that client initialization requires GITHUB_TOKEN."""
        # Remove token if it exists
        original_token = os.environ.pop('GITHUB_TOKEN', None)

        try:
            with pytest.raises(ValueError, match="GITHUB_TOKEN environment variable required"):
                GitHubActionClient()
        finally:
            # Restore token
            if original_token:
                os.environ['GITHUB_TOKEN'] = original_token

    def test_init_with_token(self):
        """Test successful initialization with token."""
        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            assert client.github_token == 'test-token'
            assert client.headers['Authorization'] == 'Bearer test-token'
            assert 'Accept' in client.headers
            assert 'X-GitHub-Api-Version' in client.headers

    @patch('requests.get')
    def test_get_pr_data_success(self, mock_get):
        """Test successful PR data retrieval."""
        # Mock responses
        pr_response = Mock()
        pr_response.json.return_value = {
            'number': 123,
            'title': 'Test PR',
            'body': 'PR description',
            'user': {'login': 'testuser'},
            'created_at': '2024-01-01T00:00:00Z',
            'updated_at': '2024-01-01T01:00:00Z',
            'state': 'open',
            'head': {
                'ref': 'feature-branch',
                'sha': 'abc123',
                'repo': {
                    'full_name': 'owner/repo'
                }
            },
            'base': {
                'ref': 'main',
                'sha': 'def456'
            },
            'additions': 50,
            'deletions': 10,
            'changed_files': 3
        }

        files_response = Mock()
        files_response.json.return_value = [
            {
                'filename': 'src/main.py',
                'status': 'modified',
                'additions': 30,
                'deletions': 5,
                'changes': 35,
                'patch': '@@ -1,5 +1,10 @@\n+import os\n def main():'
            },
            {
                'filename': 'tests/test_main.py',
                'status': 'added',
                'additions': 20,
                'deletions': 5,
                'changes': 25,
                'patch': '@@ -0,0 +1,20 @@\n+def test_main():'
            }
        ]

        # First GET returns the PR, second returns its file list.
        mock_get.side_effect = [pr_response, files_response]

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            result = client.get_pr_data('owner/repo', 123)

        # Verify API calls
        assert mock_get.call_count == 2
        mock_get.assert_any_call(
            'https://api.github.com/repos/owner/repo/pulls/123',
            headers=client.headers
        )
        mock_get.assert_any_call(
            'https://api.github.com/repos/owner/repo/pulls/123/files?per_page=100',
            headers=client.headers
        )

        # Verify result structure
        assert result['number'] == 123
        assert result['title'] == 'Test PR'
        assert result['user'] == 'testuser'
        assert len(result['files']) == 2
        assert result['files'][0]['filename'] == 'src/main.py'
        assert result['files'][1]['status'] == 'added'

    @patch('requests.get')
    def test_get_pr_data_null_head_repo(self, mock_get):
        """Test PR data retrieval when head repo is null (deleted fork)."""
        pr_response = Mock()
        pr_response.json.return_value = {
            'number': 123,
            'title': 'Test PR',
            # Don't include body key to test the get() default
            'user': {'login': 'testuser'},
            'created_at': '2024-01-01T00:00:00Z',
            'updated_at': '2024-01-01T01:00:00Z',
            'state': 'open',
            'head': {
                'ref': 'feature-branch',
                'sha': 'abc123',
                'repo': None  # Deleted fork
            },
            'base': {
                'ref': 'main',
                'sha': 'def456'
            },
            'additions': 50,
            'deletions': 10,
            'changed_files': 3
        }

        files_response = Mock()
        files_response.json.return_value = []

        mock_get.side_effect = [pr_response, files_response]

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            result = client.get_pr_data('owner/repo', 123)

        # Should use original repo name when head repo is None
        assert result['head']['repo']['full_name'] == 'owner/repo'
        # The implementation passes None through, test should match that
        assert result['body'] == ''

    @patch('requests.get')
    def test_get_pr_data_api_error(self, mock_get):
        """Test PR data retrieval with API error."""
        mock_response = Mock()
        mock_response.raise_for_status.side_effect = Exception("API Error")
        mock_get.return_value = mock_response

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            with pytest.raises(Exception, match="API Error"):
                client.get_pr_data('owner/repo', 123)

    @patch('requests.get')
    def test_get_pr_diff_success(self, mock_get):
        """Test successful PR diff retrieval."""
        diff_content = """diff --git a/src/main.py b/src/main.py
index abc123..def456 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,5 +1,10 @@
+import os
 def main():
     print("Hello")
+    # New feature
+    process_data()
"""

        mock_response = Mock()
        mock_response.text = diff_content
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            result = client.get_pr_diff('owner/repo', 123)

        # Verify API call — the diff media type selects the raw-diff response.
        mock_get.assert_called_once()
        call_args = mock_get.call_args
        assert call_args[0][0] == 'https://api.github.com/repos/owner/repo/pulls/123'
        assert call_args[1]['headers']['Accept'] == 'application/vnd.github.diff'

        # Verify result
        assert 'import os' in result
        assert 'process_data()' in result

    @patch('requests.get')
    def test_get_pr_diff_filters_generated_files(self, mock_get):
        """Test that generated files are filtered from diff."""
        diff_with_generated = """diff --git a/src/main.py b/src/main.py
index abc123..def456 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,5 +1,10 @@
+import os
 def main():
     print("Hello")
diff --git a/generated/code.py b/generated/code.py
index 111..222 100644
--- a/generated/code.py
+++ b/generated/code.py
@@ -1,3 +1,5 @@
 # @generated by protoc
+# More generated code
+print("generated")
diff --git a/src/feature.py b/src/feature.py
index 333..444 100644
--- a/src/feature.py
+++ b/src/feature.py
@@ -1,3 +1,5 @@
+# Real code
 def feature():
     pass
"""

        mock_response = Mock()
        mock_response.text = diff_with_generated
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            result = client.get_pr_diff('owner/repo', 123)

        # Verify generated file is filtered out
        assert 'src/main.py' in result
        assert 'src/feature.py' in result
        assert 'generated/code.py' not in result
        assert '@generated' not in result
        assert 'More generated code' not in result

    def test_filter_generated_files_edge_cases(self):
        """Test edge cases in generated file filtering."""
        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()

        # Empty diff
        assert client._filter_generated_files('') == ''

        # No diff markers - if no diff format, everything is filtered
        text = "Just some random text\nwith @generated in it"
        # Since there's no 'diff --git' marker, the split results in one section
        # that contains @generated, so it gets filtered out
        assert client._filter_generated_files(text) == ''

        # Multiple generated markers
        diff = """diff --git a/a.py b/a.py
@generated by tool
content
diff --git a/b.py b/b.py
normal content
diff --git a/c.py b/c.py
# This file is @generated
more content
"""
        result = client._filter_generated_files(diff)
        assert 'a.py' not in result
        assert 'b.py' in result
        assert 'c.py' not in result


class TestGitHubAPIIntegration:
    """Test GitHub API integration scenarios."""

    @patch('requests.get')
    def test_rate_limit_handling(self, mock_get):
        """Test that rate limit headers are respected."""
        mock_response = Mock()
        mock_response.headers = {
            'X-RateLimit-Remaining': '0',
            'X-RateLimit-Reset': '1234567890'
        }
        mock_response.status_code = 403
        mock_response.json.return_value = {
            'message': 'API rate limit exceeded'
        }
        mock_response.raise_for_status.side_effect = Exception("Rate limit exceeded")
        mock_get.return_value = mock_response

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            with pytest.raises(Exception, match="Rate limit exceeded"):
                client.get_pr_data('owner/repo', 123)

    @patch('requests.get')
    def test_pagination_not_needed_for_pr_files(self, mock_get):
        """Test that PR files endpoint returns all files without pagination."""
        # GitHub API returns up to 3000 files per PR without pagination
        large_file_list = [
            {
                'filename': f'file{i}.py',
                'status': 'added',
                'additions': 10,
                'deletions': 0,
                'changes': 10,
                'patch': f'@@ -0,0 +1,10 @@\n+# File {i}'
            }
            for i in range(100)  # 100 files
        ]

        pr_response = Mock()
        pr_response.json.return_value = {
            'number': 123,
            'title': 'Large PR',
            'body': 'Many files',
            'user': {'login': 'testuser'},
            'created_at': '2024-01-01T00:00:00Z',
            'updated_at': '2024-01-01T01:00:00Z',
            'state': 'open',
            'head': {'ref': 'feature', 'sha': 'abc123', 'repo': {'full_name': 'owner/repo'}},
            'base': {'ref': 'main', 'sha': 'def456'},
            'additions': 1000,
            'deletions': 0,
            'changed_files': 100
        }

        files_response = Mock()
        files_response.json.return_value = large_file_list

        mock_get.side_effect = [pr_response, files_response]

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            result = client.get_pr_data('owner/repo', 123)

        assert len(result['files']) == 100
        assert result['files'][0]['filename'] == 'file0.py'
        assert result['files'][99]['filename'] == 'file99.py'


--------------------------------------------------------------------------------
/action.yml:
--------------------------------------------------------------------------------
name: 'Claude Code Security Reviewer'
description: 'AI-powered security review GitHub 
Action using Claude to analyze code changes for security vulnerabilities' 3 | author: 'Anthropic' 4 | 5 | inputs: 6 | comment-pr: 7 | description: 'Whether to comment on PRs with findings' 8 | required: false 9 | default: 'true' 10 | 11 | upload-results: 12 | description: 'Whether to upload results as artifacts' 13 | required: false 14 | default: 'true' 15 | 16 | exclude-directories: 17 | description: 'Comma-separated list of directories to exclude from scanning' 18 | required: false 19 | default: '' 20 | 21 | claudecode-timeout: 22 | description: 'Timeout for ClaudeCode analysis in minutes' 23 | required: false 24 | default: '20' 25 | 26 | claude-api-key: 27 | description: 'Anthropic Claude API key for security analysis' 28 | required: true 29 | default: '' 30 | 31 | claude-model: 32 | description: 'Claude model to use for security analysis (e.g., claude-sonnet-4-20250514)' 33 | required: false 34 | default: '' 35 | 36 | run-every-commit: 37 | description: 'Run ClaudeCode on every commit (skips cache check). Warning: This may lead to more false positives on PRs with many commits as the AI analyzes the same code multiple times.' 
38 | required: false 39 | default: 'false' 40 | 41 | false-positive-filtering-instructions: 42 | description: 'Path to custom false positive filtering instructions text file' 43 | required: false 44 | default: '' 45 | 46 | custom-security-scan-instructions: 47 | description: 'Path to custom security scan instructions text file to append to audit prompt' 48 | required: false 49 | default: '' 50 | 51 | outputs: 52 | findings-count: 53 | description: 'Number of security findings' 54 | value: ${{ steps.claudecode-scan.outputs.findings_count }} 55 | 56 | results-file: 57 | description: 'Path to the results JSON file' 58 | value: ${{ steps.claudecode-scan.outputs.results_file }} 59 | 60 | runs: 61 | using: 'composite' 62 | steps: 63 | - name: Install GitHub CLI 64 | shell: bash 65 | run: | 66 | echo "::group::Install gh CLI" 67 | # Install GitHub CLI for PR operations 68 | sudo apt-get update && sudo apt-get install -y gh 69 | echo "::endgroup::" 70 | 71 | - name: Set up Python 72 | uses: actions/setup-python@v5 73 | with: 74 | python-version: '3.x' 75 | 76 | - name: Check ClaudeCode run history 77 | id: claudecode-history 78 | if: github.event_name == 'pull_request' 79 | uses: actions/cache@v4 80 | with: 81 | path: .claudecode-marker 82 | key: claudecode-${{ github.repository_id }}-pr-${{ github.event.pull_request.number }}-${{ github.sha }} 83 | restore-keys: | 84 | claudecode-${{ github.repository_id }}-pr-${{ github.event.pull_request.number }}- 85 | 86 | - name: Determine ClaudeCode enablement 87 | id: claudecode-check 88 | shell: bash 89 | env: 90 | PR_NUMBER: ${{ github.event.pull_request.number }} 91 | RUN_EVERY_COMMIT: ${{ inputs.run-every-commit }} 92 | run: | 93 | # Check if ClaudeCode should be enabled 94 | ENABLE_CLAUDECODE="true" 95 | SILENCE_CLAUDECODE_COMMENTS="false" 96 | 97 | # For PRs, check sampling and cache 98 | if [ "${{ github.event_name }}" == "pull_request" ]; then 99 | PR_NUMBER="$PR_NUMBER" 100 | CACHE_HIT="${{ 
steps.claudecode-history.outputs.cache-hit }}" 101 | 102 | # Now check cache - if ClaudeCode has already run, disable unless run-every-commit is true 103 | # Check if marker file exists (cache may have been restored from a different SHA) 104 | if [ "$RUN_EVERY_COMMIT" != "true" ] && [ -f ".claudecode-marker/marker.json" ]; then 105 | echo "ClaudeCode has already run on PR #$PR_NUMBER (found marker file), forcing disable to avoid false positives" 106 | ENABLE_CLAUDECODE="false" 107 | elif [ "$RUN_EVERY_COMMIT" == "true" ] && [ -f ".claudecode-marker/marker.json" ]; then 108 | echo "ClaudeCode has already run on PR #$PR_NUMBER but run-every-commit is enabled, running again" 109 | elif [ "$ENABLE_CLAUDECODE" == "true" ]; then 110 | echo "ClaudeCode will run for PR #$PR_NUMBER (first run)" 111 | fi 112 | fi 113 | 114 | echo "enable_claudecode=$ENABLE_CLAUDECODE" >> $GITHUB_OUTPUT 115 | echo "silence_claudecode_comments=$SILENCE_CLAUDECODE_COMMENTS" >> $GITHUB_OUTPUT 116 | 117 | if [ "$ENABLE_CLAUDECODE" == "true" ]; then 118 | echo "ClaudeCode is enabled for this run" 119 | else 120 | echo "ClaudeCode is disabled for this run" 121 | fi 122 | 123 | - name: Reserve ClaudeCode slot to prevent race conditions 124 | if: steps.claudecode-check.outputs.enable_claudecode == 'true' && github.event_name == 'pull_request' 125 | shell: bash 126 | env: 127 | REPOSITORY_ID: ${{ github.repository_id }} 128 | REPOSITORY: ${{ github.repository }} 129 | PR_NUMBER: ${{ github.event.pull_request.number }} 130 | SHA: ${{ github.sha }} 131 | RUN_ID: ${{ github.run_id }} 132 | RUN_NUMBER: ${{ github.run_number }} 133 | run: | 134 | # Create a reservation marker immediately to prevent other concurrent runs 135 | mkdir -p .claudecode-marker 136 | cat > .claudecode-marker/marker.json << EOF 137 | { 138 | "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", 139 | "repository_id": "$REPOSITORY_ID", 140 | "repository": "$REPOSITORY", 141 | "pr_number": $PR_NUMBER, 142 | "sha": "$SHA", 143 | "status": 
"reserved", 144 | "run_id": "$RUN_ID", 145 | "run_number": "$RUN_NUMBER" 146 | } 147 | EOF 148 | echo "Created ClaudeCode reservation marker for PR #$PR_NUMBER" 149 | 150 | - name: Save ClaudeCode reservation to cache 151 | if: steps.claudecode-check.outputs.enable_claudecode == 'true' && github.event_name == 'pull_request' 152 | uses: actions/cache/save@v4 153 | with: 154 | path: .claudecode-marker 155 | key: claudecode-${{ github.repository_id }}-pr-${{ github.event.pull_request.number }}-${{ github.sha }} 156 | 157 | - name: Set up Node.js 158 | if: steps.claudecode-check.outputs.enable_claudecode == 'true' 159 | uses: actions/setup-node@v4 160 | with: 161 | node-version: '18' 162 | 163 | - name: Install dependencies 164 | shell: bash 165 | env: 166 | ACTION_PATH: ${{ github.action_path }} 167 | run: | 168 | echo "::group::Install Deps" 169 | if [ "${{ steps.claudecode-check.outputs.enable_claudecode }}" == "true" ]; then 170 | pip install -r "$ACTION_PATH/claudecode/requirements.txt" 171 | npm install -g @anthropic-ai/claude-code 172 | fi 173 | sudo apt-get update && sudo apt-get install -y jq 174 | echo "::endgroup::" 175 | 176 | - name: Run ClaudeCode scan 177 | id: claudecode-scan 178 | if: steps.claudecode-check.outputs.enable_claudecode == 'true' 179 | shell: bash 180 | env: 181 | GITHUB_TOKEN: ${{ github.token }} 182 | GITHUB_REPOSITORY: ${{ github.repository }} 183 | PR_NUMBER: ${{ github.event.pull_request.number }} 184 | ANTHROPIC_API_KEY: ${{ inputs.claude-api-key }} 185 | ENABLE_CLAUDE_FILTERING: 'true' 186 | EXCLUDE_DIRECTORIES: ${{ inputs.exclude-directories }} 187 | FALSE_POSITIVE_FILTERING_INSTRUCTIONS: ${{ inputs.false-positive-filtering-instructions }} 188 | CUSTOM_SECURITY_SCAN_INSTRUCTIONS: ${{ inputs.custom-security-scan-instructions }} 189 | CLAUDE_MODEL: ${{ inputs.claude-model }} 190 | CLAUDECODE_TIMEOUT: ${{ inputs.claudecode-timeout }} 191 | ACTION_PATH: ${{ github.action_path }} 192 | run: | 193 | echo "Running ClaudeCode AI security 
analysis..." 194 | echo "----------------------------------------" 195 | 196 | # Initialize outputs 197 | echo "findings_count=0" >> $GITHUB_OUTPUT 198 | echo "results_file=claudecode/claudecode-results.json" >> $GITHUB_OUTPUT 199 | 200 | # Skip ClaudeCode if not a PR 201 | if [ "${{ github.event_name }}" != "pull_request" ]; then 202 | echo "ClaudeCode only runs on pull requests, skipping" 203 | exit 0 204 | fi 205 | 206 | # Validate API key is provided 207 | if [ -z "$ANTHROPIC_API_KEY" ]; then 208 | echo "::error::ANTHROPIC_API_KEY is not set. Please provide the claude-api-key input to the action." 209 | echo "Example usage:" 210 | echo " - uses: anthropics/claude-code-security-reviewer@main" 211 | echo " with:" 212 | echo " claude-api-key: \$\{{ secrets.ANTHROPIC_API_KEY }}" 213 | exit 1 214 | fi 215 | 216 | # Set timeout 217 | export CLAUDE_TIMEOUT="$CLAUDECODE_TIMEOUT" 218 | 219 | # Run ClaudeCode audit with verbose debugging 220 | export REPO_PATH=$(pwd) 221 | cd "$ACTION_PATH" 222 | 223 | # Enable verbose debugging 224 | echo "::group::ClaudeCode Environment" 225 | echo "Current directory: $(pwd)" 226 | echo "Python version: $(python --version)" 227 | echo "Claude CLI version: $(claude --version 2>&1 || echo 'Claude CLI not found')" 228 | echo "ANTHROPIC_API_KEY set: $(if [ -n "$ANTHROPIC_API_KEY" ]; then echo 'Yes'; else echo 'No'; fi)" 229 | echo "GITHUB_REPOSITORY: $GITHUB_REPOSITORY" 230 | echo "PR_NUMBER: $PR_NUMBER" 231 | echo "Python path: $PYTHONPATH" 232 | echo "Files in claudecode directory:" 233 | ls -la claudecode/ 234 | echo "::endgroup::" 235 | 236 | echo "::group::ClaudeCode Execution" 237 | # Add current directory to Python path so it can find the claudecode module 238 | export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$(pwd)" 239 | echo "Updated PYTHONPATH: $PYTHONPATH" 240 | 241 | # Run from the action root directory so Python can find the claudecode module 242 | python -u claudecode/github_action_audit.py > claudecode/claudecode-results.json 
2>claudecode/claudecode-error.log || CLAUDECODE_EXIT_CODE=$? 243 | 244 | if [ -n "$CLAUDECODE_EXIT_CODE" ]; then 245 | echo "::warning::ClaudeCode exited with code $CLAUDECODE_EXIT_CODE" 246 | else 247 | echo "ClaudeCode scan completed successfully" 248 | fi 249 | 250 | # Parse ClaudeCode results and count findings regardless of exit code 251 | if [ -f claudecode/claudecode-results.json ]; then 252 | FILE_SIZE=$(wc -c < claudecode/claudecode-results.json) 253 | echo "ClaudeCode results file size: $FILE_SIZE bytes" 254 | 255 | # Check if file is empty or too small 256 | if [ "$FILE_SIZE" -lt 2 ]; then 257 | echo "::warning::ClaudeCode results file is empty or invalid (size: $FILE_SIZE bytes)" 258 | echo "::warning::ClaudeCode may have failed silently. Check claudecode-error.log" 259 | if [ -f claudecode/claudecode-error.log ]; then 260 | echo "Error log contents:" 261 | cat claudecode/claudecode-error.log 262 | fi 263 | echo "findings_count=0" >> $GITHUB_OUTPUT 264 | else 265 | echo "ClaudeCode results preview:" 266 | head -n 300 claudecode/claudecode-results.json || echo "Unable to preview results" 267 | 268 | # Check if the result is an error 269 | if jq -e '.error' claudecode/claudecode-results.json > /dev/null 2>&1; then 270 | ERROR_MSG=$(jq -r '.error' claudecode/claudecode-results.json) 271 | echo "::warning::ClaudeCode error: $ERROR_MSG" 272 | echo "findings_count=0" >> $GITHUB_OUTPUT 273 | else 274 | # Use -r to get raw output and handle potential null/missing findings array 275 | CLAUDECODE_FINDINGS_COUNT=$(jq -r '.findings | if . 
== null then 0 else length end' claudecode/claudecode-results.json 2>/dev/null || echo "0") 276 | echo "::debug::Extracted ClaudeCode findings count: $CLAUDECODE_FINDINGS_COUNT" 277 | echo "findings_count=$CLAUDECODE_FINDINGS_COUNT" >> $GITHUB_OUTPUT 278 | echo "ClaudeCode found $CLAUDECODE_FINDINGS_COUNT security issues" 279 | 280 | # Also create findings.json for PR comment script 281 | jq '.findings // []' claudecode/claudecode-results.json > findings.json || echo '[]' > findings.json 282 | fi 283 | fi 284 | else 285 | echo "::warning::ClaudeCode results file not found" 286 | if [ -f claudecode/claudecode-error.log ]; then 287 | echo "Error log contents:" 288 | cat claudecode/claudecode-error.log 289 | fi 290 | echo "findings_count=0" >> $GITHUB_OUTPUT 291 | fi 292 | 293 | # Always copy files to workspace root regardless of the outcome 294 | # This ensures artifact upload and PR commenting can find them 295 | if [ -f findings.json ]; then 296 | cp findings.json ${{ github.workspace }}/findings.json || true 297 | fi 298 | if [ -f claudecode/claudecode-results.json ]; then 299 | cp claudecode/claudecode-results.json ${{ github.workspace }}/claudecode-results.json || true 300 | fi 301 | if [ -f claudecode/claudecode-error.log ]; then 302 | cp claudecode/claudecode-error.log ${{ github.workspace }}/claudecode-error.log || true 303 | fi 304 | 305 | echo "::endgroup::" 306 | 307 | 308 | - name: Upload scan results 309 | if: always() && inputs.upload-results == 'true' 310 | uses: actions/upload-artifact@v4 311 | with: 312 | name: security-review-results 313 | path: | 314 | findings.json 315 | claudecode-results.json 316 | claudecode-error.log 317 | retention-days: 7 318 | if-no-files-found: ignore 319 | 320 | - name: Comment PR with findings 321 | if: github.event_name == 'pull_request' && inputs.comment-pr == 'true' && steps.claudecode-check.outputs.enable_claudecode == 'true' 322 | shell: bash 323 | env: 324 | GITHUB_TOKEN: ${{ github.token }} 325 | 
"""Findings filter for reducing false positives in security audit results."""

import re
from typing import Dict, Any, List, Tuple, Optional, Pattern
import time
from dataclasses import dataclass, field

from claudecode.claude_api_client import ClaudeAPIClient
from claudecode.constants import DEFAULT_CLAUDE_MODEL
from claudecode.logger import get_logger

logger = get_logger(__name__)


@dataclass
class FilterStats:
    """Statistics about the filtering process."""
    total_findings: int = 0   # findings received before any filtering
    hard_excluded: int = 0    # removed by HardExclusionRules
    claude_excluded: int = 0  # removed by Claude API review
    kept_findings: int = 0    # findings surviving all filter stages
    exclusion_breakdown: Dict[str, int] = field(default_factory=dict)  # reason -> count
    confidence_scores: List[float] = field(default_factory=list)  # Claude confidence per analyzed finding
    runtime_seconds: float = 0.0  # wall-clock time spent in filter_findings


class HardExclusionRules:
    """Hard exclusion rules for common false positives.

    Each rule is a list of pre-compiled regexes matched against the
    finding's combined title + description text.
    """

    # Pre-compiled regex patterns for better performance
    _DOS_PATTERNS: List[Pattern] = [
        re.compile(r'\b(denial of service|dos attack|resource exhaustion)\b', re.IGNORECASE),
        re.compile(r'\b(exhaust|overwhelm|overload).*?(resource|memory|cpu)\b', re.IGNORECASE),
        re.compile(r'\b(infinite|unbounded).*?(loop|recursion)\b', re.IGNORECASE),
    ]

    _RATE_LIMITING_PATTERNS: List[Pattern] = [
        re.compile(r'\b(missing|lack of|no)\s+rate\s+limit', re.IGNORECASE),
        re.compile(r'\brate\s+limiting\s+(missing|required|not implemented)', re.IGNORECASE),
        re.compile(r'\b(implement|add)\s+rate\s+limit', re.IGNORECASE),
        re.compile(r'\bunlimited\s+(requests|calls|api)', re.IGNORECASE),
    ]

    _RESOURCE_PATTERNS: List[Pattern] = [
        re.compile(r'\b(resource|memory|file)\s+leak\s+potential', re.IGNORECASE),
        re.compile(r'\bunclosed\s+(resource|file|connection)', re.IGNORECASE),
        re.compile(r'\b(close|cleanup|release)\s+(resource|file|connection)', re.IGNORECASE),
        re.compile(r'\bpotential\s+memory\s+leak', re.IGNORECASE),
        re.compile(r'\b(database|thread|socket|connection)\s+leak', re.IGNORECASE),
    ]

    _OPEN_REDIRECT_PATTERNS: List[Pattern] = [
        re.compile(r'\b(open redirect|unvalidated redirect)\b', re.IGNORECASE),
        re.compile(r'\b(redirect.(attack|exploit|vulnerability))\b', re.IGNORECASE),
        re.compile(r'\b(malicious.redirect)\b', re.IGNORECASE),
    ]

    _MEMORY_SAFETY_PATTERNS: List[Pattern] = [
        re.compile(r'\b(buffer overflow|stack overflow|heap overflow)\b', re.IGNORECASE),
        re.compile(r'\b(oob)\s+(read|write|access)\b', re.IGNORECASE),
        re.compile(r'\b(out.?of.?bounds?)\b', re.IGNORECASE),
        re.compile(r'\b(memory safety|memory corruption)\b', re.IGNORECASE),
        re.compile(r'\b(use.?after.?free|double.?free|null.?pointer.?dereference)\b', re.IGNORECASE),
        re.compile(r'\b(segmentation fault|segfault|memory violation)\b', re.IGNORECASE),
        re.compile(r'\b(bounds check|boundary check|array bounds)\b', re.IGNORECASE),
        re.compile(r'\b(integer overflow|integer underflow|integer conversion)\b', re.IGNORECASE),
        re.compile(r'\barbitrary.?(memory read|pointer dereference|memory address|memory pointer)\b', re.IGNORECASE),
    ]

    _REGEX_INJECTION: List[Pattern] = [
        re.compile(r'\b(regex|regular expression)\s+injection\b', re.IGNORECASE),
        re.compile(r'\b(regex|regular expression)\s+denial of service\b', re.IGNORECASE),
        re.compile(r'\b(regex|regular expression)\s+flooding\b', re.IGNORECASE),
    ]

    _SSRF_PATTERNS: List[Pattern] = [
        re.compile(r'\b(ssrf|server\s+.?side\s+.?request\s+.?forgery)\b', re.IGNORECASE),
    ]

    @classmethod
    def get_exclusion_reason(cls, finding: Dict[str, Any]) -> Optional[str]:
        """Check if a finding should be excluded based on hard rules.

        Args:
            finding: Security finding to check

        Returns:
            Exclusion reason if finding should be excluded, None otherwise
        """
        # Check if finding is in a Markdown file
        file_path = finding.get('file', '')
        if file_path.lower().endswith('.md'):
            return "Finding in Markdown documentation file"

        description = finding.get('description', '')
        title = finding.get('title', '')

        # Handle None values (keys may be present but null in parsed JSON)
        if description is None:
            description = ''
        if title is None:
            title = ''

        combined_text = f"{title} {description}".lower()

        # Check DOS patterns
        for pattern in cls._DOS_PATTERNS:
            if pattern.search(combined_text):
                return "Generic DOS/resource exhaustion finding (low signal)"

        # Check rate limiting patterns
        for pattern in cls._RATE_LIMITING_PATTERNS:
            if pattern.search(combined_text):
                return "Generic rate limiting recommendation"

        # Check resource patterns - always exclude
        for pattern in cls._RESOURCE_PATTERNS:
            if pattern.search(combined_text):
                return "Resource management finding (not a security vulnerability)"

        # Check open redirect patterns
        for pattern in cls._OPEN_REDIRECT_PATTERNS:
            if pattern.search(combined_text):
                return "Open redirect vulnerability (not high impact)"

        # Check regex injection patterns
        for pattern in cls._REGEX_INJECTION:
            if pattern.search(combined_text):
                return "Regex injection finding (not applicable)"

        # Check memory safety patterns - exclude if NOT in C/C++ files.
        # BUGFIX: previously only {'.c', '.cc', '.cpp', '.h'} counted as
        # C/C++, so genuine memory-safety findings in files using other
        # common C++ extensions (.hpp, .cxx, .hh, .hxx) were wrongly
        # discarded as "non-C/C++ code".
        c_cpp_extensions = {'.c', '.cc', '.cpp', '.cxx', '.h', '.hh', '.hpp', '.hxx'}
        file_ext = ''
        if '.' in file_path:
            file_ext = f".{file_path.lower().split('.')[-1]}"

        # If file doesn't have a C/C++ extension (including no extension), exclude memory safety findings
        if file_ext not in c_cpp_extensions:
            for pattern in cls._MEMORY_SAFETY_PATTERNS:
                if pattern.search(combined_text):
                    return "Memory safety finding in non-C/C++ code (not applicable)"

        # Check SSRF patterns - exclude if in HTML files only
        html_extensions = {'.html'}

        # If file has HTML extension, exclude SSRF findings
        if file_ext in html_extensions:
            for pattern in cls._SSRF_PATTERNS:
                if pattern.search(combined_text):
                    return "SSRF finding in HTML file (not applicable to client-side code)"

        return None


class FindingsFilter:
    """Main filter class for security findings.

    Applies two stages: cheap regex-based hard exclusions, then an
    optional per-finding review through the Claude API.
    """

    def __init__(self,
                 use_hard_exclusions: bool = True,
                 use_claude_filtering: bool = True,
                 api_key: Optional[str] = None,
                 model: str = DEFAULT_CLAUDE_MODEL,
                 custom_filtering_instructions: Optional[str] = None):
        """Initialize findings filter.

        Args:
            use_hard_exclusions: Whether to apply hard exclusion rules
            use_claude_filtering: Whether to use Claude API for filtering
            api_key: Anthropic API key for Claude filtering
            model: Claude model to use for filtering
            custom_filtering_instructions: Optional custom filtering instructions
        """
        self.use_hard_exclusions = use_hard_exclusions
        self.use_claude_filtering = use_claude_filtering
        self.custom_filtering_instructions = custom_filtering_instructions

        # Initialize Claude client if filtering is enabled; any failure
        # degrades gracefully to hard-rules-only filtering.
        self.claude_client = None
        if self.use_claude_filtering:
            try:
                self.claude_client = ClaudeAPIClient(
                    model=model,
                    api_key=api_key
                )
                # Validate API access up front so we fail fast, not per finding
                valid, error = self.claude_client.validate_api_access()
                if not valid:
                    logger.warning(f"Claude API validation failed: {error}")
                    self.claude_client = None
                    self.use_claude_filtering = False
            except Exception as e:
                logger.error(f"Failed to initialize Claude client: {str(e)}")
                self.use_claude_filtering = False

    def filter_findings(self,
                        findings: List[Dict[str, Any]],
                        pr_context: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any], FilterStats]:
        """Filter security findings to remove false positives.

        Args:
            findings: List of security findings from Claude Code audit
            pr_context: Optional PR context for better analysis

        Returns:
            Tuple of (success, filtered_results, stats)
        """
        start_time = time.time()

        if not findings:
            stats = FilterStats(total_findings=0, runtime_seconds=0.0)
            return True, {
                "filtered_findings": [],
                "excluded_findings": [],
                "analysis_summary": {
                    "total_findings": 0,
                    "kept_findings": 0,
                    "excluded_findings": 0,
                    "exclusion_breakdown": {}
                }
            }, stats

        logger.info(f"Filtering {len(findings)} security findings")

        # Initialize statistics
        stats = FilterStats(total_findings=len(findings))

        # Step 1: Apply hard exclusion rules
        findings_after_hard = []
        excluded_hard = []

        if self.use_hard_exclusions:
            for i, finding in enumerate(findings):
                exclusion_reason = HardExclusionRules.get_exclusion_reason(finding)
                if exclusion_reason:
                    excluded_hard.append({
                        "finding": finding,
                        "index": i,
                        "exclusion_reason": exclusion_reason,
                        "filter_stage": "hard_rules"
                    })
                    stats.hard_excluded += 1

                    # Track exclusion breakdown keyed by the reason text
                    # with any "(detail)" suffix stripped
                    key = exclusion_reason.split('(')[0].strip()
                    stats.exclusion_breakdown[key] = stats.exclusion_breakdown.get(key, 0) + 1
                else:
                    findings_after_hard.append((i, finding))

            logger.info(f"Hard exclusions removed {stats.hard_excluded} findings")
        else:
            findings_after_hard = [(i, f) for i, f in enumerate(findings)]

        # Step 2: Apply Claude API filtering if enabled
        findings_after_claude = []
        excluded_claude = []

        if self.use_claude_filtering and self.claude_client and findings_after_hard:
            # Process findings individually
            logger.info(f"Processing {len(findings_after_hard)} findings individually through Claude API")

            for orig_idx, finding in findings_after_hard:
                # Call Claude API for single finding
                success, analysis_result, error_msg = self.claude_client.analyze_single_finding(
                    finding, pr_context, self.custom_filtering_instructions
                )

                if success and analysis_result:
                    # Process Claude's analysis for single finding
                    confidence = analysis_result.get('confidence_score', 10.0)
                    keep_finding = analysis_result.get('keep_finding', True)
                    justification = analysis_result.get('justification', '')
                    exclusion_reason = analysis_result.get('exclusion_reason')

                    stats.confidence_scores.append(confidence)

                    if not keep_finding:
                        # Claude recommends excluding
                        excluded_claude.append({
                            "finding": finding,
                            "confidence_score": confidence,
                            "exclusion_reason": exclusion_reason or f"Low confidence score: {confidence}",
                            "justification": justification,
                            "filter_stage": "claude_api"
                        })
                        stats.claude_excluded += 1
                    else:
                        # Keep finding with metadata
                        enriched_finding = finding.copy()
                        enriched_finding['_filter_metadata'] = {
                            'confidence_score': confidence,
                            'justification': justification,
                        }
                        findings_after_claude.append(enriched_finding)
                        stats.kept_findings += 1
                else:
                    # Claude API call failed for this finding - keep it with warning
                    # (fail open: an API outage must not hide real findings)
                    logger.warning(f"Claude API call failed for finding {orig_idx}: {error_msg}")
                    enriched_finding = finding.copy()
                    enriched_finding['_filter_metadata'] = {
                        'confidence_score': 10.0,  # Default high confidence
                        'justification': f'Claude API failed: {error_msg}',
                    }
                    findings_after_claude.append(enriched_finding)
                    stats.kept_findings += 1
        else:
            # Claude filtering disabled or no client - keep all findings from hard filter
            for orig_idx, finding in findings_after_hard:
                enriched_finding = finding.copy()
                enriched_finding['_filter_metadata'] = {
                    'confidence_score': 10.0,  # Default high confidence
                    'justification': 'Claude filtering disabled',
                }
                findings_after_claude.append(enriched_finding)
                stats.kept_findings += 1

        # Combine all excluded findings
        all_excluded = excluded_hard + excluded_claude

        # Calculate final statistics
        stats.runtime_seconds = time.time() - start_time

        # Build filtered results
        filtered_results = {
            "filtered_findings": findings_after_claude,
            "excluded_findings": all_excluded,
            "analysis_summary": {
                "total_findings": stats.total_findings,
                "kept_findings": stats.kept_findings,
                "excluded_findings": len(all_excluded),
                "hard_excluded": stats.hard_excluded,
                "claude_excluded": stats.claude_excluded,
                "exclusion_breakdown": stats.exclusion_breakdown,
                "average_confidence": sum(stats.confidence_scores) / len(stats.confidence_scores) if stats.confidence_scores else None,
                "runtime_seconds": stats.runtime_seconds
            }
        }

        logger.info(f"Filtering completed: {stats.kept_findings}/{stats.total_findings} findings kept "
                    f"({stats.runtime_seconds:.1f}s)")

        return True, filtered_results, stats
4 | """ 5 | 6 | import json 7 | import os 8 | import subprocess 9 | from unittest.mock import Mock, patch 10 | from pathlib import Path 11 | 12 | from claudecode.github_action_audit import SimpleClaudeRunner 13 | from claudecode.constants import DEFAULT_CLAUDE_MODEL 14 | 15 | 16 | class TestSimpleClaudeRunner: 17 | """Test SimpleClaudeRunner functionality.""" 18 | 19 | def test_init(self): 20 | """Test runner initialization.""" 21 | runner = SimpleClaudeRunner(timeout_minutes=30) 22 | assert runner.timeout_seconds == 1800 23 | 24 | runner2 = SimpleClaudeRunner() # Default 25 | assert runner2.timeout_seconds == 1200 # 20 minutes default 26 | 27 | @patch('subprocess.run') 28 | def test_validate_claude_available_success(self, mock_run): 29 | """Test successful Claude validation.""" 30 | mock_run.return_value = Mock( 31 | returncode=0, 32 | stdout='claude version 1.0.0', 33 | stderr='' 34 | ) 35 | 36 | with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}): 37 | runner = SimpleClaudeRunner() 38 | success, error = runner.validate_claude_available() 39 | 40 | assert success is True 41 | assert error == '' 42 | mock_run.assert_called_once_with( 43 | ['claude', '--version'], 44 | capture_output=True, 45 | text=True, 46 | timeout=10 47 | ) 48 | 49 | @patch('subprocess.run') 50 | def test_validate_claude_available_no_api_key(self, mock_run): 51 | """Test Claude validation without API key.""" 52 | mock_run.return_value = Mock( 53 | returncode=0, 54 | stdout='claude version 1.0.0', 55 | stderr='' 56 | ) 57 | 58 | # Remove API key 59 | env = os.environ.copy() 60 | env.pop('ANTHROPIC_API_KEY', None) 61 | 62 | with patch.dict(os.environ, env, clear=True): 63 | runner = SimpleClaudeRunner() 64 | success, error = runner.validate_claude_available() 65 | 66 | assert success is False 67 | assert 'ANTHROPIC_API_KEY environment variable is not set' in error 68 | 69 | @patch('subprocess.run') 70 | def test_validate_claude_available_not_installed(self, mock_run): 71 | """Test 
Claude validation when not installed.""" 72 | mock_run.side_effect = FileNotFoundError() 73 | 74 | runner = SimpleClaudeRunner() 75 | success, error = runner.validate_claude_available() 76 | 77 | assert success is False 78 | assert 'Claude Code is not installed or not in PATH' in error 79 | 80 | @patch('subprocess.run') 81 | def test_validate_claude_available_error(self, mock_run): 82 | """Test Claude validation with error.""" 83 | mock_run.return_value = Mock( 84 | returncode=1, 85 | stdout='', 86 | stderr='Error: Authentication failed' 87 | ) 88 | 89 | runner = SimpleClaudeRunner() 90 | success, error = runner.validate_claude_available() 91 | 92 | assert success is False 93 | assert 'exit code 1' in error 94 | assert 'Authentication failed' in error 95 | 96 | @patch('subprocess.run') 97 | def test_validate_claude_available_timeout(self, mock_run): 98 | """Test Claude validation timeout.""" 99 | mock_run.side_effect = subprocess.TimeoutExpired(['claude'], 10) 100 | 101 | runner = SimpleClaudeRunner() 102 | success, error = runner.validate_claude_available() 103 | 104 | assert success is False 105 | assert 'timed out' in error 106 | 107 | def test_run_security_audit_missing_directory(self): 108 | """Test audit with missing directory.""" 109 | runner = SimpleClaudeRunner() 110 | success, error, results = runner.run_security_audit( 111 | Path('/non/existent/path'), 112 | "test prompt" 113 | ) 114 | 115 | assert success is False 116 | assert 'Repository directory does not exist' in error 117 | assert results == {} 118 | 119 | @patch('subprocess.run') 120 | def test_run_security_audit_success(self, mock_run): 121 | """Test successful security audit.""" 122 | # Claude Code returns wrapped format with 'result' field 123 | findings_data = { 124 | "findings": [ 125 | { 126 | "file": "test.py", 127 | "line": 10, 128 | "severity": "HIGH", 129 | "description": "SQL injection vulnerability" 130 | } 131 | ], 132 | "analysis_summary": { 133 | "files_reviewed": 5, 134 | 
"high_severity": 1, 135 | "medium_severity": 0, 136 | "low_severity": 0, 137 | "review_completed": True 138 | } 139 | } 140 | 141 | audit_result = { 142 | "result": json.dumps(findings_data) 143 | } 144 | 145 | mock_run.return_value = Mock( 146 | returncode=0, 147 | stdout=json.dumps(audit_result), 148 | stderr='' 149 | ) 150 | 151 | runner = SimpleClaudeRunner() 152 | with patch('pathlib.Path.exists', return_value=True): 153 | success, error, results = runner.run_security_audit( 154 | Path('/tmp/test'), 155 | "test prompt" 156 | ) 157 | 158 | assert success is True 159 | assert error == '' 160 | assert len(results['findings']) == 1 161 | assert results['findings'][0]['severity'] == 'HIGH' 162 | 163 | # Verify subprocess call 164 | mock_run.assert_called_once() 165 | call_args = mock_run.call_args 166 | assert call_args[0][0] == [ 167 | 'claude', 168 | '--output-format', 'json', 169 | '--model', DEFAULT_CLAUDE_MODEL, 170 | '--disallowed-tools', 'Bash(ps:*)' 171 | ] 172 | assert call_args[1]['input'] == 'test prompt' 173 | assert call_args[1]['cwd'] == Path('/tmp/test') 174 | 175 | @patch('subprocess.run') 176 | def test_run_security_audit_large_prompt_warning(self, mock_run, capsys): 177 | """Test warning for large prompts.""" 178 | mock_run.return_value = Mock( 179 | returncode=0, 180 | stdout='{"findings": []}', 181 | stderr='' 182 | ) 183 | 184 | # Create a prompt larger than 1MB 185 | large_prompt = 'x' * (1024 * 1024 + 1000) 186 | 187 | runner = SimpleClaudeRunner() 188 | with patch('pathlib.Path.exists', return_value=True): 189 | success, error, results = runner.run_security_audit( 190 | Path('/tmp/test'), 191 | large_prompt 192 | ) 193 | 194 | captured = capsys.readouterr() 195 | assert '[Warning] Large prompt size' in captured.err 196 | assert success is True 197 | 198 | @patch('subprocess.run') 199 | def test_run_security_audit_retry_on_failure(self, mock_run): 200 | """Test retry logic on failure.""" 201 | # First call fails, second succeeds 202 | 
mock_run.side_effect = [ 203 | Mock(returncode=1, stdout='', stderr='Temporary error'), 204 | Mock(returncode=0, stdout='{"findings": []}', stderr='') 205 | ] 206 | 207 | runner = SimpleClaudeRunner() 208 | with patch('pathlib.Path.exists', return_value=True): 209 | success, error, results = runner.run_security_audit( 210 | Path('/tmp/test'), 211 | "test prompt" 212 | ) 213 | 214 | assert success is True 215 | assert error == '' 216 | assert mock_run.call_count == 2 # Retried once 217 | 218 | @patch('subprocess.run') 219 | def test_run_security_audit_retry_on_error_during_execution(self, mock_run): 220 | """Test retry on error_during_execution result.""" 221 | error_result = { 222 | "type": "result", 223 | "subtype": "error_during_execution", 224 | "error": "Temporary execution error" 225 | } 226 | 227 | success_result = { 228 | "result": json.dumps({ 229 | "findings": [{"file": "test.py", "line": 1, "severity": "LOW", "description": "Issue"}], 230 | "analysis_summary": { 231 | "files_reviewed": 1, 232 | "high_severity": 0, 233 | "medium_severity": 0, 234 | "low_severity": 1, 235 | "review_completed": True 236 | } 237 | }) 238 | } 239 | 240 | mock_run.side_effect = [ 241 | Mock(returncode=0, stdout=json.dumps(error_result), stderr=''), 242 | Mock(returncode=0, stdout=json.dumps(success_result), stderr='') 243 | ] 244 | 245 | runner = SimpleClaudeRunner() 246 | with patch('pathlib.Path.exists', return_value=True): 247 | success, error, results = runner.run_security_audit( 248 | Path('/tmp/test'), 249 | "test prompt" 250 | ) 251 | 252 | assert success is True 253 | assert len(results['findings']) == 1 254 | assert mock_run.call_count == 2 255 | 256 | @patch('subprocess.run') 257 | def test_run_security_audit_timeout(self, mock_run): 258 | """Test timeout handling.""" 259 | mock_run.side_effect = subprocess.TimeoutExpired(['claude'], 1200) 260 | 261 | runner = SimpleClaudeRunner() 262 | with patch('pathlib.Path.exists', return_value=True): 263 | success, error, 
results = runner.run_security_audit( 264 | Path('/tmp/test'), 265 | "test prompt" 266 | ) 267 | 268 | assert success is False 269 | assert 'timed out after 20 minutes' in error 270 | assert results == {} 271 | 272 | @patch('subprocess.run') 273 | def test_run_security_audit_json_parse_failure_with_retry(self, mock_run): 274 | """Test JSON parse failure with retry.""" 275 | mock_run.side_effect = [ 276 | Mock(returncode=0, stdout='Invalid JSON', stderr=''), 277 | Mock(returncode=0, stdout='Still invalid', stderr='') 278 | ] 279 | 280 | runner = SimpleClaudeRunner() 281 | with patch('pathlib.Path.exists', return_value=True): 282 | success, error, results = runner.run_security_audit( 283 | Path('/tmp/test'), 284 | "test prompt" 285 | ) 286 | 287 | assert success is False 288 | assert 'Failed to parse Claude output' in error 289 | assert mock_run.call_count == 2 290 | 291 | def test_extract_security_findings_claude_wrapper(self): 292 | """Test extraction from Claude Code wrapper format.""" 293 | runner = SimpleClaudeRunner() 294 | 295 | # Test with result field containing JSON string 296 | claude_output = { 297 | "result": json.dumps({ 298 | "findings": [ 299 | {"file": "test.py", "line": 10, "severity": "HIGH"} 300 | ] 301 | }) 302 | } 303 | 304 | result = runner._extract_security_findings(claude_output) 305 | assert len(result['findings']) == 1 306 | assert result['findings'][0]['file'] == 'test.py' 307 | 308 | def test_extract_security_findings_direct_format(self): 309 | """Test that direct findings format was removed - only wrapped format is supported.""" 310 | runner = SimpleClaudeRunner() 311 | 312 | # Direct format (without 'result' wrapper) should return empty 313 | claude_output = { 314 | "findings": [ 315 | {"file": "main.py", "line": 20, "severity": "MEDIUM"} 316 | ], 317 | "analysis_summary": { 318 | "files_reviewed": 3, 319 | "high_severity": 0, 320 | "medium_severity": 1, 321 | "low_severity": 0 322 | } 323 | } 324 | 325 | result = 
runner._extract_security_findings(claude_output) 326 | # Should return empty structure since direct format is not supported 327 | assert len(result['findings']) == 0 328 | assert result['analysis_summary']['review_completed'] is False 329 | 330 | def test_extract_security_findings_text_fallback(self): 331 | """Test that text fallback was removed - only JSON is supported.""" 332 | runner = SimpleClaudeRunner() 333 | 334 | # Test with result containing text (not JSON) 335 | claude_output = { 336 | "result": "Found SQL injection vulnerability in database.py line 45" 337 | } 338 | 339 | # Should return empty findings since we don't parse text anymore 340 | result = runner._extract_security_findings(claude_output) 341 | assert len(result['findings']) == 0 342 | assert result['analysis_summary']['review_completed'] is False 343 | 344 | def test_extract_security_findings_empty(self): 345 | """Test extraction with no findings.""" 346 | runner = SimpleClaudeRunner() 347 | 348 | # Various empty formats 349 | for output in [None, {}, {"result": ""}, {"other": "data"}]: 350 | result = runner._extract_security_findings(output) 351 | assert result['findings'] == [] 352 | assert result['analysis_summary']['review_completed'] is False 353 | 354 | def test_create_findings_from_text(self): 355 | """Test that _create_findings_from_text was removed.""" 356 | runner = SimpleClaudeRunner() 357 | 358 | # Method should not exist 359 | assert not hasattr(runner, '_create_findings_from_text') 360 | 361 | def test_create_findings_from_text_no_issues(self): 362 | """Test that _create_findings_from_text was removed.""" 363 | runner = SimpleClaudeRunner() 364 | 365 | # Method should not exist 366 | assert not hasattr(runner, '_create_findings_from_text') 367 | 368 | 369 | class TestClaudeRunnerEdgeCases: 370 | """Test edge cases and error scenarios.""" 371 | 372 | @patch('subprocess.run') 373 | def test_claude_output_formats(self, mock_run): 374 | """Test various Claude output formats.""" 375 | 
runner = SimpleClaudeRunner() 376 | 377 | # Test nested JSON in result - result field should be string 378 | nested_output = { 379 | "type": "result", 380 | "result": json.dumps({ 381 | "findings": [ 382 | {"file": "test.py", "line": 1, "severity": "HIGH", "description": "Issue"} 383 | ] 384 | }) 385 | } 386 | 387 | with patch('pathlib.Path.exists', return_value=True): 388 | mock_run.return_value = Mock( 389 | returncode=0, 390 | stdout=json.dumps(nested_output), 391 | stderr='' 392 | ) 393 | 394 | success, error, results = runner.run_security_audit( 395 | Path('/tmp/test'), 396 | "test" 397 | ) 398 | 399 | # Should extract findings from nested structure 400 | assert success is True 401 | assert len(results['findings']) == 1 402 | 403 | @patch('subprocess.run') 404 | def test_partial_json_recovery(self, mock_run): 405 | """Test recovery from partial JSON output.""" 406 | # Simulate truncated JSON 407 | partial_json = '{"findings": [{"file": "test.py", "line": 10, "sev' 408 | 409 | mock_run.return_value = Mock( 410 | returncode=0, 411 | stdout=partial_json, 412 | stderr='' 413 | ) 414 | 415 | runner = SimpleClaudeRunner() 416 | with patch('pathlib.Path.exists', return_value=True): 417 | success, error, results = runner.run_security_audit( 418 | Path('/tmp/test'), 419 | "test" 420 | ) 421 | 422 | # Should fail to parse and retry 423 | assert mock_run.call_count == 2 424 | 425 | @patch('subprocess.run') 426 | def test_exception_handling(self, mock_run): 427 | """Test general exception handling.""" 428 | mock_run.side_effect = Exception("Unexpected error") 429 | 430 | runner = SimpleClaudeRunner() 431 | with patch('pathlib.Path.exists', return_value=True): 432 | success, error, results = runner.run_security_audit( 433 | Path('/tmp/test'), 434 | "test" 435 | ) 436 | 437 | assert success is False 438 | assert 'Unexpected error' in error 439 | assert results == {} 440 | -------------------------------------------------------------------------------- 
/claudecode/test_findings_conversion.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Unit tests for findings conversion and edge cases.
"""

import pytest
import json

from claudecode.findings_filter import FindingsFilter, HardExclusionRules
from claudecode.json_parser import parse_json_with_fallbacks, extract_json_from_text


def create_simple_filter():
    """Create a filter that only uses hard exclusions."""
    return FindingsFilter(use_hard_exclusions=True, use_claude_filtering=False)


def filter_findings_simple(filter_instance, findings):
    """Helper to get simple kept/excluded tuple from FindingsFilter.

    Raises TypeError for None input (mirroring plain iteration semantics);
    on filter failure, conservatively keeps every finding.
    """
    if findings is None:
        raise TypeError("'NoneType' object is not iterable")

    success, results, stats = filter_instance.filter_findings(findings)
    if success:
        kept = results.get('filtered_findings', [])
        excluded = results.get('excluded_findings', [])
    else:
        kept = findings
        excluded = []
    return kept, excluded


class TestFindingsConversionEdgeCases:
    """Test edge cases in findings conversion and filtering."""

    def test_empty_findings_list(self):
        """Test filtering empty findings list."""
        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, [])

        assert kept == []
        assert excluded == []

    def test_none_findings_list(self):
        """Test filtering None findings list."""
        filter = create_simple_filter()
        # Should raise TypeError for None input
        with pytest.raises(TypeError):
            filter_findings_simple(filter, None)

    def test_malformed_finding_missing_fields(self):
        """Test filtering findings with missing required fields."""
        findings = [
            {'description': 'Issue 1'},  # Missing severity
            {'severity': 'HIGH'},  # Missing description
            {},  # Empty finding
            {'severity': 'HIGH', 'description': 'Valid issue'},
        ]

        filter = create_simple_filter()
        # The filter will process all findings, even with missing fields
        kept, excluded = filter_findings_simple(filter, findings)

        # All findings without exclusion patterns are kept
        assert len(kept) == 4
        assert len(excluded) == 0

    def test_finding_with_extra_fields(self):
        """Test findings with extra/unexpected fields."""
        findings = [
            {
                'severity': 'HIGH',
                'description': 'SQL injection',
                'extra_field': 'value',
                'nested': {'data': 'here'},
                'array': [1, 2, 3]
            }
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # Extra fields should be preserved
        assert len(kept) == 1
        assert kept[0]['extra_field'] == 'value'
        assert kept[0]['nested'] == {'data': 'here'}

    def test_unicode_in_findings(self):
        """Test findings with unicode characters."""
        findings = [
            {
                'severity': 'HIGH',
                'description': 'SQL injection in 用户输入',
                'file': 'файл.py',
                'exploit_scenario': 'Attacker könnte dies ausnutzen'
            }
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        assert len(kept) == 1
        assert '用户输入' in kept[0]['description']
        assert kept[0]['file'] == 'файл.py'

    def test_very_long_description(self):
        """Test findings with very long descriptions."""
        long_desc = 'A' * 10000  # 10k character description
        findings = [
            {
                'severity': 'HIGH',
                'description': f'SQL injection vulnerability. {long_desc}'
            }
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # Should not crash on long descriptions
        assert len(kept) == 1
        assert len(kept[0]['description']) > 10000

    def test_special_characters_in_description(self):
        """Test findings with special regex characters."""
        findings = [
            {'severity': 'HIGH', 'description': 'Issue with [brackets] and (parens)'},
            {'severity': 'HIGH', 'description': 'Path: C:\\Users\\test\\file.py'},
            {'severity': 'HIGH', 'description': 'Regex pattern: .*$^[]{}'},
            {'severity': 'HIGH', 'description': 'Missing rate limiting for API'},
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # Special characters shouldn't break filtering
        assert len(kept) == 3  # "Missing rate limiting" should be excluded
        assert len(excluded) == 1

    def test_case_sensitivity_in_exclusions(self):
        """Test case sensitivity in exclusion rules."""
        findings = [
            {'severity': 'HIGH', 'description': 'DENIAL OF SERVICE attack'},
            {'severity': 'HIGH', 'description': 'Denial Of Service issue'},
            {'severity': 'HIGH', 'description': 'dos vulnerability'},
            {'severity': 'HIGH', 'description': 'DoS attack vector'},
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # DOS patterns use word boundaries, "dos vulnerability" doesn't match \bdos attack\b
        assert len(kept) == 1  # "dos vulnerability" is kept
        assert len(excluded) == 3
        assert kept[0]['description'] == 'dos vulnerability'

    def test_severity_normalization(self):
        """Test various severity formats."""
        findings = [
            {'severity': 'high', 'description': 'Issue 1'},
            {'severity': 'HIGH', 'description': 'Issue 2'},
            {'severity': 'High', 'description': 'Issue 3'},
            {'severity': 'CRITICAL', 'description': 'Issue 4'},
            {'severity': 'unknown', 'description': 'Issue 5'},
            {'severity': '', 'description': 'Issue 6'},
            {'severity': None, 'description': 'Issue 7'},
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # All should be processed regardless of severity format
        assert len(kept) == 7

    def test_json_injection_in_findings(self):
        """Test findings that might contain JSON injection attempts."""
        findings = [
            {
                'severity': 'HIGH',
                'description': '{"injected": "json", "description": "fake"}'
            },
            {
                'severity': 'HIGH',
                'description': 'Issue with "}]} payload'
            }
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # Should handle JSON-like content in descriptions
        assert len(kept) == 2


class TestJsonParserEdgeCases:
    """Test JSON parser edge cases."""

    def test_parse_empty_string(self):
        """Test parsing empty string."""
        success, result = parse_json_with_fallbacks('', 'test')
        assert success is False
        # Empty string returns error structure
        assert 'error' in result
        assert "Invalid JSON response" in result['error']

    def test_parse_whitespace_only(self):
        """Test parsing whitespace-only string."""
        success, result = parse_json_with_fallbacks('  \n\t  ', 'test')
        assert success is False
        # Whitespace returns error structure
        assert 'error' in result
        assert "Invalid JSON response" in result['error']

    def test_parse_truncated_json(self):
        """Test parsing truncated JSON."""
        truncated = '{"findings": [{"severity": "HIGH", "desc'
        success, result = parse_json_with_fallbacks(truncated, 'test')
        assert success is False

    def test_parse_json_with_comments(self):
        """Test parsing JSON with comments (invalid JSON)."""
        json_with_comments = """{
            "findings": [
                // This is a comment
                {"severity": "HIGH", "description": "Issue"}
            ]
        }"""
        success, result = parse_json_with_fallbacks(json_with_comments, 'test')
        assert success is False

    def test_parse_json_with_trailing_comma(self):
        """Test parsing JSON with trailing comma."""
        json_with_comma = '{"findings": [{"severity": "HIGH"},]}'
        success, result = parse_json_with_fallbacks(json_with_comma, 'test')
        assert success is False

    def test_parse_nested_json_string(self):
        """Test parsing JSON embedded in string."""
        nested = '{"result": "{\\"findings\\": [{\\"severity\\": \\"HIGH\\"}]}"}'
        success, result = parse_json_with_fallbacks(nested, 'test')
        assert success is True
        assert 'result' in result

    def test_extract_json_from_text_edge_cases(self):
        """Test JSON extraction from various text formats."""
        # No JSON
        assert extract_json_from_text('Just plain text') is None

        # Multiple JSON objects
        text = 'First: {"a": 1} Second: {"b": 2}'
        result = extract_json_from_text(text)
        assert result == {"a": 1}  # Should extract first

        # JSON in markdown code block
        text = '''```json
{"findings": [{"severity": "HIGH"}]}
```'''
        result = extract_json_from_text(text)
        assert result is not None
        assert 'findings' in result

        # Malformed JSON attempts
        text = 'Result: {invalid json}'
        assert extract_json_from_text(text) is None

        # Very large JSON
        large_obj = {"data": ["x" * 100 for _ in range(1000)]}
        text = f"Result: {json.dumps(large_obj)}"
        result = extract_json_from_text(text)
        assert result is not None
        assert len(result['data']) == 1000

    def test_extract_json_with_unicode(self):
        """Test JSON extraction with unicode."""
        text = 'Result: {"message": "Error: 文件未找到"}'
        result = extract_json_from_text(text)
        assert result is not None
        assert result['message'] == "Error: 文件未找到"

    def test_parse_json_arrays(self):
        """Test parsing JSON arrays."""
        # Direct array
        success, result = parse_json_with_fallbacks('[1, 2, 3]', 'test')
        assert success is True
        assert result == [1, 2, 3]

        # Array of findings
        findings_array = '[{"severity": "HIGH", "description": "Issue"}]'
        success, result = parse_json_with_fallbacks(findings_array, 'test')
        assert success is True
        assert isinstance(result, list)
        assert len(result) == 1


class TestHardExclusionRulesEdgeCases:
    """Test hard exclusion rules edge cases."""

    def test_overlapping_patterns(self):
        """Test findings that match multiple exclusion patterns."""
        finding = {
            'severity': 'HIGH',
            'description': 'Denial of service via rate limiting bypass allows brute force attack'
        }

        # Matches both DOS and rate limiting patterns
        reason = HardExclusionRules.get_exclusion_reason(finding)
        assert reason is not None
        assert "DOS" in reason  # Should match DOS pattern first

    def test_pattern_boundary_matching(self):
        """Test pattern matching at word boundaries."""
        findings = [
            {'severity': 'HIGH', 'description': 'dosomething() function'},  # Should not match DOS
            {'severity': 'HIGH', 'description': 'windows path issue'},  # Should not match
            {'severity': 'HIGH', 'description': 'pseudorandom number'},  # Should not match
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # None should be excluded (no word boundary match)
        assert len(kept) == 3
        assert len(excluded) == 0

    def test_html_entities_in_description(self):
        """Test findings with HTML entities."""
        findings = [
            {'severity': 'HIGH', 'description': 'XSS via &lt;script&gt; tag'},
            {'severity': 'HIGH', 'description': 'Missing rate limiting &amp; throttling'},
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # "Missing rate limiting" should be excluded even with HTML entity
        assert len(kept) == 1
        assert len(excluded) == 1
        assert 'XSS' in kept[0]['description']

    def test_multiline_descriptions(self):
        """Test findings with multiline descriptions."""
        findings = [
            {
                'severity': 'HIGH',
                'description': '''SQL injection vulnerability
                in user input handling.
                This could lead to data exposure.'''
            },
            {
                'severity': 'HIGH',
                'description': '''Performance issue that could
                cause denial of service under
                heavy load conditions.'''
            }
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # DOS should be found even across lines
        assert len(kept) == 1
        assert len(excluded) == 1
        assert 'SQL injection' in kept[0]['description']


class TestFilteringCombinations:
    """Test combinations of filtering scenarios."""

    def test_mixed_valid_invalid_findings(self):
        """Test mix of valid, invalid, and excludable findings."""
        findings = [
            {'severity': 'HIGH', 'description': 'SQL injection'},  # Valid
            {'description': 'Missing severity'},  # Valid (no exclusion pattern)
            {'severity': 'HIGH', 'description': 'Missing rate limiting'},  # Excludable
            {'severity': 'MEDIUM', 'description': 'XSS vulnerability'},  # Valid
            {'severity': 'LOW', 'description': 'Denial of service attack'},  # Excludable
            {'severity': '', 'description': ''},  # Valid (no exclusion pattern)
            {'severity': 'HIGH', 'description': 'RCE possibility'},  # Valid
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        assert len(kept) == 5  # All except rate limiting and DOS
        assert len(excluded) == 2  # Rate limiting, DOS

        # Verify excluded findings
        excluded_descs = [e['finding']['description'] for e in excluded]
        assert 'Missing rate limiting' in excluded_descs
        assert 'Denial of service attack' in excluded_descs

    def test_duplicate_findings(self):
        """Test handling of duplicate findings."""
        finding = {'severity': 'HIGH', 'description': 'Same issue'}
        findings = [finding, finding, finding]  # Same object repeated

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # All duplicates should be kept (deduplication not filter's job)
        assert len(kept) == 3

    def test_similar_but_different_findings(self):
        """Test similar findings with slight differences."""
        findings = [
            {'severity': 'HIGH', 'description': 'SQL injection in login'},
            {'severity': 'HIGH', 'description': 'SQL injection in login()'},
            {'severity': 'HIGH', 'description': 'sql injection in login'},
            {'severity': 'MEDIUM', 'description': 'SQL injection in login'},
        ]

        filter = create_simple_filter()
        kept, excluded = filter_findings_simple(filter, findings)

        # All should be kept despite similarity
        assert len(kept) == 4
--------------------------------------------------------------------------------
/claudecode/test_hard_exclusion_rules.py:
--------------------------------------------------------------------------------
"""Unit tests for HardExclusionRules in findings_filter module."""

from claudecode.findings_filter import HardExclusionRules


class TestHardExclusionRules:
    """Test the HardExclusionRules class for filtering false
    positives."""

    def test_dos_pattern_exclusion(self):
        """Test exclusion of DOS-related findings."""
        dos_findings = [
            {
                "title": "Potential Denial of Service",
                "description": "This could lead to resource exhaustion"
            },
            {
                "title": "Resource consumption issue",
                "description": "Unbounded loop could exhaust CPU resources"
            },
            {
                "title": "Memory exhaustion",
                "description": "This function could overwhelm memory with large inputs"
            },
            {
                "title": "Stack overflow vulnerability",
                "description": "Infinite recursion detected"
            }
        ]

        for finding in dos_findings:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is not None
            assert "DOS/resource exhaustion" in reason

    def test_dos_pattern_not_excluded_with_exploit(self):
        """Test that stack overflow with exploit mention is not excluded."""
        finding = {
            "title": "Stack overflow exploit",
            "description": "This stack overflow can be exploited to execute arbitrary code",
            "file": "exploit.c"  # Add C file so it's not excluded by memory safety rule
        }

        reason = HardExclusionRules.get_exclusion_reason(finding)
        assert reason is None  # Should not be excluded

    def test_generic_validation_pattern_exclusion(self):
        """Test that generic validation findings are NOT excluded anymore."""
        validation_findings = [
            {
                "title": "Security Issue",
                "description": "Missing input validation"
            },
            {
                "title": "Security Issue",
                "description": "Input validation required"
            },
            {
                "title": "Security Issue",
                "description": "Validate parameters"
            },
            {
                "title": "Security Issue",
                "description": "Add input validation"
            }
        ]

        # Since we removed generic validation patterns, these should NOT be excluded
        for finding in validation_findings:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is None

    def test_specific_validation_not_excluded(self):
        """Test that specific validation issues are not excluded."""
        specific_findings = [
            {
                "title": "Missing input validation",
                "description": "SQL injection possible due to missing validation"
            },
            {
                "title": "No validation",
                "description": "Command injection vulnerability - validate shell commands"
            },
            {
                "title": "Missing validation",
                "description": "Path traversal - validate file paths"
            },
            {
                "title": "Add validation",
                "description": "Eval() used without input validation"
            }
        ]

        for finding in specific_findings:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is None  # Should not be excluded due to specific context

    def test_secrets_pattern_exclusion(self):
        """Test that generic secrets warnings are NOT excluded anymore."""
        secrets_findings = [
            {
                "title": "Hardcoded password detected",
                "description": "Avoid hardcoding credentials in source code"
            },
            {
                "title": "Plaintext secrets",
                "description": "Credentials stored in plaintext"
            },
            {
                "title": "Embedded token",
                "description": "API key in source code"
            },
            {
                "title": "Password storage",
                "description": "Password stored in clear text"
            }
        ]

        # Since we removed secrets patterns, these should NOT be excluded
        for finding in secrets_findings:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is None

    def test_actual_secrets_not_excluded(self):
        """Test that actual exposed secrets are not excluded."""
        actual_secrets = [
            {
                "title": "Hardcoded password",
                "description": "Found actual password: 'admin123' in config file"
            },
            {
                "title": "API key exposed",
                "description": "Discovered API key in source: sk-1234567890"
            },
            {
                "title": "Plaintext password",
                "description": "Database password 'mypass' found in code"
            }
        ]

        for finding in actual_secrets:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is None  # Should not be excluded

    def test_rate_limiting_pattern_exclusion(self):
        """Test exclusion of rate limiting recommendations."""
        rate_limit_findings = [
            {
                "title": "Missing rate limit",
                "description": "API endpoint has no rate limiting"
            },
            {
                "title": "Rate limiting required",
                "description": "Implement rate limiting for this endpoint"
            },
            {
                "title": "No rate limit",
                "description": "Unlimited requests allowed"
            },
            {
                "title": "Add rate limiting",
                "description": "This API needs rate limits"
            }
        ]

        for finding in rate_limit_findings:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is not None
            assert "rate limiting recommendation" in reason

    def test_resource_pattern_exclusion(self):
        """Test exclusion of generic resource management findings."""
        resource_findings = [
            {
                "title": "Security Issue",
                "description": "Potential memory leak detected"
            },
            {
                "title": "Security Issue",
                "description": "Resource leak potential in file handling"
            },
            {
                "title": "Security Issue",
                "description": "Unclosed resource detected in function"
            },
            {
                "title": "Security Issue",
                "description": "File cleanup required - close resource"
            }
        ]

        for finding in resource_findings:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is not None
            assert "Resource management finding" in reason

    def test_specific_resource_also_excluded(self):
        """Test that ALL resource issues are now excluded (including specific ones)."""
        specific_resources = [
            {
                "title": "Database connection leak",
                "description": "PostgreSQL connections not returned to pool"
            },
            {
                "title": "Thread leak",
                "description": "Thread pool exhaustion due to unclosed threads"
            },
            {
                "title": "Socket leak",
                "description": "TCP sockets remain open after errors"
            }
        ]

        # All resource issues should be excluded now
        for finding in specific_resources:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is not None
            assert "Resource management finding" in reason  # Excluded by the blanket resource rule

    def test_open_redirect_pattern_exclusion(self):
        """Test exclusion of open redirect findings."""
        redirect_findings = [
            {
                "title": "Open redirect vulnerability",
                "description": "User input used in redirect without validation"
            },
            {
                "title": "Unvalidated redirect",
                "description": "Redirect URL not validated"
            },
            {
                "title": "Redirect vulnerability",
                "description": "Possible redirect attack"
            },
            {
                "title": "Malicious redirect possible",
                "description": "User-controlled redirect parameter"
            }
        ]

        for finding in redirect_findings:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is not None
            assert "Open redirect" in reason

    def test_mixed_case_handling(self):
        """Test that pattern matching handles mixed case correctly."""
        mixed_case_findings = [
            {
                "title": "DENIAL OF SERVICE",
                "description": "RESOURCE EXHAUSTION POSSIBLE"
            },
            {
                "title": "Security Issue",
                "description": "ADD INPUT VALIDATION"
            },
            {
                "title": "Security Issue",
                "description": "HARDCODED PASSWORD DETECTED"
            }
        ]

        # First finding should be excluded (DOS)
        reason = HardExclusionRules.get_exclusion_reason(mixed_case_findings[0])
        assert reason is not None

        # Second finding should NOT be excluded (we removed generic validation patterns)
        reason = HardExclusionRules.get_exclusion_reason(mixed_case_findings[1])
        assert reason is None

        # Third finding should NOT be excluded (we removed secrets patterns)
        reason = HardExclusionRules.get_exclusion_reason(mixed_case_findings[2])
        assert reason is None

    def test_empty_finding_handling(self):
        """Test handling of empty or malformed findings."""
        empty_findings = [
            {},
            {"title": "", "description": ""},
            {"title": "Some title"},  # Missing description
            {"description": "Some description"},  # Missing title
            {"title": None, "description": None}
        ]

        for finding in empty_findings:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is None  # Should not crash, just return None

    def test_combined_patterns(self):
        """Test findings that match multiple patterns."""
        finding = {
            "title": "DOS and validation issue",
            "description": "Missing rate limit leads to resource exhaustion"
        }

        reason = HardExclusionRules.get_exclusion_reason(finding)
        assert reason is not None
        # Should match at least one pattern (DOS or rate limiting)

    def test_regex_special_characters(self):
        """Test that regex special characters in findings don't cause issues."""
        findings_with_special_chars = [
            {
                "title": "Issue with $pecial ch@rs",
                "description": "Contains [brackets] and (parentheses)"
            },
            {
                "title": "Path: C:\\Windows\\System32",
                "description": "Backslashes \\ and dots ..."
            },
            {
                "title": "Regex chars: .* + ? ^ $ { } ( ) [ ] \\ |",
                "description": "All the special regex characters"
            }
        ]

        for finding in findings_with_special_chars:
            # Should not raise regex errors
            reason = HardExclusionRules.get_exclusion_reason(finding)
            # These don't match any patterns, so should return None
            assert reason is None

    def test_performance_with_long_text(self):
        """Test performance with very long descriptions."""
        long_text = "A" * 10000  # 10k characters
        finding = {
            "title": "Long finding",
            "description": long_text + " denial of service " + long_text
        }

        # Should handle long text efficiently
        reason = HardExclusionRules.get_exclusion_reason(finding)
        assert reason is not None  # Should find DOS pattern
        assert "DOS/resource exhaustion" in reason

    def test_memory_safety_exclusion_non_cpp_files(self):
        """Test that memory safety issues are excluded in non-C/C++ files."""
        memory_safety_findings = [
            {
                "title": "Buffer overflow vulnerability",
                "description": "Potential buffer overflow in string handling",
                "file": "app.py"
            },
            {
                "title": "Out of bounds access",
                "description": "Array out of bounds write detected",
                "file": "server.js"
            },
            {
                "title": "Memory corruption",
                "description": "Use after free vulnerability found",
                "file": "Main.java"
            },
            {
                "title": "Segmentation fault",
                "description": "Null pointer dereference causes segfault",
                "file": "handler.go"
            },
            {
                "title": "Integer overflow",
                "description": "Integer overflow in calculation",
                "file": "calc.rb"
            }
        ]

        for finding in memory_safety_findings:
            reason = HardExclusionRules.get_exclusion_reason(finding)
            assert reason is not None
            assert "Memory safety finding in non-C/C++ code" in reason

    def
test_memory_safety_not_excluded_cpp_files(self): 371 | """Test that memory safety issues are NOT excluded in C/C++ files.""" 372 | cpp_memory_findings = [ 373 | { 374 | "title": "Buffer overflow", 375 | "description": "Stack buffer overflow in strcpy", 376 | "file": "main.c" 377 | }, 378 | { 379 | "title": "Out of bounds write", 380 | "description": "Array index out of bounds", 381 | "file": "parser.cc" 382 | }, 383 | { 384 | "title": "Memory safety", 385 | "description": "Use after free in destructor", 386 | "file": "object.cpp" 387 | }, 388 | { 389 | "title": "Bounds check missing", 390 | "description": "No bounds checking on user input", 391 | "file": "input.h" 392 | } 393 | ] 394 | 395 | for finding in cpp_memory_findings: 396 | reason = HardExclusionRules.get_exclusion_reason(finding) 397 | assert reason is None # Should NOT be excluded 398 | 399 | def test_memory_safety_exclusion_case_insensitive(self): 400 | """Test that file extension checking is case insensitive.""" 401 | findings = [ 402 | { 403 | "title": "Buffer overflow", 404 | "description": "Buffer overflow detected", 405 | "file": "App.PY" # Uppercase extension 406 | }, 407 | { 408 | "title": "Memory corruption", 409 | "description": "Memory corruption issue", 410 | "file": "SERVER.JS" # All uppercase 411 | } 412 | ] 413 | 414 | for finding in findings: 415 | reason = HardExclusionRules.get_exclusion_reason(finding) 416 | assert reason is not None 417 | assert "Memory safety finding in non-C/C++ code" in reason 418 | 419 | def test_memory_safety_no_file_extension(self): 420 | """Test handling of files without extensions.""" 421 | findings = [ 422 | { 423 | "title": "Buffer overflow", 424 | "description": "Buffer overflow detected", 425 | "file": "Makefile" # No extension 426 | }, 427 | { 428 | "title": "Memory corruption", 429 | "description": "Memory corruption issue", 430 | "file": "" # Empty file path 431 | } 432 | ] 433 | 434 | for finding in findings: 435 | reason = 
HardExclusionRules.get_exclusion_reason(finding) 436 | # Should be excluded since they're not C/C++ files 437 | assert reason is not None 438 | assert "Memory safety finding in non-C/C++ code" in reason --------------------------------------------------------------------------------