├── __init__.py
├── pytest.ini
├── scripts
├── package.json
└── comment-pr-findings.js
├── claudecode
├── audit.py
├── evals
│ ├── __init__.py
│ ├── README.md
│ └── run_eval.py
├── __init__.py
├── requirements.txt
├── constants.py
├── logger.py
├── json_parser.py
├── test_integration.py
├── prompts.py
├── test_json_parser.py
├── test_helper_functions.py
├── test_prompts.py
├── test_github_action_audit.py
├── test_eval_engine.py
├── test_github_client.py
├── findings_filter.py
├── test_claude_runner.py
├── test_findings_conversion.py
└── test_hard_exclusion_rules.py
├── .gitignore
├── .github
└── workflows
│ ├── sast.yml
│ └── test-claudecode.yml
├── examples
├── custom-security-scan-instructions.txt
└── custom-false-positive-filtering.txt
├── LICENSE
├── docs
├── custom-filtering-instructions.md
└── custom-security-scan-instructions.md
├── README.md
├── .claude
└── commands
│ └── security-review.md
└── action.yml
/__init__.py:
--------------------------------------------------------------------------------
1 | # SAST package
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | testpaths = tests
3 | python_files = test_*.py
4 | python_classes = Test*
5 | python_functions = test_*
6 | addopts = -v --tb=short
--------------------------------------------------------------------------------
/scripts/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@anthropic-ai/sast-scripts",
3 | "version": "1.0.0",
4 | "description": "Scripts for Anthropic SAST Action",
5 | "scripts": {
6 | "test": "bun test",
7 | "test:watch": "bun test --watch"
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/claudecode/audit.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Main entry point for ClaudeCode audit tool.
This provides a cleaner interface for running the audit.
"""

# Thin wrapper: all real work lives in claudecode.github_action_audit.
# This module only re-exports its main() and adds a script entry point.
from claudecode.github_action_audit import main

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/claudecode/evals/__init__.py:
--------------------------------------------------------------------------------
1 | """Evaluation tool for SAST."""
2 |
3 | from .eval_engine import EvalCase, EvalResult, EvaluationEngine, run_single_evaluation
4 |
5 | __all__ = [
6 | 'EvalCase',
7 | 'EvalResult',
8 | 'EvaluationEngine',
9 | 'run_single_evaluation',
10 | ]
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Cache directories
2 | .cache/
3 |
4 | # Python
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 | *.pyc
9 |
10 | # Output files
11 | *.csv
12 | *.json
13 | security_report.*
14 |
15 | # Virtual environments
16 | venv/
17 | env/
18 | .venv/
19 |
20 | # Debug files
21 | claudecode/claudecode-prompt.txt
22 | eval_results/
23 |
--------------------------------------------------------------------------------
/claudecode/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | ClaudeCode - AI-Powered PR Security Audit Tool
3 |
4 | A standalone security audit tool that uses Claude Code for comprehensive
5 | security analysis of GitHub pull requests.
6 | """
7 |
8 | __version__ = "1.0.0"
9 | __author__ = "Anthropic Security Team"
10 |
11 | # Import main components for easier access
12 | from claudecode.github_action_audit import (
13 | GitHubActionClient,
14 | SimpleClaudeRunner,
15 | main
16 | )
17 |
18 | __all__ = [
19 | "GitHubActionClient",
20 | "SimpleClaudeRunner",
21 | "main"
22 | ]
--------------------------------------------------------------------------------
/.github/workflows/sast.yml:
--------------------------------------------------------------------------------
1 | name: SAST
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches:
7 | - main
8 | workflow_dispatch:
9 |
10 | jobs:
11 | security-review:
12 | runs-on: ubuntu-24.04
13 | permissions:
14 | contents: read
15 | pull-requests: write
16 | steps:
17 | - uses: actions/checkout@v4
18 |
19 | - uses: ./ # Points directly to action.yml
20 | with:
21 | comment-pr: true
22 | upload-results: true
23 | exclude-directories: "tests/vulnerable"
24 | claude-api-key: ${{ secrets.CLAUDE_API_KEY }}
25 | run-every-commit: true
--------------------------------------------------------------------------------
/claudecode/requirements.txt:
--------------------------------------------------------------------------------
1 | # Requirements for claudecode - Claude Code PR Security Audit Tool
2 | # Core dependencies for the GitHub Action version
3 |
4 | # GitHub API client
5 | PyGithub>=1.59.0
6 |
7 | # HTTP requests for GitHub API
8 | requests>=2.28.0
9 |
10 | # JSON parsing utilities (no additional deps - uses stdlib)
11 | # prompts.py (no additional deps - uses stdlib)
12 | # findings_filter.py (uses re, built-in)
13 |
14 | # Anthropic SDK for Claude API-based false positive filtering
15 | anthropic>=0.39.0
16 |
17 | # Note: Claude CLI tool must be installed separately
18 | # The claude command-line tool is required for security analysis
--------------------------------------------------------------------------------
/claudecode/constants.py:
--------------------------------------------------------------------------------
1 | """
2 | Constants and configuration values for ClaudeCode.
3 | """
4 |
5 | import os
6 |
7 | # API Configuration
8 | DEFAULT_CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL') or 'claude-opus-4-1-20250805'
9 | DEFAULT_TIMEOUT_SECONDS = 180 # 3 minutes
10 | DEFAULT_MAX_RETRIES = 3
11 | RATE_LIMIT_BACKOFF_MAX = 30 # Maximum backoff time for rate limits
12 |
13 | # Token Limits
14 | PROMPT_TOKEN_LIMIT = 16384 # 16k tokens max for claude-opus-4
15 |
16 | # Exit Codes
17 | EXIT_SUCCESS = 0
18 | EXIT_GENERAL_ERROR = 1
19 | EXIT_CONFIGURATION_ERROR = 2
20 |
21 | # Subprocess Configuration
22 | SUBPROCESS_TIMEOUT = 1200 # 20 minutes for Claude Code execution
23 |
24 |
--------------------------------------------------------------------------------
/examples/custom-security-scan-instructions.txt:
--------------------------------------------------------------------------------
1 | **Compliance-Specific Checks:**
2 | - GDPR Article 17 "Right to Erasure" implementation gaps
3 | - HIPAA PHI encryption at rest violations
4 | - PCI DSS credit card data retention beyond allowed periods
5 | - SOC2 audit trail tampering or deletion capabilities
6 | - CCPA data portability API vulnerabilities
7 |
8 | **Financial Services Security:**
9 | - Transaction replay attacks in payment processing
10 | - Double-spending vulnerabilities in ledger systems
11 | - Interest calculation manipulation through timing attacks
12 | - Regulatory reporting data tampering
13 | - Know Your Customer (KYC) bypass mechanisms
14 |
15 | **E-commerce Specific:**
16 | - Shopping cart manipulation for price changes
17 | - Inventory race conditions allowing overselling
18 | - Coupon/discount stacking exploits
19 | - Affiliate tracking manipulation
20 | - Review system authentication bypass
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Anthropic
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/.github/workflows/test-claudecode.yml:
--------------------------------------------------------------------------------
1 | name: Test ClaudeCode Integration
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches:
7 | - main
8 | workflow_dispatch:
9 |
10 | permissions:
11 | contents: read
12 | pull-requests: read
13 | actions: read
14 | checks: read
15 |
16 | jobs:
17 | test-claudecode:
18 | runs-on: ubuntu-latest
19 |
20 | steps:
21 | - uses: actions/checkout@v4
22 |
23 | - name: Set up Python
24 | uses: actions/setup-python@v4
25 | with:
26 | python-version: '3.10'
27 |
28 | - name: Set up Node.js
29 | uses: actions/setup-node@v4
30 | with:
31 | node-version: '20'
32 |
33 | - name: Install Claude CLI
34 | run: |
35 | npm install -g @anthropic-ai/claude-code
36 |
37 | - name: Install dependencies
38 | run: |
39 | pip install pytest pytest-cov
40 | pip install -r claudecode/requirements.txt
41 |
42 | - name: Run ClaudeCode unit tests
43 | run: |
44 | export PYTHONPATH="${PYTHONPATH}:${PWD}"
45 | pytest claudecode -v --cov=claudecode --cov-report=term-missing
46 |
47 | - name: Install Bun
48 | uses: oven-sh/setup-bun@v2
49 | with:
50 | bun-version: latest
51 |
52 | - name: Install script dependencies
53 | run: |
54 | cd scripts
55 | bun install
56 |
57 | - name: Run comment script tests
58 | run: |
59 | cd scripts
60 | bun test
--------------------------------------------------------------------------------
/claudecode/logger.py:
--------------------------------------------------------------------------------
1 | """Logging configuration for ClaudeCode."""
2 |
3 | import logging
4 | import sys
5 | import os
6 |
7 |
def get_logger(name: str) -> logging.Logger:
    """Return a stderr-bound logger, configuring it on first use.

    Args:
        name: The name of the logger (usually __name__)

    Returns:
        Configured logger instance
    """
    logger = logging.getLogger(name)

    # A logger that already has handlers was configured by a prior call.
    if logger.handlers:
        return logger

    stderr_handler = logging.StreamHandler(sys.stderr)

    # Optional "[repo#pr]" prefix derived from the CI environment.
    repo = os.environ.get('GITHUB_REPOSITORY', '')
    pr = os.environ.get('PR_NUMBER', '')

    if repo and pr:
        prefix = f"[{repo}#{pr}]"
    elif repo:
        prefix = f"[{repo}]"
    elif pr:
        prefix = f"[PR#{pr}]"
    else:
        prefix = ""

    # Only include the prefix segment when we actually have one.
    format_str = f'{prefix} [%(name)s] %(message)s' if prefix else '[%(name)s] %(message)s'

    stderr_handler.setFormatter(logging.Formatter(format_str))
    logger.addHandler(stderr_handler)
    logger.setLevel(logging.INFO)

    return logger
--------------------------------------------------------------------------------
/claudecode/evals/README.md:
--------------------------------------------------------------------------------
1 | # SAST Evaluation Tool
2 |
3 | This directory contains a tool for evaluating the SAST (Static Application Security Testing) tool on individual GitHub pull requests.
4 |
5 | ## Overview
6 |
7 | The evaluation tool allows you to run the Claude Code Security Reviewer on any GitHub PR to analyze its security findings. This is useful for:
8 | - Testing the tool on specific PRs
9 | - Evaluating performance and accuracy
10 | - Debugging security analysis issues
11 |
12 | ## Requirements
13 |
14 | - Python 3.9+
15 | - Git 2.20+ (for worktree support)
16 | - GitHub CLI (`gh`) for API access
17 | - Environment variables:
18 | - `ANTHROPIC_API_KEY`: Required for Claude API access
19 | - `GITHUB_TOKEN`: Recommended for GitHub API rate limits
20 |
21 | ## Usage
22 |
23 | Run an evaluation on a single PR:
24 |
25 | ```bash
26 | python -m claudecode.evals.run_eval example/repo#123 --verbose
27 | ```
28 |
29 | ### Command-line Options
30 |
31 | - PR specification: Required positional argument in format `owner/repo#pr_number`
32 | - `--output-dir PATH`: Directory for results (default: `./eval_results`)
33 | - `--work-dir PATH`: Directory where git repositories will be cloned and stored (default: `~/code/audit`)
34 | - `--verbose`: Enable verbose logging to see detailed progress
35 |
36 | ## Output
37 |
38 | The evaluation generates a JSON file in the output directory with:
39 | - Success/failure status
40 | - Runtime metrics
41 | - Security findings count
42 | - Detailed findings with file, line, severity, and descriptions
43 |
44 | Example output file: `pr_example_repo_123.json`
45 |
46 | ## Architecture
47 |
48 | The evaluation tool uses git worktrees for efficient repository management:
49 | 1. Clones the repository once as a base
50 | 2. Creates lightweight worktrees for each PR evaluation
51 | 3. Automatically handles cleanup of worktrees
52 | 4. Runs the SAST audit in the PR-specific worktree
--------------------------------------------------------------------------------
/examples/custom-false-positive-filtering.txt:
--------------------------------------------------------------------------------
1 | HARD EXCLUSIONS - Automatically exclude findings matching these patterns:
2 | 1. All DOS/resource exhaustion - we have k8s resource limits and autoscaling
3 | 2. Missing rate limiting - handled by our API gateway
4 | 3. Tabnabbing vulnerabilities - acceptable risk per our threat model
5 | 4. Test files (ending in _test.go, _test.js, or in __tests__ directories)
6 | 5. Documentation files (*.md, *.rst)
7 | 6. Configuration files that are not exposed to users (internal configs)
8 | 7. Memory safety in Rust, Go, or managed languages
9 | 8. GraphQL introspection queries - we intentionally expose schema in dev
10 | 9. Missing CSRF protection - we use stateless JWT auth exclusively
11 | 10. Timing attacks on non-cryptographic operations
12 | 11. Regex DoS in input validation (we have request timeouts)
13 | 12. Missing security headers in internal services (only public-facing services need them)
14 |
15 | SIGNAL QUALITY CRITERIA - For remaining findings, assess:
16 | 1. Can an unauthenticated external attacker exploit this?
17 | 2. Is there actual data exfiltration or system compromise potential?
18 | 3. Is this exploitable in our production Kubernetes environment?
19 | 4. Does this bypass our API gateway security controls?
20 |
PRECEDENTS - Established facts about our security architecture to apply:
22 | 1. We use AWS Cognito for all authentication - auth bypass must defeat Cognito
23 | 2. All APIs require valid JWT tokens validated at the gateway level
24 | 3. SQL injection is only valid if using raw queries (we use Prisma ORM everywhere)
25 | 4. All internal services communicate over mTLS within the k8s cluster
26 | 5. Secrets are in AWS Secrets Manager or k8s secrets, never in code
27 | 6. We allow verbose error messages in dev/staging (not production)
28 | 7. File uploads go directly to S3 with presigned URLs (no local file handling)
29 | 8. All user input is considered untrusted and validated on the backend
30 | 9. Frontend validation is only for UX, not security
31 | 10. We use CSP headers and strict Content-Type validation
32 | 11. CORS is configured per-service based on actual needs
33 | 12. All webhooks use HMAC signature verification
--------------------------------------------------------------------------------
/docs/custom-filtering-instructions.md:
--------------------------------------------------------------------------------
1 | # Custom False Positive Filtering Instructions
2 |
3 | The Claude Code Security Reviewer Action supports custom false positive filtering instructions, allowing you to tailor the security analysis to your specific environment and requirements.
4 |
5 | ## Overview
6 |
7 | By default, the SAST action includes a comprehensive set of exclusions and criteria for filtering out false positives. However, every organization has unique security requirements, technology stacks, and threat models. The `false-positive-filtering-instructions` input allows you to provide your own custom criteria.
8 |
9 | ## Usage
10 |
11 | 1. Create a text file containing your custom filtering instructions (e.g., `.github/false-positive-filtering.txt`)
12 | 2. Reference it in your workflow:
13 |
14 | ```yaml
15 | - uses: anthropics/claude-code-security-review@main
16 | with:
17 | false-positive-filtering-instructions: .github/false-positive-filtering.txt
18 | ```
19 |
20 | ## File Format
21 |
22 | The file should contain plain text with three main sections:
23 |
24 | ### 1. HARD EXCLUSIONS
25 | List patterns that should be automatically excluded from findings.
26 |
27 | ### 2. SIGNAL QUALITY CRITERIA
28 | Questions to assess whether a finding represents a real vulnerability.
29 |
30 | ### 3. PRECEDENTS
31 | Specific guidance for common security patterns in your environment.
32 |
33 | ## Example
34 |
35 | See [examples/custom-false-positive-filtering.txt](../examples/custom-false-positive-filtering.txt) for a complete example tailored to a modern cloud-native application.
36 |
37 | ## Default Instructions
38 |
39 | If no custom file is provided, the action uses default instructions tuned to work well for most applications.
40 |
41 | ## Best Practices
42 |
43 | 1. **Start with defaults**: Begin with the default instructions and modify based on false positives you encounter
44 | 2. **Be specific**: Include details about your security architecture (e.g., "We use AWS Cognito for all authentication")
45 | 3. **Document assumptions**: Explain why certain patterns are excluded (e.g., "k8s resource limits prevent DOS")
46 | 4. **Version control**: Track changes to your filtering instructions alongside your code
47 | 5. **Team review**: Have your security team review and approve the filtering instructions
48 |
49 | ## Common Customizations
50 |
51 | - **Technology-specific exclusions**: Exclude findings that don't apply to your tech stack
52 | - **Infrastructure assumptions**: Document security controls at the infrastructure level
53 | - **Compliance requirements**: Adjust criteria based on your compliance needs
54 | - **Development practices**: Reflect your team's security practices and tooling
--------------------------------------------------------------------------------
/claudecode/json_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Utilities for parsing JSON from text output."""
3 |
4 | import json
5 | import re
6 | import logging
7 |
8 | # Configure logging
9 | logger = logging.getLogger(__name__)
10 |
11 |
def extract_json_from_text(text):
    """
    Extract a JSON object from text, looking in various formats and locations.

    Fenced markdown code blocks are searched first; failing that, the raw
    text is scanned for the first parseable JSON object.

    Args:
        text: The text that may contain JSON

    Returns:
        dict: Parsed JSON object if found, None otherwise
    """
    if not isinstance(text, str):
        # Preserve the old broad-except tolerance: non-string input -> None.
        return None

    # First, try to extract JSON from markdown code blocks
    # (with or without a language tag).
    fenced_matches = [
        re.search(r'```json\s*(.*?)\s*```', text, re.DOTALL),
        re.search(r'```\s*(\{.*?\})\s*```', text, re.DOTALL),
    ]
    for match in fenced_matches:
        if not match:
            continue
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError:
            continue

    # Fall back to scanning the whole text. raw_decode() parses a complete
    # JSON value starting at a given index, which -- unlike naive brace
    # counting -- correctly handles braces inside string literals such as
    # '{"s": "}"}'.
    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        start = text.find('{', start + 1)

    return None
59 |
60 |
def parse_json_with_fallbacks(text, error_context=""):
    """
    Parse JSON from text with multiple fallback strategies and error handling.

    Args:
        text: The text to parse
        error_context: Context string for error messages

    Returns:
        tuple: (success, result) where result is either the parsed JSON dict
            or error info
    """
    try:
        # First, try direct JSON parsing.
        return True, json.loads(text)
    except json.JSONDecodeError:
        pass

    # Try extracting JSON embedded in surrounding text.
    extracted_json = extract_json_from_text(text)
    # Compare against None explicitly: an empty dict ({}) is falsy but is
    # still a successfully extracted JSON object.
    if extracted_json is not None:
        return True, extracted_json

    # If all parsing failed, return error info with the raw output attached
    # for debugging.
    error_msg = "Failed to parse JSON"
    if error_context:
        error_msg = f"{error_context}: {error_msg}"

    logger.error(f"{error_msg}. Raw output: {repr(text)}")
    return False, {"error": f"Invalid JSON response -- raw output: {repr(text)}"}
--------------------------------------------------------------------------------
/docs/custom-security-scan-instructions.md:
--------------------------------------------------------------------------------
1 | # Custom Security Scan Instructions
2 |
3 | The Claude Code Security Reviewer Action supports custom security scan instructions, allowing you to add organization-specific vulnerability categories to the security audit.
4 |
5 | ## Overview
6 |
7 | The default security scan covers common vulnerability categories like SQL injection, XSS, authentication issues, etc. However, organizations often have specific security concerns based on their:
8 | - Technology stack (GraphQL, gRPC, specific cloud providers)
9 | - Compliance requirements (GDPR, HIPAA, PCI DSS)
10 | - Industry-specific vulnerabilities (financial services, healthcare)
11 | - Custom frameworks and libraries
12 |
13 | The `custom-security-scan-instructions` input allows you to extend the security categories that Claude checks for.
14 |
15 | ## Usage
16 |
17 | 1. Create a text file containing your custom security categories (e.g., `.github/custom-security-categories.txt`)
18 | 2. Reference it in your workflow:
19 |
20 | ```yaml
21 | - uses: anthropics/claude-code-security-review@main
22 | with:
23 | custom-security-scan-instructions: .github/custom-security-categories.txt
24 | ```
25 |
26 | ## File Format
27 |
28 | The file should contain additional security categories in the same format as the default categories. Each category should:
29 | - Start with a descriptive header in bold (using `**Category Name:**`)
30 | - List specific vulnerabilities or patterns to check for
31 | - Use clear, actionable descriptions
32 |
33 | ### Example Structure:
34 | ```
35 | **Category Name:**
36 | - Specific vulnerability or pattern to check
37 | - Another specific issue to look for
38 | - Detailed description of what constitutes this vulnerability
39 |
40 | **Another Category:**
41 | - More specific checks
42 | - Additional patterns to identify
43 | ```
44 |
45 | ## Examples
46 |
47 | ### Industry-Specific Example
See [examples/custom-security-scan-instructions.txt](../examples/custom-security-scan-instructions.txt) for an example set of instructions that customize Claude Code to look for industry-specific security weaknesses including:
49 | - Compliance checks (GDPR, HIPAA, PCI DSS)
50 | - Financial services security
51 | - E-commerce specific issues
52 |
53 | ## How It Works
54 |
55 | Your custom instructions are appended to the security audit prompt after the default "Data Exposure" category. This means:
56 | 1. All default categories are still checked
57 | 2. Your custom categories extend (not replace) the default scan
58 | 3. The same HIGH/MEDIUM/LOW severity guidelines apply
59 |
60 | ## Best Practices
61 |
62 | 1. **Be Specific**: Provide clear descriptions of what constitutes each vulnerability
63 | 2. **Include Context**: Explain why something is a vulnerability in your environment
64 | 3. **Provide Examples**: Where possible, describe specific attack scenarios
65 | 4. **Avoid Duplicates**: Check the default categories to avoid redundancy
66 | 5. **Keep It Focused**: Only add categories relevant to your codebase
67 |
68 | ## Default Categories Reference
69 |
70 | The default scan already includes:
71 | - Input Validation (SQL injection, command injection, XXE, etc.)
72 | - Authentication & Authorization
73 | - Crypto & Secrets Management
74 | - Injection & Code Execution
75 | - Data Exposure
76 |
77 | Your custom categories should complement these, not duplicate them.
78 |
79 | ## Tips for Writing Effective Categories
80 |
81 | 1. **Technology-Specific**: Add checks for your specific tech stack
82 | ```
83 | **GraphQL Security:**
84 | - Query depth attacks allowing unbounded recursion
85 | - Field-level authorization bypass
86 | - Introspection data leakage in production
87 | ```
88 |
89 | 2. **Compliance-Focused**: Add regulatory requirements
90 | ```
91 | **GDPR Compliance:**
92 | - Personal data processing without consent mechanisms
93 | - Missing data retention limits
94 | - Lack of data portability APIs
95 | ```
96 |
97 | 3. **Business Logic**: Add domain-specific vulnerabilities
98 | ```
99 | **Payment Processing:**
100 | - Transaction replay vulnerabilities
101 | - Currency conversion manipulation
102 | - Refund process bypass
103 | ```
--------------------------------------------------------------------------------
/claudecode/test_integration.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Pytest tests for SAST integration components.
4 | """
5 |
6 | import pytest
7 | import json
8 |
class TestClaudeCodeAudit:
    """Test the main audit functionality."""

    @pytest.fixture
    def mock_env(self, monkeypatch):
        """Set up mock environment variables."""
        monkeypatch.setenv('GITHUB_REPOSITORY', 'test/repo')
        monkeypatch.setenv('PR_NUMBER', '123')
        monkeypatch.setenv('GITHUB_TOKEN', 'mock-token')
        monkeypatch.setenv('ANTHROPIC_API_KEY', 'mock-api-key')

    def _expect_config_error(self, capsys, expected_fragment):
        """Run main(), assert a configuration-error exit, and check the message.

        Asserts exit code 2 (EXIT_CONFIGURATION_ERROR) and that the JSON
        error payload printed to stdout mentions `expected_fragment`.
        """
        from claudecode import github_action_audit

        with pytest.raises(SystemExit) as exc_info:
            github_action_audit.main()
        assert exc_info.value.code == 2  # EXIT_CONFIGURATION_ERROR
        payload = json.loads(capsys.readouterr().out)
        assert expected_fragment in payload['error']

    def test_missing_environment_variables(self, monkeypatch, capsys):
        """Test behavior with missing environment variables."""
        # Missing GITHUB_REPOSITORY
        monkeypatch.delenv('GITHUB_REPOSITORY', raising=False)
        self._expect_config_error(capsys, 'GITHUB_REPOSITORY')

        # Missing PR_NUMBER (with GITHUB_REPOSITORY restored)
        monkeypatch.setenv('GITHUB_REPOSITORY', 'test/repo')
        monkeypatch.delenv('PR_NUMBER', raising=False)
        self._expect_config_error(capsys, 'PR_NUMBER')

    def test_invalid_pr_number(self, monkeypatch, capsys):
        """Test behavior with invalid PR number."""
        monkeypatch.setenv('GITHUB_REPOSITORY', 'test/repo')
        monkeypatch.setenv('PR_NUMBER', 'invalid')
        monkeypatch.setenv('GITHUB_TOKEN', 'mock-token')
        self._expect_config_error(capsys, 'Invalid PR_NUMBER')
57 |
58 |
class TestEnvironmentSetup:
    """Test environment setup and configuration."""

    def test_anthropic_api_key_handling(self, monkeypatch):
        """Test handling of Anthropic API key."""
        from claudecode.github_action_audit import SimpleClaudeRunner

        runner = SimpleClaudeRunner()

        # With the key present, validation should only fail when the Claude
        # CLI itself is missing -- which we treat as a hard test failure.
        monkeypatch.setenv('ANTHROPIC_API_KEY', 'test-key')
        is_valid, message = runner.validate_claude_available()
        if not is_valid and 'not installed' in message:
            pytest.fail("Claude CLI not installed")

        # Without the key, validation must fail and name the variable
        # (unless the CLI is missing, which masks this check).
        monkeypatch.delenv('ANTHROPIC_API_KEY', raising=False)
        is_valid, message = runner.validate_claude_available()
        if 'not installed' not in message:
            assert not is_valid
            assert 'ANTHROPIC_API_KEY' in message
81 |
82 |
class TestFilteringIntegration:
    """Test the filtering system integration."""

    def test_full_filter_with_llm_disabled(self):
        """Test FindingsFilter with LLM filtering disabled."""
        from claudecode.findings_filter import FindingsFilter

        # Hard-exclusion rules only; Claude-based filtering switched off.
        findings_filter = FindingsFilter(
            use_hard_exclusions=True,
            use_claude_filtering=False,
        )

        sample_findings = [
            {'description': 'SQL injection vulnerability', 'severity': 'HIGH'},
            {'description': 'Missing rate limiting', 'severity': 'MEDIUM'},
        ]

        success, results, stats = findings_filter.filter_findings(sample_findings)

        assert success is True
        assert stats.total_findings == 2
        # SQL injection survives; rate limiting is hard-excluded.
        assert stats.kept_findings == 1
        assert stats.hard_excluded == 1
        assert stats.claude_excluded == 0  # No Claude filtering
108 |
--------------------------------------------------------------------------------
/claudecode/evals/run_eval.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """CLI for running SAST evaluation on a single PR."""
3 |
4 | import argparse
5 | import os
6 | import sys
7 | import json
8 | from pathlib import Path
9 | from typing import Dict, Any, Optional, List
10 | from dataclasses import dataclass, asdict
11 |
12 | # Import the minimal required functionality
13 |
14 |
@dataclass
class EvalCase:
    """Single evaluation test case."""
    repo_name: str  # Repository in "owner/repo" form
    pr_number: int  # Pull request number within that repository
    description: str = ""  # Optional human-readable label for the case
21 |
22 |
@dataclass
class EvalResult:
    """Result of a single evaluation."""
    # Identity of the evaluated PR (same fields as the corresponding case).
    repo_name: str
    pr_number: int
    description: str

    # Evaluation results
    success: bool  # Whether the evaluation run completed successfully
    runtime_seconds: float  # Wall-clock duration of the evaluation
    findings_count: int  # Number of security findings reported
    # NOTE(review): presumably True when findings_count > 0 -- confirm
    # against the producer in eval_engine.
    detected_vulnerabilities: bool

    # Optional fields
    error_message: str = ""  # Error details, if any
    findings_summary: Optional[List[Dict[str, Any]]] = None  # Condensed findings
    full_findings: Optional[List[Dict[str, Any]]] = None  # Complete finding records

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return asdict(self)
44 |
45 |
def _parse_pr_spec(spec):
    """Parse a PR spec of the form 'owner/repo#number'.

    Args:
        spec: String such as 'example/repo#123'.

    Returns:
        Tuple of (repo_part, pr_number) where repo_part is 'owner/repo'.

    Raises:
        ValueError: If the spec is not in the expected format (missing or
            extra '#', non-numeric PR number, malformed repository part).
    """
    # A spec with zero or multiple '#' characters raises ValueError from the
    # unpacking below; int() raises ValueError for a non-numeric PR number.
    repo_part, pr_text = spec.split('#')
    pr_number = int(pr_text)
    # Validate repository format
    if '/' not in repo_part or len(repo_part.split('/')) != 2:
        raise ValueError("Repository must be in format 'owner/repo'")
    owner, repo = repo_part.split('/')
    if not owner or not repo:
        raise ValueError("Repository owner and name cannot be empty")
    return repo_part, pr_number


def _print_findings(result):
    """Print findings from an EvalResult, preferring full details over the summary."""
    if result.full_findings:
        print("\nFindings:")
        for finding in result.full_findings:
            print(f"  - [{finding.get('severity', 'UNKNOWN')}] {finding.get('file', 'unknown')}:{finding.get('line', '?')}")
            if 'category' in finding:
                print(f"    Category: {finding['category']}")
            if 'description' in finding:
                print(f"    Description: {finding['description']}")
            if 'exploit_scenario' in finding:
                print(f"    Exploit: {finding['exploit_scenario']}")
            if 'recommendation' in finding:
                print(f"    Fix: {finding['recommendation']}")
            if 'confidence' in finding:
                print(f"    Confidence: {finding['confidence']}")
            print()  # Empty line between findings
    elif result.findings_summary:
        # Fallback to summary if full findings not available
        print("\nFindings:")
        for finding in result.findings_summary:
            print(f"  - [{finding.get('severity', 'UNKNOWN')}] {finding.get('file', 'unknown')}:{finding.get('line', '?')}")
            if 'title' in finding and finding['title'] != 'Unknown':
                print(f"    {finding['title']}")
            if 'description' in finding and finding['description'] != 'Unknown':
                print(f"    {finding['description']}")


def main():
    """Main entry point for single PR SAST evaluation.

    Parses CLI arguments, validates the environment, runs the evaluation,
    prints a human-readable report, saves the JSON result, and exits with
    status 0 on success / 1 on failure.
    """
    parser = argparse.ArgumentParser(
        description="Run SAST security evaluation on a single GitHub PR",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "pr",
        type=str,
        help="PR to evaluate in format 'repo_owner/repo_name#pr_number' (e.g., 'example/repo#123')"
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        default="./eval_results",
        help="Directory for evaluation results"
    )

    parser.add_argument(
        "--work-dir",
        type=str,
        default=None,
        help="Directory for temporary repositories (defaults to ~/code/audit)"
    )

    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose logging"
    )

    args = parser.parse_args()

    # Set EVAL_MODE=1 automatically for evaluation runs
    os.environ['EVAL_MODE'] = '1'

    # Check for required environment variables
    if not os.environ.get('ANTHROPIC_API_KEY'):
        print("Error: ANTHROPIC_API_KEY environment variable is not set")
        print("Please set it before running the evaluation")
        sys.exit(1)

    # Parse the PR specification
    try:
        repo_part, pr_number = _parse_pr_spec(args.pr)
    except ValueError as e:
        print(f"Error: Invalid PR format '{args.pr}': {e}")
        print("Expected format: 'repo_owner/repo_name#pr_number'")
        print("Example: 'example/repo#123'")
        sys.exit(1)

    print(f"\nEvaluating PR: {repo_part}#{pr_number}")
    print("-" * 60)

    # Create test case
    test_case = EvalCase(
        repo_name=repo_part,
        pr_number=pr_number,
        description=f"Evaluation for {repo_part}#{pr_number}"
    )

    # Imported lazily so argument parsing/validation does not pull in the
    # heavier evaluation dependencies.
    from .eval_engine import run_single_evaluation

    # Run the evaluation
    result = run_single_evaluation(test_case, verbose=args.verbose, work_dir=args.work_dir)

    # Display results
    print("\n" + "=" * 60)
    print("EVALUATION RESULTS:")
    print(f"Success: {result.success}")
    print(f"Runtime: {result.runtime_seconds:.1f} seconds")
    print(f"Vulnerabilities detected: {result.detected_vulnerabilities}")
    print(f"Findings count: {result.findings_count}")

    if result.error_message:
        print(f"Error: {result.error_message}")

    _print_findings(result)

    # Save result to output directory
    output_path = Path(args.output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    result_file = output_path / f"pr_{repo_part.replace('/', '_')}_{pr_number}.json"

    with open(result_file, 'w') as f:
        json.dump(result.to_dict(), f, indent=2)

    print(f"\nResult saved to: {result_file}")

    # Exit with appropriate code
    sys.exit(0 if result.success else 1)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/claudecode/prompts.py:
--------------------------------------------------------------------------------
1 | """Security audit prompt templates."""
2 |
def get_security_audit_prompt(pr_data, pr_diff=None, include_diff=True, custom_scan_instructions=None):
    """Generate security audit prompt for Claude Code.

    Fixes relative to the previous version: "deseralization" typo corrected,
    and the embedded output-format example is now strictly valid JSON (no
    trailing comma), so the model is shown exactly the parseable shape we
    expect back.

    Args:
        pr_data: PR data dictionary from GitHub API
        pr_diff: Optional complete PR diff in unified format
        include_diff: Whether to include the diff in the prompt (default: True)
        custom_scan_instructions: Optional custom security categories to append

    Returns:
        Formatted prompt string
    """

    files_changed = "\n".join([f"- {f['filename']}" for f in pr_data['files']])

    # Add diff section if provided and include_diff is True
    diff_section = ""
    if pr_diff and include_diff:
        diff_section = f"""

PR DIFF CONTENT:
```
{pr_diff}
```

Review the complete diff above. This contains all code changes in the PR.
"""
    elif pr_diff and not include_diff:
        diff_section = """

NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR.
"""

    # Add custom security categories if provided
    custom_categories_section = ""
    if custom_scan_instructions:
        custom_categories_section = f"\n{custom_scan_instructions}\n"

    return f"""
You are a senior security engineer conducting a focused security review of GitHub PR #{pr_data['number']}: "{pr_data['title']}"

CONTEXT:
- Repository: {pr_data.get('head', {}).get('repo', {}).get('full_name', 'unknown')}
- Author: {pr_data['user']}
- Files changed: {pr_data['changed_files']}
- Lines added: {pr_data['additions']}
- Lines deleted: {pr_data['deletions']}

Files modified:
{files_changed}{diff_section}

OBJECTIVE:
Perform a security-focused code review to identify HIGH-CONFIDENCE security vulnerabilities that could have real exploitation potential. This is not a general code review - focus ONLY on security implications newly added by this PR. Do not comment on existing security concerns.

CRITICAL INSTRUCTIONS:
1. MINIMIZE FALSE POSITIVES: Only flag issues where you're >80% confident of actual exploitability
2. AVOID NOISE: Skip theoretical issues, style concerns, or low-impact findings
3. FOCUS ON IMPACT: Prioritize vulnerabilities that could lead to unauthorized access, data breaches, or system compromise
4. EXCLUSIONS: Do NOT report the following issue types:
   - Denial of Service (DOS) vulnerabilities, even if they allow service disruption
   - Secrets or sensitive data stored on disk (these are handled by other processes)
   - Rate limiting or resource exhaustion issues

SECURITY CATEGORIES TO EXAMINE:

**Input Validation Vulnerabilities:**
- SQL injection via unsanitized user input
- Command injection in system calls or subprocesses
- XXE injection in XML parsing
- Template injection in templating engines
- NoSQL injection in database queries
- Path traversal in file operations

**Authentication & Authorization Issues:**
- Authentication bypass logic
- Privilege escalation paths
- Session management flaws
- JWT token vulnerabilities
- Authorization logic bypasses

**Crypto & Secrets Management:**
- Hardcoded API keys, passwords, or tokens
- Weak cryptographic algorithms or implementations
- Improper key storage or management
- Cryptographic randomness issues
- Certificate validation bypasses

**Injection & Code Execution:**
- Remote code execution via deserialization
- Pickle injection in Python
- YAML deserialization vulnerabilities
- Eval injection in dynamic code execution
- XSS vulnerabilities in web applications (reflected, stored, DOM-based)

**Data Exposure:**
- Sensitive data logging or storage
- PII handling violations
- API endpoint data leakage
- Debug information exposure
{custom_categories_section}
Additional notes:
- Even if something is only exploitable from the local network, it can still be a HIGH severity issue

ANALYSIS METHODOLOGY:

Phase 1 - Repository Context Research (Use file search tools):
- Identify existing security frameworks and libraries in use
- Look for established secure coding patterns in the codebase
- Examine existing sanitization and validation patterns
- Understand the project's security model and threat model

Phase 2 - Comparative Analysis:
- Compare new code changes against existing security patterns
- Identify deviations from established secure practices
- Look for inconsistent security implementations
- Flag code that introduces new attack surfaces

Phase 3 - Vulnerability Assessment:
- Examine each modified file for security implications
- Trace data flow from user inputs to sensitive operations
- Look for privilege boundaries being crossed unsafely
- Identify injection points and unsafe deserialization

REQUIRED OUTPUT FORMAT:

You MUST output your findings as structured JSON with this exact schema:

{{
  "findings": [
    {{
      "file": "path/to/file.py",
      "line": 42,
      "severity": "HIGH",
      "category": "sql_injection",
      "description": "User input passed to SQL query without parameterization",
      "exploit_scenario": "Attacker could extract database contents by manipulating the 'search' parameter with SQL injection payloads like '1; DROP TABLE users--'",
      "recommendation": "Replace string formatting with parameterized queries using SQLAlchemy or equivalent",
      "confidence": 0.95
    }}
  ],
  "analysis_summary": {{
    "files_reviewed": 8,
    "high_severity": 1,
    "medium_severity": 0,
    "low_severity": 0,
    "review_completed": true
  }}
}}

SEVERITY GUIDELINES:
- **HIGH**: Directly exploitable vulnerabilities leading to RCE, data breach, or authentication bypass
- **MEDIUM**: Vulnerabilities requiring specific conditions but with significant impact
- **LOW**: Defense-in-depth issues or lower-impact vulnerabilities

CONFIDENCE SCORING:
- 0.9-1.0: Certain exploit path identified, tested if possible
- 0.8-0.9: Clear vulnerability pattern with known exploitation methods
- 0.7-0.8: Suspicious pattern requiring specific conditions to exploit
- Below 0.7: Don't report (too speculative)

FINAL REMINDER:
Focus on HIGH and MEDIUM findings only. Better to miss some theoretical issues than flood the report with false positives. Each finding should be something a security engineer would confidently raise in a PR review.

IMPORTANT EXCLUSIONS - DO NOT REPORT:
- Denial of Service (DOS) vulnerabilities or resource exhaustion attacks
- Secrets/credentials stored on disk (these are managed separately)
- Rate limiting concerns or service overload scenarios. Services do not need to implement rate limiting.
- Memory consumption or CPU exhaustion issues.
- Lack of input validation on non-security-critical fields. If there isn't a proven problem from a lack of input validation, don't report it.

Begin your analysis now. Use the repository exploration tools to understand the codebase context, then analyze the PR changes for security implications.

Your final reply must contain the JSON and nothing else. You should not reply again after outputting the JSON.
"""
--------------------------------------------------------------------------------
/claudecode/test_json_parser.py:
--------------------------------------------------------------------------------
1 | """Unit tests for the json_parser module."""
2 |
3 | import json
4 | from typing import Any, Dict
5 | from claudecode.json_parser import parse_json_with_fallbacks, extract_json_from_text
6 |
7 |
class TestJsonParser:
    """Test JSON parsing utilities.

    Covers two helpers:
    - parse_json_with_fallbacks: returns (success, parsed_or_error_dict).
    - extract_json_from_text: pulls the first JSON object out of free-form
      text (with or without markdown code fences), returning None if absent.
    """

    def test_parse_valid_json(self):
        """Test parsing valid JSON string."""
        valid_json = '{"key": "value", "number": 42, "array": [1, 2, 3]}'
        success, result = parse_json_with_fallbacks(valid_json)

        assert success is True
        assert result == {"key": "value", "number": 42, "array": [1, 2, 3]}

    def test_parse_json_with_whitespace(self):
        """Test parsing JSON with extra whitespace."""
        json_with_spaces = '  \n  {"key": "value"}  \n  '
        success, result = parse_json_with_fallbacks(json_with_spaces)

        assert success is True
        assert result == {"key": "value"}

    def test_parse_empty_json_object(self):
        """Test parsing empty JSON object."""
        success, result = parse_json_with_fallbacks('{}')
        assert success is True
        assert result == {}

    def test_parse_empty_json_array(self):
        """Test parsing empty JSON array."""
        success, result = parse_json_with_fallbacks('[]')
        assert success is True
        assert result == []

    def test_parse_nested_json(self):
        """Test parsing nested JSON structures."""
        nested_json = '''
        {
            "level1": {
                "level2": {
                    "level3": ["a", "b", "c"]
                }
            }
        }
        '''
        success, result = parse_json_with_fallbacks(nested_json)

        assert success is True
        assert isinstance(result, dict)
        # Type narrowing for pyright
        result_dict: Dict[str, Any] = result
        assert result_dict["level1"]["level2"]["level3"] == ["a", "b", "c"]

    def test_parse_json_with_unicode(self):
        """Test parsing JSON with unicode characters."""
        unicode_json = '{"emoji": "🔒", "text": "Hello λ world"}'
        success, result = parse_json_with_fallbacks(unicode_json)

        assert success is True
        assert result["emoji"] == "🔒"
        assert result["text"] == "Hello λ world"

    def test_parse_json_with_escaped_characters(self):
        """Test parsing JSON with escaped characters."""
        # Double-escaping: once for the Python literal, once for JSON.
        escaped_json = '{"path": "C:\\\\Users\\\\test", "quote": "\\"Hello\\""}'
        success, result = parse_json_with_fallbacks(escaped_json)

        assert success is True
        assert result["path"] == "C:\\Users\\test"
        assert result["quote"] == '"Hello"'

    def test_extract_json_from_text_with_backticks(self):
        """Test extracting JSON from markdown code blocks."""
        text_with_json = '''
        Here is some text before the JSON:

        ```json
        {"extracted": true, "value": 123}
        ```

        And some text after.
        '''
        result = extract_json_from_text(text_with_json)

        assert result == {"extracted": True, "value": 123}

    def test_extract_json_from_text_without_backticks(self):
        """Test extracting JSON from plain text."""
        text_with_json = '''
        Some text before
        {"plain": "json", "number": 456}
        Some text after
        '''
        result = extract_json_from_text(text_with_json)

        assert result == {"plain": "json", "number": 456}

    def test_extract_json_array_from_text(self):
        """Test extracting JSON array from text (currently not supported)."""
        text_with_array = '''
        Results:
        [{"id": 1}, {"id": 2}, {"id": 3}]
        Done.
        '''
        result = extract_json_from_text(text_with_array)

        # The function currently only extracts objects, not arrays
        # It should extract the first object it finds
        assert result == {"id": 1}

    def test_extract_json_with_multiple_blocks(self):
        """Test extracting JSON when multiple JSON blocks exist."""
        text_with_multiple = '''
        First block:
        {"first": true}

        Second block:
        {"second": true, "larger": "block"}
        '''
        # Should extract the first valid JSON block found
        # (either candidate is accepted so the test is robust to strategy changes)
        result = extract_json_from_text(text_with_multiple)

        assert result == {"first": True} or result == {"second": True, "larger": "block"}

    def test_parse_invalid_json_returns_error(self):
        """Test parsing invalid JSON returns error."""
        invalid_jsons = [
            '{invalid json}',
            '{"unclosed": "string}',
            '{"trailing": "comma",}',
            '{unquoted: key}',
            'not json at all',
            ''
        ]

        for invalid in invalid_jsons:
            success, result = parse_json_with_fallbacks(invalid)
            assert success is False
            assert "error" in result

    def test_extract_json_from_text_no_json(self):
        """Test extracting JSON from text with no JSON returns None."""
        texts_without_json = [
            'This is just plain text',
            '```python\nprint("hello")\n```',
            '',
            None
        ]

        for text in texts_without_json:
            result = extract_json_from_text(text)
            assert result is None

    def test_parse_json_with_comments(self):
        """Test parsing JSON that might have comments (should fail)."""
        json_with_comments = '''
        {
            // This is a comment
            "key": "value"
        }
        '''
        success, result = parse_json_with_fallbacks(json_with_comments)
        assert success is False  # Standard JSON doesn't support comments
        assert "error" in result

    def test_extract_json_with_syntax_errors_in_text(self):
        """Test extracting JSON when there are syntax errors in surrounding text."""
        text = '''
        Here's some code with errors: print(

        But the JSON is valid:
        {"valid": "json", "number": 789}

        More broken code: }{][
        '''
        result = extract_json_from_text(text)

        assert result == {"valid": "json", "number": 789}

    def test_large_json_parsing(self):
        """Test parsing large JSON structures."""
        large_json = {
            "findings": [
                {
                    "id": i,
                    "title": f"Finding {i}",
                    "description": f"Description for finding {i}",
                    "severity": "medium",
                    "file": f"/path/to/file{i}.py",
                    "line": i * 10
                }
                for i in range(100)
            ]
        }

        json_string = json.dumps(large_json)
        success, result = parse_json_with_fallbacks(json_string)

        assert success is True
        assert result == large_json
        assert len(result["findings"]) == 100

    def test_json_with_special_characters_in_strings(self):
        """Test JSON with special characters in string values."""
        # Round-trips through json.dumps so escaping is handled by the stdlib.
        special_json = {
            "newline": "line1\nline2",
            "tab": "before\tafter",
            "backslash": "path\\to\\file",
            "quotes": 'He said "Hello"',
            "unicode": "café ☕",
            "emoji": "🔒 Security 🛡️"
        }

        json_string = json.dumps(special_json)
        success, result = parse_json_with_fallbacks(json_string)

        assert success is True
        assert result == special_json

    def test_extract_json_from_nested_code_blocks(self):
        """Test extracting JSON from nested code blocks."""
        text = '''
        Here's a code block within text:

        ```
        Some other code
        ```json
        {"nested": "json"}
        ```
        ```
        '''
        result = extract_json_from_text(text)

        # Should be able to extract the JSON
        assert result == {"nested": "json"}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Claude Code Security Reviewer
2 |
3 | An AI-powered security review GitHub Action using Claude to analyze code changes for security vulnerabilities. This action provides intelligent, context-aware security analysis for pull requests using Anthropic's Claude Code tool for deep semantic security analysis. See our blog post [here](https://www.anthropic.com/news/automate-security-reviews-with-claude-code) for more details.
4 |
5 | ## Features
6 |
7 | - **AI-Powered Analysis**: Uses Claude's advanced reasoning to detect security vulnerabilities with deep semantic understanding
8 | - **Diff-Aware Scanning**: For PRs, only analyzes changed files
9 | - **PR Comments**: Automatically comments on PRs with security findings
10 | - **Contextual Understanding**: Goes beyond pattern matching to understand code semantics
11 | - **Language Agnostic**: Works with any programming language
12 | - **False Positive Filtering**: Advanced filtering to reduce noise and focus on real vulnerabilities
13 |
14 | ## Quick Start
15 |
16 | Add this to your repository's `.github/workflows/security.yml`:
17 |
18 | ```yaml
19 | name: Security Review
20 |
21 | permissions:
22 | pull-requests: write # Needed for leaving PR comments
23 | contents: read
24 |
25 | on:
26 | pull_request:
27 |
28 | jobs:
29 | security:
30 | runs-on: ubuntu-latest
31 | steps:
32 | - uses: actions/checkout@v4
33 | with:
34 | ref: ${{ github.event.pull_request.head.sha || github.sha }}
35 | fetch-depth: 2
36 |
37 | - uses: anthropics/claude-code-security-review@main
38 | with:
39 | comment-pr: true
40 | claude-api-key: ${{ secrets.CLAUDE_API_KEY }}
41 | ```
42 |
43 | ## Security Considerations
44 |
45 | This action is not hardened against prompt injection attacks and should only be used to review trusted PRs. We recommend [configuring your repository](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/enabling-features-for-your-repository/managing-github-actions-settings-for-a-repository#controlling-changes-from-forks-to-workflows-in-public-repositories) to use the "Require approval for all external contributors" option to ensure workflows only run after a maintainer has reviewed the PR.
46 |
47 | ## Configuration Options
48 |
49 | ### Action Inputs
50 |
51 | | Input | Description | Default | Required |
52 | |-------|-------------|---------|----------|
53 | | `claude-api-key` | Anthropic Claude API key for security analysis.
*Note*: This API key needs to be enabled for both the Claude API and Claude Code usage. | None | Yes |
54 | | `comment-pr` | Whether to comment on PRs with findings | `true` | No |
55 | | `upload-results` | Whether to upload results as artifacts | `true` | No |
56 | | `exclude-directories` | Comma-separated list of directories to exclude from scanning | None | No |
57 | | `claude-model` | Claude [model name](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-names) to use. Defaults to Opus 4.1. | `claude-opus-4-1-20250805` | No |
58 | | `claudecode-timeout` | Timeout for ClaudeCode analysis in minutes | `20` | No |
59 | | `run-every-commit` | Run ClaudeCode on every commit (skips cache check). Warning: May increase false positives on PRs with many commits. | `false` | No |
60 | | `false-positive-filtering-instructions` | Path to custom false positive filtering instructions text file | None | No |
61 | | `custom-security-scan-instructions` | Path to custom security scan instructions text file to append to audit prompt | None | No |
62 |
63 | ### Action Outputs
64 |
65 | | Output | Description |
66 | |--------|-------------|
67 | | `findings-count` | Total number of security findings |
68 | | `results-file` | Path to the results JSON file |
69 |
70 | ## How It Works
71 |
72 | ### Architecture
73 |
74 | ```
75 | claudecode/
76 | ├── github_action_audit.py # Main audit script for GitHub Actions
77 | ├── prompts.py # Security audit prompt templates
78 | ├── findings_filter.py # False positive filtering logic
79 | ├── claude_api_client.py # Claude API client for false positive filtering
80 | ├── json_parser.py # Robust JSON parsing utilities
81 | ├── requirements.txt # Python dependencies
82 | ├── test_*.py # Test suites
83 | └── evals/ # Eval tooling to test CC on arbitrary PRs
84 | ```
85 |
86 | ### Workflow
87 |
88 | 1. **PR Analysis**: When a pull request is opened, Claude analyzes the diff to understand what changed
89 | 2. **Contextual Review**: Claude examines the code changes in context, understanding the purpose and potential security implications
90 | 3. **Finding Generation**: Security issues are identified with detailed explanations, severity ratings, and remediation guidance
91 | 4. **False Positive Filtering**: Advanced filtering removes low-impact or false positive prone findings to reduce noise
92 | 5. **PR Comments**: Findings are posted as review comments on the specific lines of code
93 |
94 | ## Security Analysis Capabilities
95 |
96 | ### Types of Vulnerabilities Detected
97 |
98 | - **Injection Attacks**: SQL injection, command injection, LDAP injection, XPath injection, NoSQL injection, XXE
99 | - **Authentication & Authorization**: Broken authentication, privilege escalation, insecure direct object references, bypass logic, session flaws
100 | - **Data Exposure**: Hardcoded secrets, sensitive data logging, information disclosure, PII handling violations
101 | - **Cryptographic Issues**: Weak algorithms, improper key management, insecure random number generation
102 | - **Input Validation**: Missing validation, improper sanitization, buffer overflows
103 | - **Business Logic Flaws**: Race conditions, time-of-check-time-of-use (TOCTOU) issues
104 | - **Configuration Security**: Insecure defaults, missing security headers, permissive CORS
105 | - **Supply Chain**: Vulnerable dependencies, typosquatting risks
106 | - **Code Execution**: RCE via deserialization, pickle injection, eval injection
107 | - **Cross-Site Scripting (XSS)**: Reflected, stored, and DOM-based XSS
108 |
109 | ### False Positive Filtering
110 |
The tool automatically excludes a variety of low-impact and false-positive-prone findings to focus on high-impact vulnerabilities:
112 | - Denial of Service vulnerabilities
113 | - Rate limiting concerns
114 | - Memory/CPU exhaustion issues
115 | - Generic input validation without proven impact
116 | - Open redirect vulnerabilities
117 |
118 | The false positive filtering can also be tuned as needed for a given project's security goals.
119 |
120 | ### Benefits Over Traditional SAST
121 |
122 | - **Contextual Understanding**: Understands code semantics and intent, not just patterns
123 | - **Lower False Positives**: AI-powered analysis reduces noise by understanding when code is actually vulnerable
124 | - **Detailed Explanations**: Provides clear explanations of why something is a vulnerability and how to fix it
125 | - **Adaptive Learning**: Can be customized with organization-specific security requirements
126 |
127 | ## Installation & Setup
128 |
129 | ### GitHub Actions
130 |
131 | Follow the Quick Start guide above. The action handles all dependencies automatically.
132 |
133 | ### Local Development
134 |
135 | To run the security scanner locally against a specific PR, see the [evaluation framework documentation](claudecode/evals/README.md).
136 |
137 |
138 |
139 | ## Claude Code Integration: /security-review Command
140 |
141 | By default, Claude Code ships a `/security-review` [slash command](https://docs.anthropic.com/en/docs/claude-code/slash-commands) that provides the same security analysis capabilities as the GitHub Action workflow, but integrated directly into your Claude Code development environment. To use this, simply run `/security-review` to perform a comprehensive security review of all pending changes.
142 |
143 | ### Customizing the Command
144 |
145 | The default `/security-review` command is designed to work well in most cases, but it can also be customized based on your specific security needs. To do so:
146 |
147 | 1. Copy the [`security-review.md`](https://github.com/anthropics/claude-code-security-review/blob/main/.claude/commands/security-review.md?plain=1) file from this repository to your project's `.claude/commands/` folder.
148 | 2. Edit `security-review.md` to customize the security analysis. For example, you could add additional organization-specific directions to the false positive filtering instructions.
149 |
150 | ## Custom Scanning Configuration
151 |
152 | It is also possible to configure custom scanning and false positive filtering instructions, see the [`docs/`](docs/) folder for more details.
153 |
154 | ## Testing
155 |
156 | Run the test suite to validate functionality:
157 |
158 | ```bash
159 | cd claude-code-security-review
160 | # Run all tests
161 | pytest claudecode -v
162 | ```
163 |
164 | ## Support
165 |
166 | For issues or questions:
167 | - Open an issue in this repository
168 | - Check the [GitHub Actions logs](https://docs.github.com/en/actions/monitoring-and-troubleshooting-workflows/viewing-workflow-run-history) for debugging information
169 |
170 | ## License
171 |
172 | MIT License - see [LICENSE](LICENSE) file for details.
173 |
--------------------------------------------------------------------------------
/scripts/comment-pr-findings.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | /**
4 | * Script to comment on PRs with security findings from ClaudeCode
5 | */
6 |
7 | const fs = require('fs');
8 | const { spawnSync } = require('child_process');
9 |
// Parse GitHub context from environment.
// The event payload file is read and JSON.parsed once (previously it was
// read and parsed twice), and the result is scoped inside an IIFE so only
// `context` is exposed at module level.
const context = (() => {
  const event = process.env.GITHUB_EVENT_PATH
    ? JSON.parse(fs.readFileSync(process.env.GITHUB_EVENT_PATH, 'utf8'))
    : {};
  const [owner = '', repo = ''] = (process.env.GITHUB_REPOSITORY || '').split('/');
  return {
    repo: { owner, repo },
    issue: {
      // parseInt(undefined, 10) is NaN, so missing PR numbers fall back to 0.
      number: parseInt(event.pull_request?.number, 10) || 0
    },
    payload: {
      pull_request: event.pull_request || {}
    }
  };
})();
23 |
// GitHub API helper using gh CLI.
// Arguments are passed to spawnSync as an array so endpoint/method values are
// never interpreted by a shell (prevents command injection). Request bodies
// are streamed over stdin. Returns the parsed JSON response, or null when the
// API returns an empty body (e.g. 204 No Content) — previously JSON.parse('')
// would throw for those responses.
function ghApi(endpoint, method = 'GET', data = null) {
  // Build arguments array safely to prevent command injection
  const args = ['api', endpoint, '--method', method];

  if (data) {
    args.push('--input', '-');
  }

  try {
    const result = spawnSync('gh', args, {
      encoding: 'utf8',
      input: data ? JSON.stringify(data) : undefined,
      stdio: ['pipe', 'pipe', 'pipe']
    });

    if (result.error) {
      throw new Error(`Failed to spawn gh process: ${result.error.message}`);
    }

    if (result.status !== 0) {
      console.error(`Error calling GitHub API: ${result.stderr}`);
      throw new Error(`gh process exited with code ${result.status}: ${result.stderr}`);
    }

    // Empty responses are valid for some endpoints; don't try to parse them.
    return result.stdout && result.stdout.trim() ? JSON.parse(result.stdout) : null;
  } catch (error) {
    console.error(`Error calling GitHub API: ${error.message}`);
    throw error;
  }
}
55 |
// Adds thumbs-up / thumbs-down reactions to a single comment so reviewers can
// vote on finding quality. Reaction failures are logged and never rethrown.
function addReactionsToComment(commentId, isReviewComment = true) {
  const base = `/repos/${context.repo.owner}/${context.repo.repo}`;
  // Review comments and issue comments live under different API paths.
  const endpoint = isReviewComment
    ? `${base}/pulls/comments/${commentId}/reactions`
    : `${base}/issues/comments/${commentId}/reactions`;

  ['+1', '-1'].forEach((content) => {
    try {
      ghApi(endpoint, 'POST', { content });
      console.log(`Added ${content} reaction to comment ${commentId}`);
    } catch (error) {
      console.error(`Failed to add ${content} reaction to comment ${commentId}:`, error.message);
    }
  });
}
72 |
// Fetches every comment belonging to a review and adds voting reactions to
// each one. Errors retrieving the comment list are logged, not rethrown.
function addReactionsToReview(reviewId) {
  const endpoint = `/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/reviews/${reviewId}/comments`;
  try {
    const comments = ghApi(endpoint);
    if (Array.isArray(comments)) {
      comments
        .filter((comment) => comment.id)
        .forEach((comment) => addReactionsToComment(comment.id, true));
    }
  } catch (error) {
    console.error(`Failed to get review comments for review ${reviewId}:`, error.message);
  }
}
90 |
// Entry point: read ClaudeCode findings from findings.json and publish them
// as inline review comments on the current pull request.
//
// Flow:
//   1. Load findings.json (absence / parse failure aborts quietly).
//   2. Honor SILENCE_CLAUDECODE_COMMENTS *before* any GitHub API work,
//      so silencing costs zero requests (previously the PR-files request
//      was made first and then discarded).
//   3. Map findings onto the PR diff and build one review comment each.
//   4. Skip entirely if a previous run already posted security comments.
//   5. Create one review; fall back to individual comments on failure.
async function run() {
  try {
    // Read the findings produced by the audit step.
    let newFindings = [];
    try {
      const findingsData = fs.readFileSync('findings.json', 'utf8');
      newFindings = JSON.parse(findingsData);
    } catch (e) {
      console.log('Could not read findings file');
      return;
    }

    // Guard against a findings file that parses but is not an array
    // (e.g. an error object) — previously this fell through to for...of.
    if (!Array.isArray(newFindings) || newFindings.length === 0) {
      return;
    }

    // Check if ClaudeCode comments should be silenced.
    const silenceClaudeCodeComments = process.env.SILENCE_CLAUDECODE_COMMENTS === 'true';
    if (silenceClaudeCodeComments) {
      console.log(`ClaudeCode comments silenced - excluding ${newFindings.length} findings from comments`);
      return;
    }

    // Get the PR diff to map file lines to diff positions.
    const prFiles = ghApi(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/files?per_page=100`);

    // Index PR files by path for O(1) membership checks below.
    const fileMap = {};
    prFiles.forEach(file => {
      fileMap[file.filename] = file;
    });

    // Prepare review comments.
    const reviewComments = [];

    // Process findings synchronously (gh cli doesn't support async well).
    for (const finding of newFindings) {
      // Findings may arrive in two shapes: native ClaudeCode fields or a
      // semgrep-style `extra` envelope; accept both.
      const file = finding.file || finding.path;
      const line = finding.line || (finding.start && finding.start.line) || 1;
      const message = finding.description || (finding.extra && finding.extra.message) || 'Security vulnerability detected';
      const severity = finding.severity || 'HIGH';
      const category = finding.category || 'security_issue';

      // Inline review comments can only target files in the PR diff.
      if (!fileMap[file]) {
        console.log(`File ${file} not in PR diff, skipping`);
        continue;
      }

      // Build the comment body.
      let commentBody = `🤖 **Security Issue: ${message}**\n\n`;
      commentBody += `**Severity:** ${severity}\n`;
      commentBody += `**Category:** ${category}\n`;
      commentBody += `**Tool:** ClaudeCode AI Security Analysis\n`;

      // Add exploit scenario if available.
      if (finding.exploit_scenario || (finding.extra && finding.extra.metadata && finding.extra.metadata.exploit_scenario)) {
        const exploitScenario = finding.exploit_scenario || finding.extra.metadata.exploit_scenario;
        commentBody += `\n**Exploit Scenario:** ${exploitScenario}\n`;
      }

      // Add recommendation if available.
      if (finding.recommendation || (finding.extra && finding.extra.metadata && finding.extra.metadata.recommendation)) {
        const recommendation = finding.recommendation || finding.extra.metadata.recommendation;
        commentBody += `\n**Recommendation:** ${recommendation}\n`;
      }

      reviewComments.push({
        path: file,
        line: line,
        side: 'RIGHT',
        body: commentBody
      });
    }

    if (reviewComments.length === 0) {
      console.log('No findings to comment on PR diff');
      return;
    }

    // Check for existing review comments to avoid duplicates.
    const comments = ghApi(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/comments`);

    // Any bot comment carrying our marker means a previous run already
    // posted findings. The `comment.user &&` guard avoids a TypeError on
    // comments whose author record is missing.
    const existingSecurityComments = comments.filter(comment =>
      comment.user && comment.user.type === 'Bot' &&
      comment.body && comment.body.includes('🤖 **Security Issue:')
    );

    if (existingSecurityComments.length > 0) {
      console.log(`Found ${existingSecurityComments.length} existing security comments, skipping to avoid duplicates`);
      return;
    }

    try {
      // Create a single review bundling all inline comments.
      const reviewData = {
        commit_id: context.payload.pull_request.head.sha,
        event: 'COMMENT',
        comments: reviewComments
      };

      const reviewResponse = ghApi(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/reviews`, 'POST', reviewData);

      console.log(`Created review with ${reviewComments.length} inline comments`);

      // Add voting reactions to every comment in the new review.
      if (reviewResponse && reviewResponse.id) {
        addReactionsToReview(reviewResponse.id);
      }
    } catch (error) {
      console.error('Error creating review:', error);

      // Fallback: try to create individual comments if the review fails.
      // This might happen if line numbers are outside the diff context.
      console.log('Attempting fallback with adjusted line numbers...');

      for (const comment of reviewComments) {
        try {
          // Try to create a comment with the original line.
          const commentData = {
            path: comment.path,
            line: comment.line,
            side: comment.side,
            body: comment.body,
            commit_id: context.payload.pull_request.head.sha
          };

          const commentResponse = ghApi(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/comments`, 'POST', commentData);

          // Add reactions to the individual comment.
          if (commentResponse && commentResponse.id) {
            addReactionsToComment(commentResponse.id, true);
          }
        } catch (lineError) {
          console.log(`Could not comment on ${comment.path}:${comment.line} - line might not be in diff context`);
          // If the specific line fails, inspect the file's patch; a more
          // sophisticated mapping could retry on a nearby in-diff line.
          const fileInfo = fileMap[comment.path];
          if (fileInfo && fileInfo.patch) {
            console.log(`File ${comment.path} has additions but line ${comment.line} is not in the diff`);
          }
        }
      }
    }
  } catch (error) {
    console.error('Failed to comment on PR:', error);
    process.exit(1);
  }
}

run();
--------------------------------------------------------------------------------
/claudecode/test_helper_functions.py:
--------------------------------------------------------------------------------
1 | """Unit tests for helper functions in github_action_audit module."""
2 |
3 | import pytest
4 | import os
5 | from unittest.mock import patch, MagicMock
6 |
7 |
8 | from claudecode.github_action_audit import (
9 | get_environment_config,
10 | initialize_clients,
11 | initialize_findings_filter,
12 | run_security_audit,
13 | apply_findings_filter,
14 | ConfigurationError,
15 | AuditError
16 | )
17 | from claudecode.findings_filter import FindingsFilter
18 |
19 |
class TestHelperFunctions:
    """Test helper functions in github_action_audit module.

    Covers environment-variable parsing, client construction, findings-filter
    initialization, audit execution, and filter application (including the
    keep-everything fallback when filtering fails).
    """

    def test_get_environment_config_success(self):
        """Test successful environment configuration retrieval."""
        with patch.dict(os.environ, {
            'GITHUB_REPOSITORY': 'owner/repo',
            'PR_NUMBER': '123'
        }):
            repo_name, pr_number = get_environment_config()

            # Repo name passes through unchanged; PR number is parsed to int.
            assert repo_name == 'owner/repo'
            assert pr_number == 123

    def test_get_environment_config_missing_repository(self):
        """Test error when GITHUB_REPOSITORY is missing."""
        # clear=True ensures no ambient GITHUB_REPOSITORY leaks into the test.
        with patch.dict(os.environ, {'PR_NUMBER': '123'}, clear=True):
            with pytest.raises(ConfigurationError) as exc_info:
                get_environment_config()

            assert "GITHUB_REPOSITORY environment variable required" in str(exc_info.value)

    def test_get_environment_config_missing_pr_number(self):
        """Test error when PR_NUMBER is missing."""
        with patch.dict(os.environ, {'GITHUB_REPOSITORY': 'owner/repo'}, clear=True):
            with pytest.raises(ConfigurationError) as exc_info:
                get_environment_config()

            assert "PR_NUMBER environment variable required" in str(exc_info.value)

    def test_get_environment_config_invalid_pr_number(self):
        """Test error when PR_NUMBER is not a valid integer."""
        with patch.dict(os.environ, {
            'GITHUB_REPOSITORY': 'owner/repo',
            'PR_NUMBER': 'not-a-number'
        }):
            with pytest.raises(ConfigurationError) as exc_info:
                get_environment_config()

            assert "Invalid PR_NUMBER" in str(exc_info.value)

    # NOTE: decorators apply bottom-up, so mock_claude_runner (innermost
    # patch) is the FIRST mock argument and mock_github_client the second.
    @patch('claudecode.github_action_audit.GitHubActionClient')
    @patch('claudecode.github_action_audit.SimpleClaudeRunner')
    def test_initialize_clients_success(self, mock_claude_runner, mock_github_client):
        """Test successful client initialization."""
        mock_github_instance = MagicMock()
        mock_claude_instance = MagicMock()
        mock_github_client.return_value = mock_github_instance
        mock_claude_runner.return_value = mock_claude_instance

        github_client, claude_runner = initialize_clients()

        # Each constructor is called exactly once and its instance returned.
        assert github_client == mock_github_instance
        assert claude_runner == mock_claude_instance
        mock_github_client.assert_called_once()
        mock_claude_runner.assert_called_once()

    @patch('claudecode.github_action_audit.GitHubActionClient')
    def test_initialize_clients_github_failure(self, mock_github_client):
        """Test error when GitHub client initialization fails."""
        mock_github_client.side_effect = Exception("GitHub API error")

        with pytest.raises(ConfigurationError) as exc_info:
            initialize_clients()

        # The original cause is preserved in the wrapped error message.
        assert "Failed to initialize GitHub client" in str(exc_info.value)
        assert "GitHub API error" in str(exc_info.value)

    @patch('claudecode.github_action_audit.GitHubActionClient')
    @patch('claudecode.github_action_audit.SimpleClaudeRunner')
    def test_initialize_clients_claude_failure(self, mock_claude_runner, mock_github_client):
        """Test error when Claude runner initialization fails."""
        # GitHub client succeeds; only the Claude runner constructor raises.
        mock_github_client.return_value = MagicMock()
        mock_claude_runner.side_effect = Exception("Claude init error")

        with pytest.raises(ConfigurationError) as exc_info:
            initialize_clients()

        assert "Failed to initialize Claude runner" in str(exc_info.value)
        assert "Claude init error" in str(exc_info.value)

    @patch('claudecode.github_action_audit.FindingsFilter')
    def test_initialize_findings_filter_with_claude(self, mock_filter):
        """Test initializing findings filter with Claude API enabled."""
        mock_filter_instance = MagicMock()
        mock_filter.return_value = mock_filter_instance

        with patch.dict(os.environ, {
            'ENABLE_CLAUDE_FILTERING': 'true',
            'ANTHROPIC_API_KEY': 'test-key-123'
        }):
            result = initialize_findings_filter()

        # The env flag and API key must be forwarded into the filter.
        assert result == mock_filter_instance
        mock_filter.assert_called_once_with(
            use_hard_exclusions=True,
            use_claude_filtering=True,
            api_key='test-key-123',
            custom_filtering_instructions=None
        )

    @patch('claudecode.github_action_audit.FindingsFilter')
    def test_initialize_findings_filter_without_claude(self, mock_simple_filter):
        """Test initializing findings filter without Claude API."""
        mock_filter_instance = MagicMock()
        mock_simple_filter.return_value = mock_filter_instance

        with patch.dict(os.environ, {
            'ENABLE_CLAUDE_FILTERING': 'false'
        }, clear=True):
            result = initialize_findings_filter()

        assert result == mock_filter_instance
        mock_simple_filter.assert_called_once()

    @patch('claudecode.github_action_audit.FindingsFilter')
    def test_initialize_findings_filter_with_defaults(self, mock_simple_filter):
        """Test initializing findings filter with defaults."""
        mock_filter_instance = MagicMock()
        mock_simple_filter.return_value = mock_filter_instance

        # Empty environment: filter should still be constructed with defaults.
        with patch.dict(os.environ, {}, clear=True):
            result = initialize_findings_filter()

        assert result == mock_filter_instance

    def test_run_security_audit_success(self):
        """Test successful security audit execution."""
        # Runner protocol: (success: bool, error_message: str, results: dict).
        mock_runner = MagicMock()
        mock_runner.run_security_audit.return_value = (
            True,
            "",
            {"findings": [{"id": 1}], "analysis_summary": {}}
        )

        result = run_security_audit(mock_runner, "test prompt")

        # On success, the raw results dict is returned unchanged.
        assert result == {"findings": [{"id": 1}], "analysis_summary": {}}
        mock_runner.run_security_audit.assert_called_once()

    def test_run_security_audit_failure(self):
        """Test security audit execution failure."""
        mock_runner = MagicMock()
        mock_runner.run_security_audit.return_value = (
            False,
            "Audit failed: timeout",
            {}
        )

        with pytest.raises(AuditError) as exc_info:
            run_security_audit(mock_runner, "test prompt")

        # The runner's error text is embedded in the raised AuditError.
        assert "Security audit failed: Audit failed: timeout" in str(exc_info.value)

    def test_apply_findings_filter_with_findings_filter(self):
        """Test applying FindingsFilter to findings."""
        # Filter protocol: (success, results dict, filter_stats).
        mock_filter = MagicMock(spec=FindingsFilter)
        mock_filter.filter_findings.return_value = (
            True,
            {
                'filtered_findings': [{"id": 1}],
                'excluded_findings': [{"id": 2}],
                'analysis_summary': {'total': 2, 'kept': 1}
            },
            MagicMock()  # filter_stats
        )

        original_findings = [{"id": 1}, {"id": 2}]
        pr_context = {"repo_name": "test/repo"}

        # Create mock github client; no file is directory-excluded.
        mock_github_client = MagicMock()
        mock_github_client._is_excluded.return_value = False

        kept, excluded, summary = apply_findings_filter(
            mock_filter, original_findings, pr_context, mock_github_client
        )

        assert kept == [{"id": 1}]
        assert excluded == [{"id": 2}]
        # The summary gains a directory_excluded_count key on top of the
        # filter's own analysis_summary contents.
        assert summary == {'total': 2, 'kept': 1, 'directory_excluded_count': 0}

        mock_filter.filter_findings.assert_called_once_with(original_findings, pr_context)

    def test_apply_findings_filter_with_simple_filter(self):
        """Test applying a filter whose results carry an empty analysis summary."""
        mock_filter = MagicMock(spec=FindingsFilter)
        mock_filter.filter_findings.return_value = (
            True,
            {
                'filtered_findings': [{"id": 1}],
                'excluded_findings': [{"id": 2}],
                'analysis_summary': {}
            },
            MagicMock()  # filter_stats
        )

        original_findings = [{"id": 1}, {"id": 2}]
        pr_context = {"repo_name": "test/repo"}

        # Create mock github client; no file is directory-excluded.
        mock_github_client = MagicMock()
        mock_github_client._is_excluded.return_value = False

        kept, excluded, summary = apply_findings_filter(
            mock_filter, original_findings, pr_context, mock_github_client
        )

        assert kept == [{"id": 1}]
        assert excluded == [{"id": 2}]
        # Empty analysis summary still receives the directory-exclusion count.
        assert summary == {'directory_excluded_count': 0}

        mock_filter.filter_findings.assert_called_once_with(original_findings, pr_context)

    def test_apply_findings_filter_failure(self):
        """Test handling of filter failure."""
        mock_filter = MagicMock(spec=FindingsFilter)
        mock_filter.filter_findings.return_value = (
            False,  # filter failed
            {},
            MagicMock()
        )

        original_findings = [{"id": 1}, {"id": 2}]
        pr_context = {"repo_name": "test/repo"}

        # Create mock github client
        mock_github_client = MagicMock()
        mock_github_client._is_excluded.return_value = False

        kept, excluded, summary = apply_findings_filter(
            mock_filter, original_findings, pr_context, mock_github_client
        )

        # On failure, should keep all findings (fail open, never drop results).
        assert kept == original_findings
        assert excluded == []
        assert summary == {'directory_excluded_count': 0}
--------------------------------------------------------------------------------
/.claude/commands/security-review.md:
--------------------------------------------------------------------------------
1 | ---
2 | allowed-tools: Bash(git diff:*), Bash(git status:*), Bash(git log:*), Bash(git show:*), Bash(git remote show:*), Read, Glob, Grep, LS, Task
3 | description: Complete a security review of the pending changes on the current branch
4 | ---
5 |
6 | You are a senior security engineer conducting a focused security review of the changes on this branch.
7 |
8 | GIT STATUS:
9 |
10 | ```
11 | !`git status`
12 | ```
13 |
14 | FILES MODIFIED:
15 |
16 | ```
17 | !`git diff --name-only origin/HEAD...`
18 | ```
19 |
20 | COMMITS:
21 |
22 | ```
23 | !`git log --no-decorate origin/HEAD...`
24 | ```
25 |
26 | DIFF CONTENT:
27 |
28 | ```
29 | !`git diff --merge-base origin/HEAD`
30 | ```
31 |
32 | Review the complete diff above. This contains all code changes in the PR.
33 |
34 |
35 | OBJECTIVE:
36 | Perform a security-focused code review to identify HIGH-CONFIDENCE security vulnerabilities that could have real exploitation potential. This is not a general code review - focus ONLY on security implications newly added by this PR. Do not comment on existing security concerns.
37 |
38 | CRITICAL INSTRUCTIONS:
39 | 1. MINIMIZE FALSE POSITIVES: Only flag issues where you're >80% confident of actual exploitability
40 | 2. AVOID NOISE: Skip theoretical issues, style concerns, or low-impact findings
41 | 3. FOCUS ON IMPACT: Prioritize vulnerabilities that could lead to unauthorized access, data breaches, or system compromise
42 | 4. EXCLUSIONS: Do NOT report the following issue types:
43 | - Denial of Service (DOS) vulnerabilities, even if they allow service disruption
44 | - Secrets or sensitive data stored on disk (these are handled by other processes)
45 | - Rate limiting or resource exhaustion issues
46 |
47 | SECURITY CATEGORIES TO EXAMINE:
48 |
49 | **Input Validation Vulnerabilities:**
50 | - SQL injection via unsanitized user input
51 | - Command injection in system calls or subprocesses
52 | - XXE injection in XML parsing
53 | - Template injection in templating engines
54 | - NoSQL injection in database queries
55 | - Path traversal in file operations
56 |
57 | **Authentication & Authorization Issues:**
58 | - Authentication bypass logic
59 | - Privilege escalation paths
60 | - Session management flaws
61 | - JWT token vulnerabilities
62 | - Authorization logic bypasses
63 |
64 | **Crypto & Secrets Management:**
65 | - Hardcoded API keys, passwords, or tokens
66 | - Weak cryptographic algorithms or implementations
67 | - Improper key storage or management
68 | - Cryptographic randomness issues
69 | - Certificate validation bypasses
70 |
71 | **Injection & Code Execution:**
72 | - Remote code execution via deserialization
73 | - Pickle injection in Python
74 | - YAML deserialization vulnerabilities
75 | - Eval injection in dynamic code execution
76 | - XSS vulnerabilities in web applications (reflected, stored, DOM-based)
77 |
78 | **Data Exposure:**
79 | - Sensitive data logging or storage
80 | - PII handling violations
81 | - API endpoint data leakage
82 | - Debug information exposure
83 |
84 | Additional notes:
85 | - Even if something is only exploitable from the local network, it can still be a HIGH severity issue
86 |
87 | ANALYSIS METHODOLOGY:
88 |
89 | Phase 1 - Repository Context Research (Use file search tools):
90 | - Identify existing security frameworks and libraries in use
91 | - Look for established secure coding patterns in the codebase
92 | - Examine existing sanitization and validation patterns
93 | - Understand the project's security model and threat model
94 |
95 | Phase 2 - Comparative Analysis:
96 | - Compare new code changes against existing security patterns
97 | - Identify deviations from established secure practices
98 | - Look for inconsistent security implementations
99 | - Flag code that introduces new attack surfaces
100 |
101 | Phase 3 - Vulnerability Assessment:
102 | - Examine each modified file for security implications
103 | - Trace data flow from user inputs to sensitive operations
104 | - Look for privilege boundaries being crossed unsafely
105 | - Identify injection points and unsafe deserialization
106 |
107 | REQUIRED OUTPUT FORMAT:
108 |
109 | You MUST output your findings in markdown. The markdown output should contain the file, line number, severity, category (e.g. `sql_injection` or `xss`), description, exploit scenario, and fix recommendation.
110 |
111 | For example:
112 |
113 | # Vuln 1: XSS: `foo.py:42`
114 |
115 | * Severity: High
116 | * Description: User input from `username` parameter is directly interpolated into HTML without escaping, allowing reflected XSS attacks
117 | * Exploit Scenario: Attacker crafts URL like /bar?q=<script>alert(document.cookie)</script> to execute JavaScript in victim's browser, enabling session hijacking or data theft
118 | * Recommendation: Use Flask's escape() function or Jinja2 templates with auto-escaping enabled for all user inputs rendered in HTML
119 |
120 | SEVERITY GUIDELINES:
121 | - **HIGH**: Directly exploitable vulnerabilities leading to RCE, data breach, or authentication bypass
122 | - **MEDIUM**: Vulnerabilities requiring specific conditions but with significant impact
123 | - **LOW**: Defense-in-depth issues or lower-impact vulnerabilities
124 |
125 | CONFIDENCE SCORING:
126 | - 0.9-1.0: Certain exploit path identified, tested if possible
127 | - 0.8-0.9: Clear vulnerability pattern with known exploitation methods
128 | - 0.7-0.8: Suspicious pattern requiring specific conditions to exploit
129 | - Below 0.7: Don't report (too speculative)
130 |
131 | FINAL REMINDER:
132 | Focus on HIGH and MEDIUM findings only. Better to miss some theoretical issues than flood the report with false positives. Each finding should be something a security engineer would confidently raise in a PR review.
133 |
134 | FALSE POSITIVE FILTERING:
135 |
136 | > You do not need to run commands to reproduce the vulnerability, just read the code to determine if it is a real vulnerability. Do not use the bash tool or write to any files.
137 | >
138 | > HARD EXCLUSIONS - Automatically exclude findings matching these patterns:
139 | > 1. Denial of Service (DOS) vulnerabilities or resource exhaustion attacks.
140 | > 2. Secrets or credentials stored on disk if they are otherwise secured.
141 | > 3. Rate limiting concerns or service overload scenarios.
142 | > 4. Memory consumption or CPU exhaustion issues.
143 | > 5. Lack of input validation on non-security-critical fields without proven security impact.
144 | > 6. Input sanitization concerns for GitHub Action workflows unless they are clearly triggerable via untrusted input.
145 | > 7. A lack of hardening measures. Code is not expected to implement all security best practices, only flag concrete vulnerabilities.
146 | > 8. Race conditions or timing attacks that are theoretical rather than practical issues. Only report a race condition if it is concretely problematic.
147 | > 9. Vulnerabilities related to outdated third-party libraries. These are managed separately and should not be reported here.
148 | > 10. Memory safety issues such as buffer overflows or use-after-free vulnerabilities are impossible in Rust. Do not report memory safety issues in Rust or any other memory-safe languages.
149 | > 11. Files that are only unit tests or only used as part of running tests.
150 | > 12. Log spoofing concerns. Outputting un-sanitized user input to logs is not a vulnerability.
151 | > 13. SSRF vulnerabilities that only control the path. SSRF is only a concern if it can control the host or protocol.
152 | > 14. Including user-controlled content in AI system prompts is not a vulnerability.
153 | > 15. Regex injection. Injecting untrusted content into a regex is not a vulnerability.
154 | > 16. Regex DOS concerns.
155 | > 17. Insecure documentation. Do not report any findings in documentation files such as markdown files.
156 | > 18. A lack of audit logs is not a vulnerability.
157 | >
158 | > PRECEDENTS -
159 | > 1. Logging high value secrets in plaintext is a vulnerability. Logging URLs is assumed to be safe.
160 | > 2. UUIDs can be assumed to be unguessable and do not need to be validated.
161 | > 3. Environment variables and CLI flags are trusted values. Attackers are generally not able to modify them in a secure environment. Any attack that relies on controlling an environment variable is invalid.
162 | > 4. Resource management issues such as memory or file descriptor leaks are not valid.
163 | > 5. Subtle or low impact web vulnerabilities such as tabnabbing, XS-Leaks, prototype pollution, and open redirects should not be reported unless they are extremely high confidence.
164 | > 6. React and Angular are generally secure against XSS. These frameworks do not need to sanitize or escape user input unless it is using dangerouslySetInnerHTML, bypassSecurityTrustHtml, or similar methods. Do not report XSS vulnerabilities in React or Angular components or tsx files unless they are using unsafe methods.
165 | > 7. Most vulnerabilities in github action workflows are not exploitable in practice. Before validating a github action workflow vulnerability ensure it is concrete and has a very specific attack path.
166 | > 8. A lack of permission checking or authentication in client-side JS/TS code is not a vulnerability. Client-side code is not trusted and does not need to implement these checks, they are handled on the server-side. The same applies to all flows that send untrusted data to the backend, the backend is responsible for validating and sanitizing all inputs.
167 | > 9. Only include MEDIUM findings if they are obvious and concrete issues.
168 | > 10. Most vulnerabilities in ipython notebooks (*.ipynb files) are not exploitable in practice. Before validating a notebook vulnerability ensure it is concrete and has a very specific attack path where untrusted input can trigger the vulnerability.
169 | > 11. Logging non-PII data is not a vulnerability even if the data may be sensitive. Only report logging vulnerabilities if they expose sensitive information such as secrets, passwords, or personally identifiable information (PII).
170 | > 12. Command injection vulnerabilities in shell scripts are generally not exploitable in practice since shell scripts generally do not run with untrusted user input. Only report command injection vulnerabilities in shell scripts if they are concrete and have a very specific attack path for untrusted input.
171 | >
172 | > SIGNAL QUALITY CRITERIA - For remaining findings, assess:
173 | > 1. Is there a concrete, exploitable vulnerability with a clear attack path?
174 | > 2. Does this represent a real security risk vs theoretical best practice?
175 | > 3. Are there specific code locations and reproduction steps?
176 | > 4. Would this finding be actionable for a security team?
177 | >
178 | > For each finding, assign a confidence score from 1-10:
179 | > - 1-3: Low confidence, likely false positive or noise
180 | > - 4-6: Medium confidence, needs investigation
181 | > - 7-10: High confidence, likely true vulnerability
182 |
183 | START ANALYSIS:
184 |
185 | Begin your analysis now. Do this in 3 steps:
186 |
187 | 1. Use a sub-task to identify vulnerabilities. Use the repository exploration tools to understand the codebase context, then analyze the PR changes for security implications. In the prompt for this sub-task, include all of the above.
188 | 2. Then for each vulnerability identified by the above sub-task, create a new sub-task to filter out false-positives. Launch these sub-tasks as parallel sub-tasks. In the prompt for these sub-tasks, include everything in the "FALSE POSITIVE FILTERING" instructions.
189 | 3. Filter out any vulnerabilities where the sub-task reported a confidence less than 8.
190 |
191 | Your final reply must contain the markdown report and nothing else.
--------------------------------------------------------------------------------
/claudecode/test_prompts.py:
--------------------------------------------------------------------------------
1 | """Unit tests for the prompts module."""
2 |
3 | from claudecode.prompts import get_security_audit_prompt
4 |
5 |
6 | class TestPrompts:
7 | """Test prompt generation functions."""
8 |
9 | def test_get_security_audit_prompt_basic(self):
10 | """Test basic security audit prompt generation."""
11 | pr_data = {
12 | "number": 123,
13 | "title": "Add new feature",
14 | "body": "This PR adds a new feature to handle user input",
15 | "user": "testuser",
16 | "changed_files": 1,
17 | "additions": 10,
18 | "deletions": 5,
19 | "head": {
20 | "repo": {
21 | "full_name": "owner/repo"
22 | }
23 | },
24 | "files": [
25 | {
26 | "filename": "app.py",
27 | "status": "modified",
28 | "additions": 10,
29 | "deletions": 5
30 | }
31 | ]
32 | }
33 |
34 | pr_diff = """
35 | diff --git a/app.py b/app.py
36 | @@ -1,5 +1,10 @@
37 | def process_input(user_input):
38 | - return user_input
39 | + # Process the input
40 | + result = eval(user_input) # Potential security issue
41 | + return result
42 | """
43 |
44 | prompt = get_security_audit_prompt(pr_data, pr_diff)
45 |
46 | # Check that prompt contains expected elements
47 | assert isinstance(prompt, str)
48 | assert len(prompt) > 0
49 | assert "123" in prompt # PR number
50 | assert "Add new feature" in prompt # PR title
51 | assert "testuser" in prompt # Author
52 | assert "app.py" in prompt # File name
53 | assert "eval(user_input)" in prompt # The actual diff content
54 |
55 | def test_get_security_audit_prompt_empty_body(self):
56 | """Test prompt generation with empty PR body."""
57 | pr_data = {
58 | "number": 456,
59 | "title": "Quick fix",
60 | "body": None, # Empty body
61 | "user": "author",
62 | "changed_files": 0,
63 | "additions": 0,
64 | "deletions": 0,
65 | "head": {
66 | "repo": {
67 | "full_name": "owner/repo"
68 | }
69 | },
70 | "files": []
71 | }
72 |
73 | pr_diff = "diff --git a/test.js b/test.js"
74 |
75 | prompt = get_security_audit_prompt(pr_data, pr_diff)
76 |
77 | assert isinstance(prompt, str)
78 | assert "456" in prompt
79 | assert "Quick fix" in prompt
80 | assert "author" in prompt
81 |
82 | def test_get_security_audit_prompt_multiple_files(self):
83 | """Test prompt generation with multiple files."""
84 | pr_data = {
85 | "number": 789,
86 | "title": "Security improvements",
87 | "body": "Fixing various security issues",
88 | "user": "security-team",
89 | "changed_files": 3,
90 | "additions": 70,
91 | "deletions": 110,
92 | "head": {
93 | "repo": {
94 | "full_name": "owner/repo"
95 | }
96 | },
97 | "files": [
98 | {
99 | "filename": "auth.py",
100 | "status": "modified",
101 | "additions": 20,
102 | "deletions": 10
103 | },
104 | {
105 | "filename": "config.yaml",
106 | "status": "added",
107 | "additions": 50,
108 | "deletions": 0
109 | },
110 | {
111 | "filename": "old_auth.py",
112 | "status": "deleted",
113 | "additions": 0,
114 | "deletions": 100
115 | }
116 | ]
117 | }
118 |
119 | pr_diff = """
120 | diff --git a/auth.py b/auth.py
121 | @@ -1,10 +1,20 @@
122 | +import secrets
123 | +
124 | diff --git a/config.yaml b/config.yaml
125 | @@ -0,0 +1,50 @@
126 | +database:
127 | + password: "hardcoded_password"
128 | """
129 |
130 | prompt = get_security_audit_prompt(pr_data, pr_diff)
131 |
132 | # Check all files are mentioned
133 | assert "auth.py" in prompt
134 | assert "config.yaml" in prompt
135 | assert "old_auth.py" in prompt
136 |
137 | # Check file statuses
138 | assert "modified" in prompt.lower()
139 | assert "added" in prompt.lower()
140 | assert "deleted" in prompt.lower()
141 |
142 | def test_get_security_audit_prompt_special_characters(self):
143 | """Test prompt generation with special characters."""
144 | pr_data = {
145 | "number": 999,
146 | "title": "Fix SQL injection in user's profile",
147 | "body": "This fixes a SQL injection vulnerability in the `get_user()` function",
148 | "user": "user-with-dash",
149 | "changed_files": 1,
150 | "additions": 5,
151 | "deletions": 3,
152 | "head": {
153 | "repo": {
154 | "full_name": "owner/repo"
155 | }
156 | },
157 | "files": [
158 | {
159 | "filename": "src/db/queries.py",
160 | "status": "modified",
161 | "additions": 5,
162 | "deletions": 3
163 | }
164 | ]
165 | }
166 |
167 | pr_diff = """
168 | diff --git a/src/db/queries.py b/src/db/queries.py
169 | @@ -10,3 +10,5 @@
170 | - query = f"SELECT * FROM users WHERE id = {user_id}"
171 | + query = "SELECT * FROM users WHERE id = ?"
172 | + cursor.execute(query, (user_id,))
173 | """
174 |
175 | prompt = get_security_audit_prompt(pr_data, pr_diff)
176 |
177 | # Check special characters are preserved
178 | assert "user's" in prompt
179 | assert "user-with-dash" in prompt
180 | assert "src/db/queries.py" in prompt
181 |
182 | def test_get_security_audit_prompt_no_files(self):
183 | """Test prompt generation with no files (edge case)."""
184 | pr_data = {
185 | "number": 111,
186 | "title": "Documentation update",
187 | "body": "Just updating docs",
188 | "user": "doc-author",
189 | "changed_files": 0,
190 | "additions": 0,
191 | "deletions": 0,
192 | "head": {
193 | "repo": {
194 | "full_name": "owner/repo"
195 | }
196 | },
197 | "files": [] # No files
198 | }
199 |
200 | pr_diff = "" # Empty diff
201 |
202 | prompt = get_security_audit_prompt(pr_data, pr_diff)
203 |
204 | assert isinstance(prompt, str)
205 | assert "111" in prompt
206 | assert "Documentation update" in prompt
207 |
208 | def test_get_security_audit_prompt_structure(self):
209 | """Test that prompt has expected structure."""
210 | pr_data = {
211 | "number": 42,
212 | "title": "Test PR",
213 | "body": "Test description",
214 | "user": "testuser",
215 | "changed_files": 1,
216 | "additions": 1,
217 | "deletions": 1,
218 | "head": {
219 | "repo": {
220 | "full_name": "owner/repo"
221 | }
222 | },
223 | "files": [
224 | {
225 | "filename": "test.py",
226 | "status": "modified",
227 | "additions": 1,
228 | "deletions": 1
229 | }
230 | ]
231 | }
232 |
233 | pr_diff = "diff --git a/test.py b/test.py\n+print('test')"
234 |
235 | prompt = get_security_audit_prompt(pr_data, pr_diff)
236 |
237 | # Should contain sections for metadata and diff
238 | assert "PR #" in prompt or "Pull Request" in prompt
239 | assert "Title:" in prompt or pr_data["title"] in prompt
240 | assert "Author:" in prompt or pr_data["user"]["login"] in prompt
241 | assert "Files:" in prompt or "test.py" in prompt
242 |
243 | # Should contain the actual diff
244 | assert pr_diff in prompt or "print('test')" in prompt
245 |
246 | def test_get_security_audit_prompt_long_diff(self):
247 | """Test prompt generation with very long diff."""
248 | pr_data = {
249 | "number": 12345,
250 | "title": "Major refactoring",
251 | "body": "Refactoring the entire codebase",
252 | "user": "refactor-bot",
253 | "changed_files": 10,
254 | "additions": 1000,
255 | "deletions": 500,
256 | "head": {
257 | "repo": {
258 | "full_name": "owner/repo"
259 | }
260 | },
261 | "files": [
262 | {
263 | "filename": f"file{i}.py",
264 | "status": "modified",
265 | "additions": 100,
266 | "deletions": 50
267 | }
268 | for i in range(10)
269 | ]
270 | }
271 |
272 | # Create a large diff
273 | pr_diff = "\n".join([
274 | f"diff --git a/file{i}.py b/file{i}.py\n" +
275 | "\n".join([f"+line {j}" for j in range(50)])
276 | for i in range(10)
277 | ])
278 |
279 | prompt = get_security_audit_prompt(pr_data, pr_diff)
280 |
281 | # Should handle large diffs without error
282 | assert isinstance(prompt, str)
283 | assert len(prompt) > 1000 # Should be substantial
284 | assert "12345" in prompt
285 | assert "Major refactoring" in prompt
286 |
287 | def test_get_security_audit_prompt_unicode(self):
288 | """Test prompt generation with unicode characters."""
289 | pr_data = {
290 | "number": 666,
291 | "title": "Add emoji support 🎉",
292 | "body": "This PR adds emoji rendering 🔒 🛡️",
293 | "user": "émoji-user",
294 | "changed_files": 1,
295 | "additions": 42,
296 | "deletions": 0,
297 | "head": {
298 | "repo": {
299 | "full_name": "owner/repo"
300 | }
301 | },
302 | "files": [
303 | {
304 | "filename": "émojis.py",
305 | "status": "added",
306 | "additions": 42,
307 | "deletions": 0
308 | }
309 | ]
310 | }
311 |
312 | pr_diff = """
313 | diff --git a/émojis.py b/émojis.py
314 | +# 🔒 Security check
315 | +def check_input(text: str) -> bool:
316 | + return "🚨" not in text
317 | """
318 |
319 | prompt = get_security_audit_prompt(pr_data, pr_diff)
320 |
321 | # Check unicode is preserved
322 | assert "🎉" in prompt # Title emoji
323 | assert "émoji-user" in prompt
324 | assert "émojis.py" in prompt
325 | assert "🚨" in prompt # From diff
--------------------------------------------------------------------------------
/claudecode/test_github_action_audit.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Pytest tests for GitHub Action audit script components.
4 | """
5 |
6 |
class TestImports:
    """Verify that the package's entry points and helpers are importable."""

    def test_main_module_import(self):
        """The main module should expose its public entry points."""
        from claudecode import github_action_audit

        # SimpleFindingsFilter was removed, so it is intentionally absent here.
        for attr_name in ('GitHubActionClient', 'SimpleClaudeRunner', 'main'):
            assert hasattr(github_action_audit, attr_name)

    def test_component_imports(self):
        """Each supporting component should import and be callable."""
        from claudecode.prompts import get_security_audit_prompt
        from claudecode.json_parser import parse_json_with_fallbacks, extract_json_from_text

        for component in (get_security_audit_prompt, parse_json_with_fallbacks, extract_json_from_text):
            assert callable(component)
27 |
28 |
class TestHardExclusionRules:
    """Test the HardExclusionRules patterns."""

    @staticmethod
    def _reason_for(finding):
        """Run a single finding through the hard exclusion rules."""
        from claudecode.findings_filter import HardExclusionRules
        return HardExclusionRules.get_exclusion_reason(finding)

    def test_dos_patterns(self):
        """Denial-of-service findings are excluded with a DOS reason."""
        descriptions = (
            'Potential denial of service vulnerability',
            'DOS attack through resource exhaustion',
            'Infinite loop causing resource exhaustion',
        )
        for description in descriptions:
            reason = self._reason_for({'description': description})
            assert reason is not None
            assert 'dos' in reason.lower()

    def test_rate_limiting_patterns(self):
        """Rate-limiting complaints are excluded with a rate-limit reason."""
        descriptions = (
            'Missing rate limiting on endpoint',
            'No rate limit implemented for API',
            'Implement rate limiting for this route',
        )
        for description in descriptions:
            reason = self._reason_for({'description': description})
            assert reason is not None
            assert 'rate limit' in reason.lower()

    def test_open_redirect_patterns(self):
        """Open-redirect findings are excluded with an open-redirect reason."""
        descriptions = (
            'Open redirect vulnerability found',
            'Unvalidated redirect in URL parameter',
            'Redirect attack possible through user input',
        )
        for description in descriptions:
            reason = self._reason_for({'description': description})
            assert reason is not None
            assert 'open redirect' in reason.lower()

    def test_markdown_file_exclusion(self):
        """Findings in .md files are excluded, regardless of extension case."""
        findings = (
            {'file': 'README.md', 'description': 'SQL injection vulnerability'},
            {'file': 'docs/security.md', 'description': 'Command injection found'},
            {'file': 'CHANGELOG.MD', 'description': 'XSS vulnerability'},  # upper-case extension
            {'file': 'path/to/file.Md', 'description': 'Path traversal'},  # mixed-case extension
        )
        for finding in findings:
            reason = self._reason_for(finding)
            assert reason is not None
            assert 'markdown' in reason.lower()

    def test_non_markdown_files_not_excluded(self):
        """Non-.md files must never be excluded for being markdown."""
        findings = (
            {'file': 'main.py', 'description': 'SQL injection vulnerability'},
            {'file': 'server.js', 'description': 'Command injection found'},
            {'file': 'index.html', 'description': 'XSS vulnerability'},
            {'file': 'config.yml', 'description': 'Hardcoded credentials'},
            {'file': 'README.txt', 'description': 'Path traversal'},
            {'file': 'file.mdx', 'description': 'Security issue'},  # .mdx is not .md
        )
        for finding in findings:
            reason = self._reason_for(finding)
            # A different rule may still exclude the finding, but never the
            # markdown-file rule.
            if reason:
                assert 'markdown' not in reason.lower()

    def test_keeps_real_vulnerabilities(self):
        """Genuine vulnerabilities in code files pass through unfiltered."""
        findings = (
            {'file': 'auth.py', 'description': 'SQL injection in user authentication'},
            {'file': 'exec.js', 'description': 'Command injection through user input'},
            {'file': 'comments.php', 'description': 'Cross-site scripting in comment field'},
            {'file': 'upload.go', 'description': 'Path traversal in file upload'},
        )
        for finding in findings:
            assert self._reason_for(finding) is None
127 |
128 |
class TestJSONParser:
    """Test JSON parsing utilities."""

    def test_parse_valid_json(self):
        """Well-formed JSON parses successfully into the expected dict."""
        from claudecode.json_parser import parse_json_with_fallbacks

        success, result = parse_json_with_fallbacks('{"test": "data", "number": 123}', "test")

        assert success is True
        assert result == {"test": "data", "number": 123}

    def test_parse_invalid_json(self):
        """Malformed JSON reports failure with a descriptive error payload."""
        from claudecode.json_parser import parse_json_with_fallbacks

        success, result = parse_json_with_fallbacks('{invalid json}', "test")

        assert success is False
        assert 'error' in result
        assert 'Invalid JSON response' in result['error']

    def test_extract_json_from_text(self):
        """A JSON object embedded in surrounding prose is extracted."""
        from claudecode.json_parser import extract_json_from_text

        extracted = extract_json_from_text('Some text before {"key": "value"} some text after')

        assert extracted == {"key": "value"}

    def test_extract_json_from_text_no_json(self):
        """Text without any JSON yields None."""
        from claudecode.json_parser import extract_json_from_text

        assert extract_json_from_text('This is just plain text with no JSON') is None
170 |
171 |
class TestPromptsModule:
    """Test the prompts module."""

    def test_get_security_audit_prompt(self):
        """A minimal PR payload should produce a security-audit prompt."""
        from claudecode.prompts import get_security_audit_prompt

        pr_data = {
            'number': 123,
            'title': 'Test PR',
            'body': 'Test description',
            'user': 'testuser',
            'changed_files': 1,
            'additions': 10,
            'deletions': 5,
            'head': {'repo': {'full_name': 'owner/repo'}},
            'files': [
                {
                    'filename': 'test.py',
                    'status': 'modified',
                    'additions': 10,
                    'deletions': 5,
                    'patch': '@@ -1,5 +1,10 @@\n+added line',
                }
            ],
        }

        prompt = get_security_audit_prompt(
            pr_data, "diff --git a/test.py b/test.py\n+added line"
        )

        # The prompt must identify the PR, the file, and its security purpose.
        assert isinstance(prompt, str)
        assert 'security' in prompt.lower()
        assert 'PR #123' in prompt
        assert 'test.py' in prompt
211 |
212 |
class TestDeploymentPRDetection:
    """Test deployment PR title pattern matching."""

    # Titles of the form "Deploy <hex sha, 6+ chars> to <environment>".
    DEPLOYMENT_PATTERN = r'^Deploy\s+[a-f0-9]{6,}\s+to\s+(production|staging|development|production-services)'

    def _is_deployment_title(self, title):
        """Case-insensitive match of a PR title against the deployment pattern."""
        import re
        return re.match(self.DEPLOYMENT_PATTERN, title, re.IGNORECASE) is not None

    def test_deployment_pr_patterns(self):
        """Test that deployment PR titles are correctly identified."""
        matching_titles = [
            "Deploy 53f395b0 to production-services",
            "Deploy af179b5b to production",
            "Deploy 1a3cb909 to production",
            "Deploy 49c09ea5 to production-services",
            "Deploy 8e7acc60 to production",
            "Deploy e0b1fe0b to production-services",
            "Deploy c53e6010 to production",
            "Deploy 42c4a061 to production",
            "Deploy 9de55976 to production-services",
            "deploy abcdef123456 to staging",  # lowercase should work
            "DEPLOY ABCDEF01 TO DEVELOPMENT",  # uppercase should work
        ]

        for title in matching_titles:
            assert self._is_deployment_title(title), f"Failed to match deployment PR: {title}"

    def test_non_deployment_pr_patterns(self):
        """Test that non-deployment PR titles are not matched."""
        non_matching_titles = [
            "Add new feature",
            "Fix bug in deployment script",
            "Update deployment documentation",
            "Deploy new feature to production",  # No commit hash
            "Deploy abc to production",  # Too short hash
            "Deploy 12345g to production",  # Non-hex character
            "Preparing deploy af179b5b to production",  # Doesn't start with Deploy
            "Deploy af179b5b to testing",  # Wrong environment
            "Deploy af179b5b",  # Missing environment
            "af179b5b to production",  # Missing Deploy prefix
        ]

        for title in non_matching_titles:
            assert not self._is_deployment_title(title), f"Incorrectly matched non-deployment PR: {title}"
262 |
263 |
--------------------------------------------------------------------------------
/claudecode/test_eval_engine.py:
--------------------------------------------------------------------------------
1 | """Tests for eval_engine module."""
2 |
3 | import os
4 | from unittest.mock import Mock, patch
5 | import pytest
6 | import json
7 |
8 | from claudecode.evals.eval_engine import (
9 | EvaluationEngine, EvalResult, EvalCase, run_single_evaluation
10 | )
11 |
12 |
class TestEvalResult:
    """Test the EvalResult dataclass."""

    @staticmethod
    def _build(**overrides):
        """Construct an EvalResult from a shared baseline plus overrides."""
        fields = dict(
            repo_name="test/repo",
            pr_number=123,
            description="Test PR",
            success=True,
            runtime_seconds=10.5,
            findings_count=2,
            detected_vulnerabilities=True,
        )
        fields.update(overrides)
        return EvalResult(**fields)

    def test_eval_result_creation(self):
        """All explicit fields round-trip and defaults are applied."""
        result = self._build()

        assert result.repo_name == "test/repo"
        assert result.pr_number == 123
        assert result.description == "Test PR"
        assert result.success is True
        assert result.runtime_seconds == 10.5
        assert result.findings_count == 2
        assert result.detected_vulnerabilities is True
        # Defaulted fields:
        assert result.error_message == ""
        assert result.findings_summary is None

    def test_eval_result_with_error(self):
        """A failed run carries its error message."""
        result = self._build(
            pr_number=456,
            description="Failed PR",
            success=False,
            runtime_seconds=5.0,
            findings_count=0,
            detected_vulnerabilities=False,
            error_message="Failed to clone repository",
        )

        assert result.success is False
        assert result.error_message == "Failed to clone repository"
        assert result.findings_count == 0

    def test_eval_result_with_findings(self):
        """Findings lists are stored alongside the count."""
        findings = [
            {"file": "test.py", "line": 10, "severity": "HIGH"}
        ]
        result = self._build(
            pr_number=789,
            description="PR with findings",
            runtime_seconds=15.0,
            findings_count=1,
            findings_summary=findings,
            full_findings=findings,
        )

        assert result.findings_count == 1
        assert result.detected_vulnerabilities is True
        assert result.findings_summary is not None
        assert len(result.findings_summary) == 1

    def test_eval_result_to_dict(self):
        """to_dict exposes the core fields."""
        result = self._build(
            description="Test",
            runtime_seconds=10.0,
            findings_count=0,
            detected_vulnerabilities=False,
        )

        result_dict = result.to_dict()
        assert result_dict['repo_name'] == "test/repo"
        assert result_dict['pr_number'] == 123
        assert result_dict['success'] is True
93 |
94 |
class TestEvalCase:
    """Test the EvalCase dataclass."""

    def test_eval_case_creation(self):
        """Constructor arguments map straight onto the dataclass fields."""
        case = EvalCase("test/repo", 123, "Test case")

        for attribute, expected in (
            ("repo_name", "test/repo"),
            ("pr_number", 123),
            ("description", "Test case"),
        ):
            assert getattr(case, attribute) == expected
109 |
110 |
class TestEvaluationEngine:
    """Test the EvaluationEngine class.

    NOTE(review): these tests patch ``subprocess.run`` with order-sensitive
    ``side_effect`` lists.  The first mocked call is always the ``gh auth
    token`` probe that ``EvaluationEngine.__init__`` appears to perform;
    the remaining entries must mirror the exact sequence of git commands
    the method under test issues.
    """

    def test_engine_initialization(self):
        """Test engine initialization with API key."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            engine = EvaluationEngine()

            # Defaults: work_dir under the user's home, API key from env.
            assert engine.work_dir == os.path.expanduser("~/code/audit")
            assert engine.claude_api_key == 'test-key'

    def test_engine_initialization_no_api_key(self):
        """Test engine initialization without API key."""
        # clear=True guarantees no ambient ANTHROPIC_API_KEY leaks in.
        with patch.dict(os.environ, {}, clear=True):
            with pytest.raises(ValueError, match="ANTHROPIC_API_KEY"):
                EvaluationEngine()

    def test_get_eval_branch_name(self):
        """Test branch name generation."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            engine = EvaluationEngine()

            case = EvalCase("owner/repo", 123)
            branch_name = engine._get_eval_branch_name(case)

            # Branch embeds sanitized repo + PR number, then a unique suffix
            # after the trailing dash (content of suffix not asserted here).
            assert branch_name.startswith("eval-pr-owner-repo-123-")
            assert len(branch_name) > len("eval-pr-owner-repo-123-")

    @patch('os.path.exists')
    @patch('subprocess.run')
    def test_clean_worktrees(self, mock_run, mock_exists):
        """Test worktree cleanup."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            # Mock git auth token call in __init__
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=0),  # prune
                Mock(returncode=0, stdout=""),  # list (empty)
                Mock(returncode=0, stdout=""),  # branch --list (empty)
            ]

            engine = EvaluationEngine()

            mock_exists.return_value = True  # repo_path exists

            engine._clean_worktrees("/repo/path", "eval-pr-test-123")

            # Should call run four times: gh auth token (in __init__), prune, list, branch --list
            assert mock_run.call_count == 4

    @patch('subprocess.run')
    @patch('os.path.exists')
    def test_setup_repository_clone(self, mock_exists, mock_run):
        """Test repository setup with cloning."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            mock_exists.return_value = False  # Repository doesn't exist
            # Fresh clone path: clone, fetch, then add the worktree.
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=0),  # git clone
                Mock(returncode=0),  # git fetch
                Mock(returncode=0),  # git worktree add
            ]

            engine = EvaluationEngine()

            case = EvalCase("owner/repo", 123)
            success, worktree_path, error = engine._setup_repository(case)

            assert success is True
            assert worktree_path != ""
            assert error == ""

    @patch('subprocess.run')
    @patch('os.path.exists')
    def test_setup_repository_existing(self, mock_exists, mock_run):
        """Test repository setup with existing repository."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            # First call checks base_repo_path, second checks repo_path inside _clean_worktrees
            mock_exists.side_effect = [True, True]
            # Existing-repo path: worktree cleanup first, then fetch + add.
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=0),  # worktree prune
                Mock(returncode=0, stdout=""),  # worktree list
                Mock(returncode=0, stdout=""),  # git branch --list
                Mock(returncode=0),  # git fetch
                Mock(returncode=0),  # git worktree add
            ]

            engine = EvaluationEngine()

            case = EvalCase("owner/repo", 123)
            success, worktree_path, error = engine._setup_repository(case)

            assert success is True
            assert error == ""

    @patch('subprocess.run')
    def test_run_sast_audit_success(self, mock_run):
        """Test successful SAST audit run."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            # Mock gh auth token call first, then the audit
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=0, stdout=json.dumps({
                    "findings": [
                        {"file": "test.py", "line": 10, "severity": "HIGH"}
                    ]
                }), stderr="")  # SAST audit
            ]

            engine = EvaluationEngine()

            case = EvalCase("owner/repo", 123)
            success, output, parsed, error = engine._run_sast_audit(case, "/repo/path")

            # The JSON stdout is parsed into a findings structure.
            assert success is True
            assert parsed is not None
            assert len(parsed["findings"]) == 1
            assert error is None

    @patch('subprocess.run')
    def test_run_sast_audit_failure(self, mock_run):
        """Test failed SAST audit run."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            mock_run.side_effect = [
                Mock(returncode=1, stdout=""),  # gh auth token (fails, no token)
                Mock(returncode=1, stdout="", stderr="Error running audit")  # SAST audit fails
            ]

            engine = EvaluationEngine()

            case = EvalCase("owner/repo", 123)
            success, output, parsed, error = engine._run_sast_audit(case, "/repo/path")

            # A non-zero exit code is surfaced in the error message.
            assert success is False
            assert error is not None
            assert "Exit code 1" in error

    @patch.object(EvaluationEngine, '_setup_repository')
    @patch.object(EvaluationEngine, '_run_sast_audit')
    @patch.object(EvaluationEngine, '_cleanup_worktree')
    def test_run_evaluation_success(self, mock_cleanup, mock_audit, mock_setup):
        """Test successful evaluation run."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            engine = EvaluationEngine()

            mock_setup.return_value = (True, "/worktree/path", "")
            # (success, raw stdout, parsed findings, error)
            mock_audit.return_value = (
                True,
                '{"findings": [{"file": "test.py", "line": 10}]}',
                {"findings": [{"file": "test.py", "line": 10}]},
                None
            )

            case = EvalCase("owner/repo", 123, "Test PR")
            result = engine.run_evaluation(case)

            assert result.success is True
            assert result.findings_count == 1
            assert result.detected_vulnerabilities is True
            assert result.findings_summary is not None
            assert len(result.findings_summary) == 1

            # The worktree must always be cleaned up after a run.
            mock_cleanup.assert_called_once()

    @patch.object(EvaluationEngine, '_setup_repository')
    def test_run_evaluation_setup_failure(self, mock_setup):
        """Test evaluation with repository setup failure."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            engine = EvaluationEngine()

            mock_setup.return_value = (False, "", "Clone failed")

            case = EvalCase("owner/repo", 123, "Test PR")
            result = engine.run_evaluation(case)

            # Setup failure short-circuits the run with an explanatory error.
            assert result.success is False
            assert result.findings_count == 0
            assert "Repository setup failed" in result.error_message
290 |
291 |
class TestHelperFunctions:
    """Test helper functions."""

    @patch.object(EvaluationEngine, 'run_evaluation')
    def test_run_single_evaluation(self, mock_run):
        """run_single_evaluation should delegate straight to the engine."""
        with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
            sentinel_result = Mock(spec=EvalResult)
            mock_run.return_value = sentinel_result

            case = EvalCase("owner/repo", 123)
            outcome = run_single_evaluation(case, verbose=True)

            # The engine's result is returned unchanged, for the same case.
            assert outcome == sentinel_result
            mock_run.assert_called_once_with(case)
--------------------------------------------------------------------------------
/claudecode/test_github_client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Unit tests for GitHubActionClient.
4 | """
5 |
6 | import pytest
7 | import os
8 | from unittest.mock import Mock, patch
9 |
10 | from claudecode.github_action_audit import GitHubActionClient
11 |
12 |
13 | class TestGitHubActionClient:
14 | """Test GitHubActionClient functionality."""
15 |
16 | def test_init_requires_token(self):
17 | """Test that client initialization requires GITHUB_TOKEN."""
18 | # Remove token if it exists
19 | original_token = os.environ.pop('GITHUB_TOKEN', None)
20 |
21 | try:
22 | with pytest.raises(ValueError, match="GITHUB_TOKEN environment variable required"):
23 | GitHubActionClient()
24 | finally:
25 | # Restore token
26 | if original_token:
27 | os.environ['GITHUB_TOKEN'] = original_token
28 |
29 | def test_init_with_token(self):
30 | """Test successful initialization with token."""
31 | with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
32 | client = GitHubActionClient()
33 | assert client.github_token == 'test-token'
34 | assert client.headers['Authorization'] == 'Bearer test-token'
35 | assert 'Accept' in client.headers
36 | assert 'X-GitHub-Api-Version' in client.headers
37 |
    @patch('requests.get')
    def test_get_pr_data_success(self, mock_get):
        """Test successful PR data retrieval."""
        # Mock responses.  get_pr_data calls requests.get twice — PR metadata
        # first, then the file listing — so side_effect order matters.
        pr_response = Mock()
        pr_response.json.return_value = {
            'number': 123,
            'title': 'Test PR',
            'body': 'PR description',
            'user': {'login': 'testuser'},
            'created_at': '2024-01-01T00:00:00Z',
            'updated_at': '2024-01-01T01:00:00Z',
            'state': 'open',
            'head': {
                'ref': 'feature-branch',
                'sha': 'abc123',
                'repo': {
                    'full_name': 'owner/repo'
                }
            },
            'base': {
                'ref': 'main',
                'sha': 'def456'
            },
            'additions': 50,
            'deletions': 10,
            'changed_files': 3
        }

        files_response = Mock()
        files_response.json.return_value = [
            {
                'filename': 'src/main.py',
                'status': 'modified',
                'additions': 30,
                'deletions': 5,
                'changes': 35,
                'patch': '@@ -1,5 +1,10 @@\n+import os\n def main():'
            },
            {
                'filename': 'tests/test_main.py',
                'status': 'added',
                'additions': 20,
                'deletions': 5,
                'changes': 25,
                'patch': '@@ -0,0 +1,20 @@\n+def test_main():'
            }
        ]

        mock_get.side_effect = [pr_response, files_response]

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            result = client.get_pr_data('owner/repo', 123)

            # Verify API calls: one for PR metadata, one for the paged file list.
            assert mock_get.call_count == 2
            mock_get.assert_any_call(
                'https://api.github.com/repos/owner/repo/pulls/123',
                headers=client.headers
            )
            mock_get.assert_any_call(
                'https://api.github.com/repos/owner/repo/pulls/123/files?per_page=100',
                headers=client.headers
            )

            # Verify result structure.  The raw GitHub 'user' object is
            # flattened to just the login string.
            assert result['number'] == 123
            assert result['title'] == 'Test PR'
            assert result['user'] == 'testuser'
            assert len(result['files']) == 2
            assert result['files'][0]['filename'] == 'src/main.py'
            assert result['files'][1]['status'] == 'added'
111 |
112 | @patch('requests.get')
113 | def test_get_pr_data_null_head_repo(self, mock_get):
114 | """Test PR data retrieval when head repo is null (deleted fork)."""
115 | pr_response = Mock()
116 | pr_response.json.return_value = {
117 | 'number': 123,
118 | 'title': 'Test PR',
119 | # Don't include body key to test the get() default
120 | 'user': {'login': 'testuser'},
121 | 'created_at': '2024-01-01T00:00:00Z',
122 | 'updated_at': '2024-01-01T01:00:00Z',
123 | 'state': 'open',
124 | 'head': {
125 | 'ref': 'feature-branch',
126 | 'sha': 'abc123',
127 | 'repo': None # Deleted fork
128 | },
129 | 'base': {
130 | 'ref': 'main',
131 | 'sha': 'def456'
132 | },
133 | 'additions': 50,
134 | 'deletions': 10,
135 | 'changed_files': 3
136 | }
137 |
138 | files_response = Mock()
139 | files_response.json.return_value = []
140 |
141 | mock_get.side_effect = [pr_response, files_response]
142 |
143 | with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
144 | client = GitHubActionClient()
145 | result = client.get_pr_data('owner/repo', 123)
146 |
147 | # Should use original repo name when head repo is None
148 | assert result['head']['repo']['full_name'] == 'owner/repo'
149 | # The implementation passes None through, test should match that
150 | assert result['body'] == ''
151 |
152 | @patch('requests.get')
153 | def test_get_pr_data_api_error(self, mock_get):
154 | """Test PR data retrieval with API error."""
155 | mock_response = Mock()
156 | mock_response.raise_for_status.side_effect = Exception("API Error")
157 | mock_get.return_value = mock_response
158 |
159 | with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
160 | client = GitHubActionClient()
161 | with pytest.raises(Exception, match="API Error"):
162 | client.get_pr_data('owner/repo', 123)
163 |
    @patch('requests.get')
    def test_get_pr_diff_success(self, mock_get):
        """Test successful PR diff retrieval."""
        # Raw unified diff as GitHub returns it for the diff media type.
        diff_content = """diff --git a/src/main.py b/src/main.py
index abc123..def456 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,5 +1,10 @@
+import os
 def main():
     print("Hello")
+    # New feature
+    process_data()
"""

        mock_response = Mock()
        mock_response.text = diff_content
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            result = client.get_pr_diff('owner/repo', 123)

            # Verify API call: the diff is requested from the same PR endpoint
            # but with the GitHub diff media type in the Accept header.
            mock_get.assert_called_once()
            call_args = mock_get.call_args
            assert call_args[0][0] == 'https://api.github.com/repos/owner/repo/pulls/123'
            assert call_args[1]['headers']['Accept'] == 'application/vnd.github.diff'

            # Verify result contains the diff body unchanged.
            assert 'import os' in result
            assert 'process_data()' in result
197 |
    @patch('requests.get')
    def test_get_pr_diff_filters_generated_files(self, mock_get):
        """Test that generated files are filtered from diff."""
        # Three file sections: two real source files and one carrying the
        # "@generated" marker, which the client is expected to strip out.
        diff_with_generated = """diff --git a/src/main.py b/src/main.py
index abc123..def456 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,5 +1,10 @@
+import os
 def main():
     print("Hello")
diff --git a/generated/code.py b/generated/code.py
index 111..222 100644
--- a/generated/code.py
+++ b/generated/code.py
@@ -1,3 +1,5 @@
 # @generated by protoc
+# More generated code
+print("generated")
diff --git a/src/feature.py b/src/feature.py
index 333..444 100644
--- a/src/feature.py
+++ b/src/feature.py
@@ -1,3 +1,5 @@
+# Real code
 def feature():
     pass
"""

        mock_response = Mock()
        mock_response.text = diff_with_generated
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            result = client.get_pr_diff('owner/repo', 123)

            # Verify the generated file section is filtered out while the
            # hand-written files survive intact.
            assert 'src/main.py' in result
            assert 'src/feature.py' in result
            assert 'generated/code.py' not in result
            assert '@generated' not in result
            assert 'More generated code' not in result
242 |
def test_filter_generated_files_edge_cases(self):
    """Test edge cases in generated file filtering."""
    with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
        client = GitHubActionClient()

        # Empty diff passes through unchanged (still empty).
        assert client._filter_generated_files('') == ''

        # No diff markers - if no diff format, everything is filtered
        text = "Just some random text\nwith @generated in it"
        # Since there's no 'diff --git' marker, the split results in one section
        # that contains @generated, so it gets filtered out
        assert client._filter_generated_files(text) == ''

        # Multiple generated markers: sections a.py and c.py each carry a
        # @generated marker and must be dropped; b.py must survive.
        diff = """diff --git a/a.py b/a.py
@generated by tool
content
diff --git a/b.py b/b.py
normal content
diff --git a/c.py b/c.py
# This file is @generated
more content
"""
        result = client._filter_generated_files(diff)
        assert 'a.py' not in result
        assert 'b.py' in result
        assert 'c.py' not in result
271 |
272 |
class TestGitHubAPIIntegration:
    """Test GitHub API integration scenarios."""

    @patch('requests.get')
    def test_rate_limit_handling(self, mock_get):
        """Test that rate limit headers are respected."""
        # Simulate a 403 response with exhausted rate-limit headers.
        limited_response = Mock()
        limited_response.headers = {
            'X-RateLimit-Remaining': '0',
            'X-RateLimit-Reset': '1234567890',
        }
        limited_response.status_code = 403
        limited_response.json.return_value = {'message': 'API rate limit exceeded'}
        limited_response.raise_for_status.side_effect = Exception("Rate limit exceeded")
        mock_get.return_value = limited_response

        # The client is expected to surface the error to its caller.
        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            with pytest.raises(Exception, match="Rate limit exceeded"):
                client.get_pr_data('owner/repo', 123)

    @patch('requests.get')
    def test_pagination_not_needed_for_pr_files(self, mock_get):
        """Test that PR files endpoint returns all files without pagination."""
        # GitHub API returns up to 3000 files per PR without pagination
        file_entries = []
        for index in range(100):  # 100 files
            file_entries.append({
                'filename': f'file{index}.py',
                'status': 'added',
                'additions': 10,
                'deletions': 0,
                'changes': 10,
                'patch': f'@@ -0,0 +1,10 @@\n+# File {index}',
            })

        pr_response = Mock()
        pr_response.json.return_value = {
            'number': 123,
            'title': 'Large PR',
            'body': 'Many files',
            'user': {'login': 'testuser'},
            'created_at': '2024-01-01T00:00:00Z',
            'updated_at': '2024-01-01T01:00:00Z',
            'state': 'open',
            'head': {'ref': 'feature', 'sha': 'abc123', 'repo': {'full_name': 'owner/repo'}},
            'base': {'ref': 'main', 'sha': 'def456'},
            'additions': 1000,
            'deletions': 0,
            'changed_files': 100,
        }

        files_response = Mock()
        files_response.json.return_value = file_entries

        # First call fetches PR metadata, second call fetches the file list.
        mock_get.side_effect = [pr_response, files_response]

        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
            client = GitHubActionClient()
            result = client.get_pr_data('owner/repo', 123)

        # All 100 files come back in order from a single (unpaginated) call.
        assert len(result['files']) == 100
        assert result['files'][0]['filename'] == 'file0.py'
        assert result['files'][99]['filename'] == 'file99.py'
340 |
--------------------------------------------------------------------------------
/action.yml:
--------------------------------------------------------------------------------
1 | name: 'Claude Code Security Reviewer'
2 | description: 'AI-powered security review GitHub Action using Claude to analyze code changes for security vulnerabilities'
3 | author: 'Anthropic'
4 |
5 | inputs:
6 | comment-pr:
7 | description: 'Whether to comment on PRs with findings'
8 | required: false
9 | default: 'true'
10 |
11 | upload-results:
12 | description: 'Whether to upload results as artifacts'
13 | required: false
14 | default: 'true'
15 |
16 | exclude-directories:
17 | description: 'Comma-separated list of directories to exclude from scanning'
18 | required: false
19 | default: ''
20 |
21 | claudecode-timeout:
22 | description: 'Timeout for ClaudeCode analysis in minutes'
23 | required: false
24 | default: '20'
25 |
26 | claude-api-key:
27 | description: 'Anthropic Claude API key for security analysis'
28 | required: true
29 | default: ''
30 |
31 | claude-model:
32 | description: 'Claude model to use for security analysis (e.g., claude-sonnet-4-20250514)'
33 | required: false
34 | default: ''
35 |
36 | run-every-commit:
37 | description: 'Run ClaudeCode on every commit (skips cache check). Warning: This may lead to more false positives on PRs with many commits as the AI analyzes the same code multiple times.'
38 | required: false
39 | default: 'false'
40 |
41 | false-positive-filtering-instructions:
42 | description: 'Path to custom false positive filtering instructions text file'
43 | required: false
44 | default: ''
45 |
46 | custom-security-scan-instructions:
47 | description: 'Path to custom security scan instructions text file to append to audit prompt'
48 | required: false
49 | default: ''
50 |
51 | outputs:
52 | findings-count:
53 | description: 'Number of security findings'
54 | value: ${{ steps.claudecode-scan.outputs.findings_count }}
55 |
56 | results-file:
57 | description: 'Path to the results JSON file'
58 | value: ${{ steps.claudecode-scan.outputs.results_file }}
59 |
60 | runs:
61 | using: 'composite'
62 | steps:
63 | - name: Install GitHub CLI
64 | shell: bash
65 | run: |
66 | echo "::group::Install gh CLI"
67 | # Install GitHub CLI for PR operations
68 | sudo apt-get update && sudo apt-get install -y gh
69 | echo "::endgroup::"
70 |
71 | - name: Set up Python
72 | uses: actions/setup-python@v5
73 | with:
74 | python-version: '3.x'
75 |
76 | - name: Check ClaudeCode run history
77 | id: claudecode-history
78 | if: github.event_name == 'pull_request'
79 | uses: actions/cache@v4
80 | with:
81 | path: .claudecode-marker
82 | key: claudecode-${{ github.repository_id }}-pr-${{ github.event.pull_request.number }}-${{ github.sha }}
83 | restore-keys: |
84 | claudecode-${{ github.repository_id }}-pr-${{ github.event.pull_request.number }}-
85 |
86 | - name: Determine ClaudeCode enablement
87 | id: claudecode-check
88 | shell: bash
89 | env:
90 | PR_NUMBER: ${{ github.event.pull_request.number }}
91 | RUN_EVERY_COMMIT: ${{ inputs.run-every-commit }}
92 | run: |
93 | # Check if ClaudeCode should be enabled
94 | ENABLE_CLAUDECODE="true"
95 | SILENCE_CLAUDECODE_COMMENTS="false"
96 |
# For PRs, check the run-history cache to decide whether to run again
98 | if [ "${{ github.event_name }}" == "pull_request" ]; then
99 | PR_NUMBER="$PR_NUMBER"
100 | CACHE_HIT="${{ steps.claudecode-history.outputs.cache-hit }}"
101 |
102 | # Now check cache - if ClaudeCode has already run, disable unless run-every-commit is true
103 | # Check if marker file exists (cache may have been restored from a different SHA)
104 | if [ "$RUN_EVERY_COMMIT" != "true" ] && [ -f ".claudecode-marker/marker.json" ]; then
105 | echo "ClaudeCode has already run on PR #$PR_NUMBER (found marker file), forcing disable to avoid false positives"
106 | ENABLE_CLAUDECODE="false"
107 | elif [ "$RUN_EVERY_COMMIT" == "true" ] && [ -f ".claudecode-marker/marker.json" ]; then
108 | echo "ClaudeCode has already run on PR #$PR_NUMBER but run-every-commit is enabled, running again"
109 | elif [ "$ENABLE_CLAUDECODE" == "true" ]; then
110 | echo "ClaudeCode will run for PR #$PR_NUMBER (first run)"
111 | fi
112 | fi
113 |
114 | echo "enable_claudecode=$ENABLE_CLAUDECODE" >> $GITHUB_OUTPUT
115 | echo "silence_claudecode_comments=$SILENCE_CLAUDECODE_COMMENTS" >> $GITHUB_OUTPUT
116 |
117 | if [ "$ENABLE_CLAUDECODE" == "true" ]; then
118 | echo "ClaudeCode is enabled for this run"
119 | else
120 | echo "ClaudeCode is disabled for this run"
121 | fi
122 |
123 | - name: Reserve ClaudeCode slot to prevent race conditions
124 | if: steps.claudecode-check.outputs.enable_claudecode == 'true' && github.event_name == 'pull_request'
125 | shell: bash
126 | env:
127 | REPOSITORY_ID: ${{ github.repository_id }}
128 | REPOSITORY: ${{ github.repository }}
129 | PR_NUMBER: ${{ github.event.pull_request.number }}
130 | SHA: ${{ github.sha }}
131 | RUN_ID: ${{ github.run_id }}
132 | RUN_NUMBER: ${{ github.run_number }}
133 | run: |
134 | # Create a reservation marker immediately to prevent other concurrent runs
135 | mkdir -p .claudecode-marker
136 | cat > .claudecode-marker/marker.json << EOF
137 | {
138 | "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
139 | "repository_id": "$REPOSITORY_ID",
140 | "repository": "$REPOSITORY",
141 | "pr_number": $PR_NUMBER,
142 | "sha": "$SHA",
143 | "status": "reserved",
144 | "run_id": "$RUN_ID",
145 | "run_number": "$RUN_NUMBER"
146 | }
147 | EOF
148 | echo "Created ClaudeCode reservation marker for PR #$PR_NUMBER"
149 |
150 | - name: Save ClaudeCode reservation to cache
151 | if: steps.claudecode-check.outputs.enable_claudecode == 'true' && github.event_name == 'pull_request'
152 | uses: actions/cache/save@v4
153 | with:
154 | path: .claudecode-marker
155 | key: claudecode-${{ github.repository_id }}-pr-${{ github.event.pull_request.number }}-${{ github.sha }}
156 |
157 | - name: Set up Node.js
158 | if: steps.claudecode-check.outputs.enable_claudecode == 'true'
159 | uses: actions/setup-node@v4
160 | with:
161 | node-version: '18'
162 |
163 | - name: Install dependencies
164 | shell: bash
165 | env:
166 | ACTION_PATH: ${{ github.action_path }}
167 | run: |
168 | echo "::group::Install Deps"
169 | if [ "${{ steps.claudecode-check.outputs.enable_claudecode }}" == "true" ]; then
170 | pip install -r "$ACTION_PATH/claudecode/requirements.txt"
171 | npm install -g @anthropic-ai/claude-code
172 | fi
173 | sudo apt-get update && sudo apt-get install -y jq
174 | echo "::endgroup::"
175 |
176 | - name: Run ClaudeCode scan
177 | id: claudecode-scan
178 | if: steps.claudecode-check.outputs.enable_claudecode == 'true'
179 | shell: bash
180 | env:
181 | GITHUB_TOKEN: ${{ github.token }}
182 | GITHUB_REPOSITORY: ${{ github.repository }}
183 | PR_NUMBER: ${{ github.event.pull_request.number }}
184 | ANTHROPIC_API_KEY: ${{ inputs.claude-api-key }}
185 | ENABLE_CLAUDE_FILTERING: 'true'
186 | EXCLUDE_DIRECTORIES: ${{ inputs.exclude-directories }}
187 | FALSE_POSITIVE_FILTERING_INSTRUCTIONS: ${{ inputs.false-positive-filtering-instructions }}
188 | CUSTOM_SECURITY_SCAN_INSTRUCTIONS: ${{ inputs.custom-security-scan-instructions }}
189 | CLAUDE_MODEL: ${{ inputs.claude-model }}
190 | CLAUDECODE_TIMEOUT: ${{ inputs.claudecode-timeout }}
191 | ACTION_PATH: ${{ github.action_path }}
192 | run: |
193 | echo "Running ClaudeCode AI security analysis..."
194 | echo "----------------------------------------"
195 |
196 | # Initialize outputs
197 | echo "findings_count=0" >> $GITHUB_OUTPUT
198 | echo "results_file=claudecode/claudecode-results.json" >> $GITHUB_OUTPUT
199 |
200 | # Skip ClaudeCode if not a PR
201 | if [ "${{ github.event_name }}" != "pull_request" ]; then
202 | echo "ClaudeCode only runs on pull requests, skipping"
203 | exit 0
204 | fi
205 |
206 | # Validate API key is provided
207 | if [ -z "$ANTHROPIC_API_KEY" ]; then
208 | echo "::error::ANTHROPIC_API_KEY is not set. Please provide the claude-api-key input to the action."
209 | echo "Example usage:"
210 | echo " - uses: anthropics/claude-code-security-reviewer@main"
211 | echo " with:"
212 | echo " claude-api-key: \$\{{ secrets.ANTHROPIC_API_KEY }}"
213 | exit 1
214 | fi
215 |
216 | # Set timeout
217 | export CLAUDE_TIMEOUT="$CLAUDECODE_TIMEOUT"
218 |
219 | # Run ClaudeCode audit with verbose debugging
220 | export REPO_PATH=$(pwd)
221 | cd "$ACTION_PATH"
222 |
223 | # Enable verbose debugging
224 | echo "::group::ClaudeCode Environment"
225 | echo "Current directory: $(pwd)"
226 | echo "Python version: $(python --version)"
227 | echo "Claude CLI version: $(claude --version 2>&1 || echo 'Claude CLI not found')"
228 | echo "ANTHROPIC_API_KEY set: $(if [ -n "$ANTHROPIC_API_KEY" ]; then echo 'Yes'; else echo 'No'; fi)"
229 | echo "GITHUB_REPOSITORY: $GITHUB_REPOSITORY"
230 | echo "PR_NUMBER: $PR_NUMBER"
231 | echo "Python path: $PYTHONPATH"
232 | echo "Files in claudecode directory:"
233 | ls -la claudecode/
234 | echo "::endgroup::"
235 |
236 | echo "::group::ClaudeCode Execution"
237 | # Add current directory to Python path so it can find the claudecode module
238 | export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$(pwd)"
239 | echo "Updated PYTHONPATH: $PYTHONPATH"
240 |
241 | # Run from the action root directory so Python can find the claudecode module
242 | python -u claudecode/github_action_audit.py > claudecode/claudecode-results.json 2>claudecode/claudecode-error.log || CLAUDECODE_EXIT_CODE=$?
243 |
244 | if [ -n "$CLAUDECODE_EXIT_CODE" ]; then
245 | echo "::warning::ClaudeCode exited with code $CLAUDECODE_EXIT_CODE"
246 | else
247 | echo "ClaudeCode scan completed successfully"
248 | fi
249 |
250 | # Parse ClaudeCode results and count findings regardless of exit code
251 | if [ -f claudecode/claudecode-results.json ]; then
252 | FILE_SIZE=$(wc -c < claudecode/claudecode-results.json)
253 | echo "ClaudeCode results file size: $FILE_SIZE bytes"
254 |
255 | # Check if file is empty or too small
256 | if [ "$FILE_SIZE" -lt 2 ]; then
257 | echo "::warning::ClaudeCode results file is empty or invalid (size: $FILE_SIZE bytes)"
258 | echo "::warning::ClaudeCode may have failed silently. Check claudecode-error.log"
259 | if [ -f claudecode/claudecode-error.log ]; then
260 | echo "Error log contents:"
261 | cat claudecode/claudecode-error.log
262 | fi
263 | echo "findings_count=0" >> $GITHUB_OUTPUT
264 | else
265 | echo "ClaudeCode results preview:"
266 | head -n 300 claudecode/claudecode-results.json || echo "Unable to preview results"
267 |
268 | # Check if the result is an error
269 | if jq -e '.error' claudecode/claudecode-results.json > /dev/null 2>&1; then
270 | ERROR_MSG=$(jq -r '.error' claudecode/claudecode-results.json)
271 | echo "::warning::ClaudeCode error: $ERROR_MSG"
272 | echo "findings_count=0" >> $GITHUB_OUTPUT
273 | else
274 | # Use -r to get raw output and handle potential null/missing findings array
275 | CLAUDECODE_FINDINGS_COUNT=$(jq -r '.findings | if . == null then 0 else length end' claudecode/claudecode-results.json 2>/dev/null || echo "0")
276 | echo "::debug::Extracted ClaudeCode findings count: $CLAUDECODE_FINDINGS_COUNT"
277 | echo "findings_count=$CLAUDECODE_FINDINGS_COUNT" >> $GITHUB_OUTPUT
278 | echo "ClaudeCode found $CLAUDECODE_FINDINGS_COUNT security issues"
279 |
280 | # Also create findings.json for PR comment script
281 | jq '.findings // []' claudecode/claudecode-results.json > findings.json || echo '[]' > findings.json
282 | fi
283 | fi
284 | else
285 | echo "::warning::ClaudeCode results file not found"
286 | if [ -f claudecode/claudecode-error.log ]; then
287 | echo "Error log contents:"
288 | cat claudecode/claudecode-error.log
289 | fi
290 | echo "findings_count=0" >> $GITHUB_OUTPUT
291 | fi
292 |
293 | # Always copy files to workspace root regardless of the outcome
294 | # This ensures artifact upload and PR commenting can find them
295 | if [ -f findings.json ]; then
296 | cp findings.json ${{ github.workspace }}/findings.json || true
297 | fi
298 | if [ -f claudecode/claudecode-results.json ]; then
299 | cp claudecode/claudecode-results.json ${{ github.workspace }}/claudecode-results.json || true
300 | fi
301 | if [ -f claudecode/claudecode-error.log ]; then
302 | cp claudecode/claudecode-error.log ${{ github.workspace }}/claudecode-error.log || true
303 | fi
304 |
305 | echo "::endgroup::"
306 |
307 |
308 | - name: Upload scan results
309 | if: always() && inputs.upload-results == 'true'
310 | uses: actions/upload-artifact@v4
311 | with:
312 | name: security-review-results
313 | path: |
314 | findings.json
315 | claudecode-results.json
316 | claudecode-error.log
317 | retention-days: 7
318 | if-no-files-found: ignore
319 |
320 | - name: Comment PR with findings
321 | if: github.event_name == 'pull_request' && inputs.comment-pr == 'true' && steps.claudecode-check.outputs.enable_claudecode == 'true'
322 | shell: bash
323 | env:
324 | GITHUB_TOKEN: ${{ github.token }}
325 | CLAUDECODE_FINDINGS: ${{ steps.claudecode-scan.outputs.findings_count }}
326 | SILENCE_CLAUDECODE_COMMENTS: ${{ steps.claudecode-check.outputs.silence_claudecode_comments }}
327 | ACTION_PATH: ${{ github.action_path }}
328 | run: |
329 | node "$ACTION_PATH/scripts/comment-pr-findings.js"
330 |
331 | branding:
332 | icon: 'shield'
333 | color: 'red'
334 |
--------------------------------------------------------------------------------
/claudecode/findings_filter.py:
--------------------------------------------------------------------------------
1 | """Findings filter for reducing false positives in security audit results."""
2 |
3 | import re
4 | from typing import Dict, Any, List, Tuple, Optional, Pattern
5 | import time
6 | from dataclasses import dataclass, field
7 |
8 | from claudecode.claude_api_client import ClaudeAPIClient
9 | from claudecode.constants import DEFAULT_CLAUDE_MODEL
10 | from claudecode.logger import get_logger
11 |
12 | logger = get_logger(__name__)
13 |
14 |
@dataclass
class FilterStats:
    """Statistics about the filtering process."""
    # Number of findings received before any filtering.
    total_findings: int = 0
    # Findings removed by the HardExclusionRules pattern stage.
    hard_excluded: int = 0
    # Findings removed by the Claude API analysis stage.
    claude_excluded: int = 0
    # Findings that survived every filter stage.
    kept_findings: int = 0
    # Short exclusion-reason label -> count of findings removed for it.
    exclusion_breakdown: Dict[str, int] = field(default_factory=dict)
    # Confidence scores reported by Claude for analyzed findings
    # (10.0 is used as the default "high confidence" value).
    confidence_scores: List[float] = field(default_factory=list)
    # Wall-clock duration of the filtering run, in seconds.
    runtime_seconds: float = 0.0
25 |
26 |
class HardExclusionRules:
    """Hard exclusion rules for common false positives.

    Each rule group is a list of pre-compiled regexes matched against a
    finding's combined title + description text. The first matching group
    determines the exclusion reason returned by get_exclusion_reason().
    """

    # Pre-compiled regex patterns for better performance
    _DOS_PATTERNS: List[Pattern] = [
        re.compile(r'\b(denial of service|dos attack|resource exhaustion)\b', re.IGNORECASE),
        re.compile(r'\b(exhaust|overwhelm|overload).*?(resource|memory|cpu)\b', re.IGNORECASE),
        re.compile(r'\b(infinite|unbounded).*?(loop|recursion)\b', re.IGNORECASE),
    ]

    _RATE_LIMITING_PATTERNS: List[Pattern] = [
        re.compile(r'\b(missing|lack of|no)\s+rate\s+limit', re.IGNORECASE),
        re.compile(r'\brate\s+limiting\s+(missing|required|not implemented)', re.IGNORECASE),
        re.compile(r'\b(implement|add)\s+rate\s+limit', re.IGNORECASE),
        re.compile(r'\bunlimited\s+(requests|calls|api)', re.IGNORECASE),
    ]

    _RESOURCE_PATTERNS: List[Pattern] = [
        re.compile(r'\b(resource|memory|file)\s+leak\s+potential', re.IGNORECASE),
        re.compile(r'\bunclosed\s+(resource|file|connection)', re.IGNORECASE),
        re.compile(r'\b(close|cleanup|release)\s+(resource|file|connection)', re.IGNORECASE),
        re.compile(r'\bpotential\s+memory\s+leak', re.IGNORECASE),
        re.compile(r'\b(database|thread|socket|connection)\s+leak', re.IGNORECASE),
    ]

    _OPEN_REDIRECT_PATTERNS: List[Pattern] = [
        re.compile(r'\b(open redirect|unvalidated redirect)\b', re.IGNORECASE),
        re.compile(r'\b(redirect.(attack|exploit|vulnerability))\b', re.IGNORECASE),
        re.compile(r'\b(malicious.redirect)\b', re.IGNORECASE),
    ]

    _MEMORY_SAFETY_PATTERNS: List[Pattern] = [
        re.compile(r'\b(buffer overflow|stack overflow|heap overflow)\b', re.IGNORECASE),
        re.compile(r'\b(oob)\s+(read|write|access)\b', re.IGNORECASE),
        re.compile(r'\b(out.?of.?bounds?)\b', re.IGNORECASE),
        re.compile(r'\b(memory safety|memory corruption)\b', re.IGNORECASE),
        re.compile(r'\b(use.?after.?free|double.?free|null.?pointer.?dereference)\b', re.IGNORECASE),
        re.compile(r'\b(segmentation fault|segfault|memory violation)\b', re.IGNORECASE),
        re.compile(r'\b(bounds check|boundary check|array bounds)\b', re.IGNORECASE),
        re.compile(r'\b(integer overflow|integer underflow|integer conversion)\b', re.IGNORECASE),
        re.compile(r'\barbitrary.?(memory read|pointer dereference|memory address|memory pointer)\b', re.IGNORECASE),
    ]

    _REGEX_INJECTION: List[Pattern] = [
        re.compile(r'\b(regex|regular expression)\s+injection\b', re.IGNORECASE),
        re.compile(r'\b(regex|regular expression)\s+denial of service\b', re.IGNORECASE),
        re.compile(r'\b(regex|regular expression)\s+flooding\b', re.IGNORECASE),
    ]

    _SSRF_PATTERNS: List[Pattern] = [
        re.compile(r'\b(ssrf|server\s+.?side\s+.?request\s+.?forgery)\b', re.IGNORECASE),
    ]

    @classmethod
    def get_exclusion_reason(cls, finding: Dict[str, Any]) -> Optional[str]:
        """Check if a finding should be excluded based on hard rules.

        Args:
            finding: Security finding to check. Expected keys: 'file',
                'title', 'description' (all optional; None values tolerated).

        Returns:
            Exclusion reason if finding should be excluded, None otherwise
        """
        # Check if finding is in a Markdown file
        file_path = finding.get('file', '')
        if file_path.lower().endswith('.md'):
            return "Finding in Markdown documentation file"

        description = finding.get('description', '')
        title = finding.get('title', '')

        # Handle None values so the f-string join below never fails
        if description is None:
            description = ''
        if title is None:
            title = ''

        combined_text = f"{title} {description}".lower()

        # Check DOS patterns
        for pattern in cls._DOS_PATTERNS:
            if pattern.search(combined_text):
                return "Generic DOS/resource exhaustion finding (low signal)"

        # Check rate limiting patterns
        for pattern in cls._RATE_LIMITING_PATTERNS:
            if pattern.search(combined_text):
                return "Generic rate limiting recommendation"

        # Check resource patterns - always exclude
        for pattern in cls._RESOURCE_PATTERNS:
            if pattern.search(combined_text):
                return "Resource management finding (not a security vulnerability)"

        # Check open redirect patterns
        for pattern in cls._OPEN_REDIRECT_PATTERNS:
            if pattern.search(combined_text):
                return "Open redirect vulnerability (not high impact)"

        # Check regex injection patterns
        for pattern in cls._REGEX_INJECTION:
            if pattern.search(combined_text):
                return "Regex injection finding (not applicable)"

        # Check memory safety patterns - exclude if NOT in C/C++ files.
        # Fix: also recognize the common C++ extensions .hpp/.hxx/.cxx so
        # genuine memory-safety findings in C++ headers/sources are kept.
        c_cpp_extensions = {'.c', '.cc', '.cpp', '.cxx', '.h', '.hpp', '.hxx'}
        file_ext = ''
        if '.' in file_path:
            file_ext = f".{file_path.lower().split('.')[-1]}"

        # If file doesn't have a C/C++ extension (including no extension), exclude memory safety findings
        if file_ext not in c_cpp_extensions:
            for pattern in cls._MEMORY_SAFETY_PATTERNS:
                if pattern.search(combined_text):
                    return "Memory safety finding in non-C/C++ code (not applicable)"

        # Check SSRF patterns - exclude if in HTML files only
        html_extensions = {'.html'}

        # If file has HTML extension, exclude SSRF findings
        if file_ext in html_extensions:
            for pattern in cls._SSRF_PATTERNS:
                if pattern.search(combined_text):
                    return "SSRF finding in HTML file (not applicable to client-side code)"

        return None
155 |
156 |
class FindingsFilter:
    """Main filter class for security findings."""

    def __init__(self,
                 use_hard_exclusions: bool = True,
                 use_claude_filtering: bool = True,
                 api_key: Optional[str] = None,
                 model: str = DEFAULT_CLAUDE_MODEL,
                 custom_filtering_instructions: Optional[str] = None):
        """Initialize findings filter.

        Args:
            use_hard_exclusions: Whether to apply hard exclusion rules
            use_claude_filtering: Whether to use Claude API for filtering
            api_key: Anthropic API key for Claude filtering
            model: Claude model to use for filtering
            custom_filtering_instructions: Optional custom filtering instructions
        """
        self.use_hard_exclusions = use_hard_exclusions
        self.use_claude_filtering = use_claude_filtering
        self.custom_filtering_instructions = custom_filtering_instructions

        # The Claude client stays None when filtering is off or the API
        # turns out to be unusable; filter_findings checks for that.
        self.claude_client = None
        if self.use_claude_filtering:
            try:
                self.claude_client = ClaudeAPIClient(
                    model=model,
                    api_key=api_key
                )
                # Confirm credentials actually work before relying on them.
                valid, error = self.claude_client.validate_api_access()
                if not valid:
                    logger.warning(f"Claude API validation failed: {error}")
                    self.claude_client = None
                    self.use_claude_filtering = False
            except Exception as e:
                logger.error(f"Failed to initialize Claude client: {str(e)}")
                self.use_claude_filtering = False

    @staticmethod
    def _with_metadata(finding: Dict[str, Any],
                       confidence: float,
                       justification: str) -> Dict[str, Any]:
        """Return a shallow copy of *finding* annotated with filter metadata."""
        annotated = finding.copy()
        annotated['_filter_metadata'] = {
            'confidence_score': confidence,
            'justification': justification,
        }
        return annotated

    def filter_findings(self,
                        findings: List[Dict[str, Any]],
                        pr_context: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any], FilterStats]:
        """Filter security findings to remove false positives.

        Args:
            findings: List of security findings from Claude Code audit
            pr_context: Optional PR context for better analysis

        Returns:
            Tuple of (success, filtered_results, stats)
        """
        started = time.time()

        # Nothing to do: return an empty-but-complete result payload.
        if not findings:
            empty_stats = FilterStats(total_findings=0, runtime_seconds=0.0)
            empty_payload = {
                "filtered_findings": [],
                "excluded_findings": [],
                "analysis_summary": {
                    "total_findings": 0,
                    "kept_findings": 0,
                    "excluded_findings": 0,
                    "exclusion_breakdown": {}
                }
            }
            return True, empty_payload, empty_stats

        logger.info(f"Filtering {len(findings)} security findings")

        stats = FilterStats(total_findings=len(findings))

        # Stage 1: cheap regex-based hard exclusions.
        survivors = []       # (original index, finding) pairs that pass stage 1
        excluded_hard = []

        if self.use_hard_exclusions:
            for idx, finding in enumerate(findings):
                reason = HardExclusionRules.get_exclusion_reason(finding)
                if reason is None:
                    survivors.append((idx, finding))
                    continue

                excluded_hard.append({
                    "finding": finding,
                    "index": idx,
                    "exclusion_reason": reason,
                    "filter_stage": "hard_rules"
                })
                stats.hard_excluded += 1

                # Tally per-reason counts using the label before any "(...)".
                label = reason.split('(')[0].strip()
                stats.exclusion_breakdown[label] = stats.exclusion_breakdown.get(label, 0) + 1

            logger.info(f"Hard exclusions removed {stats.hard_excluded} findings")
        else:
            survivors = list(enumerate(findings))

        # Stage 2: optional per-finding Claude API analysis.
        kept = []
        excluded_claude = []

        if self.use_claude_filtering and self.claude_client and survivors:
            logger.info(f"Processing {len(survivors)} findings individually through Claude API")

            for idx, finding in survivors:
                success, analysis, err = self.claude_client.analyze_single_finding(
                    finding, pr_context, self.custom_filtering_instructions
                )

                if not (success and analysis):
                    # On API failure, keep the finding rather than drop it silently.
                    logger.warning(f"Claude API call failed for finding {idx}: {err}")
                    kept.append(self._with_metadata(finding, 10.0, f'Claude API failed: {err}'))
                    stats.kept_findings += 1
                    continue

                confidence = analysis.get('confidence_score', 10.0)
                justification = analysis.get('justification', '')
                stats.confidence_scores.append(confidence)

                if analysis.get('keep_finding', True):
                    kept.append(self._with_metadata(finding, confidence, justification))
                    stats.kept_findings += 1
                else:
                    excluded_claude.append({
                        "finding": finding,
                        "confidence_score": confidence,
                        "exclusion_reason": analysis.get('exclusion_reason') or f"Low confidence score: {confidence}",
                        "justification": justification,
                        "filter_stage": "claude_api"
                    })
                    stats.claude_excluded += 1
        else:
            # Claude filtering disabled or unavailable: everything that passed
            # the hard rules is kept, tagged with default metadata.
            for _idx, finding in survivors:
                kept.append(self._with_metadata(finding, 10.0, 'Claude filtering disabled'))
                stats.kept_findings += 1

        all_excluded = excluded_hard + excluded_claude
        stats.runtime_seconds = time.time() - started

        average_confidence = (
            sum(stats.confidence_scores) / len(stats.confidence_scores)
            if stats.confidence_scores else None
        )

        results = {
            "filtered_findings": kept,
            "excluded_findings": all_excluded,
            "analysis_summary": {
                "total_findings": stats.total_findings,
                "kept_findings": stats.kept_findings,
                "excluded_findings": len(all_excluded),
                "hard_excluded": stats.hard_excluded,
                "claude_excluded": stats.claude_excluded,
                "exclusion_breakdown": stats.exclusion_breakdown,
                "average_confidence": average_confidence,
                "runtime_seconds": stats.runtime_seconds
            }
        }

        logger.info(f"Filtering completed: {stats.kept_findings}/{stats.total_findings} findings kept "
                    f"({stats.runtime_seconds:.1f}s)")

        return True, results, stats
344 |
--------------------------------------------------------------------------------
/claudecode/test_claude_runner.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Unit tests for SimpleClaudeRunner.
4 | """
5 |
6 | import json
7 | import os
8 | import subprocess
9 | from unittest.mock import Mock, patch
10 | from pathlib import Path
11 |
12 | from claudecode.github_action_audit import SimpleClaudeRunner
13 | from claudecode.constants import DEFAULT_CLAUDE_MODEL
14 |
15 |
16 | class TestSimpleClaudeRunner:
17 | """Test SimpleClaudeRunner functionality."""
18 |
def test_init(self):
    """Test runner initialization."""
    # An explicit timeout is converted from minutes to seconds.
    explicit = SimpleClaudeRunner(timeout_minutes=30)
    assert explicit.timeout_seconds == 30 * 60

    # Omitting the argument falls back to the 20-minute default.
    default = SimpleClaudeRunner()
    assert default.timeout_seconds == 20 * 60
26 |
27 | @patch('subprocess.run')
28 | def test_validate_claude_available_success(self, mock_run):
29 | """Test successful Claude validation."""
30 | mock_run.return_value = Mock(
31 | returncode=0,
32 | stdout='claude version 1.0.0',
33 | stderr=''
34 | )
35 |
36 | with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
37 | runner = SimpleClaudeRunner()
38 | success, error = runner.validate_claude_available()
39 |
40 | assert success is True
41 | assert error == ''
42 | mock_run.assert_called_once_with(
43 | ['claude', '--version'],
44 | capture_output=True,
45 | text=True,
46 | timeout=10
47 | )
48 |
49 | @patch('subprocess.run')
50 | def test_validate_claude_available_no_api_key(self, mock_run):
51 | """Test Claude validation without API key."""
52 | mock_run.return_value = Mock(
53 | returncode=0,
54 | stdout='claude version 1.0.0',
55 | stderr=''
56 | )
57 |
58 | # Remove API key
59 | env = os.environ.copy()
60 | env.pop('ANTHROPIC_API_KEY', None)
61 |
62 | with patch.dict(os.environ, env, clear=True):
63 | runner = SimpleClaudeRunner()
64 | success, error = runner.validate_claude_available()
65 |
66 | assert success is False
67 | assert 'ANTHROPIC_API_KEY environment variable is not set' in error
68 |
69 | @patch('subprocess.run')
70 | def test_validate_claude_available_not_installed(self, mock_run):
71 | """Test Claude validation when not installed."""
72 | mock_run.side_effect = FileNotFoundError()
73 |
74 | runner = SimpleClaudeRunner()
75 | success, error = runner.validate_claude_available()
76 |
77 | assert success is False
78 | assert 'Claude Code is not installed or not in PATH' in error
79 |
80 | @patch('subprocess.run')
81 | def test_validate_claude_available_error(self, mock_run):
82 | """Test Claude validation with error."""
83 | mock_run.return_value = Mock(
84 | returncode=1,
85 | stdout='',
86 | stderr='Error: Authentication failed'
87 | )
88 |
89 | runner = SimpleClaudeRunner()
90 | success, error = runner.validate_claude_available()
91 |
92 | assert success is False
93 | assert 'exit code 1' in error
94 | assert 'Authentication failed' in error
95 |
96 | @patch('subprocess.run')
97 | def test_validate_claude_available_timeout(self, mock_run):
98 | """Test Claude validation timeout."""
99 | mock_run.side_effect = subprocess.TimeoutExpired(['claude'], 10)
100 |
101 | runner = SimpleClaudeRunner()
102 | success, error = runner.validate_claude_available()
103 |
104 | assert success is False
105 | assert 'timed out' in error
106 |
107 | def test_run_security_audit_missing_directory(self):
108 | """Test audit with missing directory."""
109 | runner = SimpleClaudeRunner()
110 | success, error, results = runner.run_security_audit(
111 | Path('/non/existent/path'),
112 | "test prompt"
113 | )
114 |
115 | assert success is False
116 | assert 'Repository directory does not exist' in error
117 | assert results == {}
118 |
119 | @patch('subprocess.run')
120 | def test_run_security_audit_success(self, mock_run):
121 | """Test successful security audit."""
122 | # Claude Code returns wrapped format with 'result' field
123 | findings_data = {
124 | "findings": [
125 | {
126 | "file": "test.py",
127 | "line": 10,
128 | "severity": "HIGH",
129 | "description": "SQL injection vulnerability"
130 | }
131 | ],
132 | "analysis_summary": {
133 | "files_reviewed": 5,
134 | "high_severity": 1,
135 | "medium_severity": 0,
136 | "low_severity": 0,
137 | "review_completed": True
138 | }
139 | }
140 |
141 | audit_result = {
142 | "result": json.dumps(findings_data)
143 | }
144 |
145 | mock_run.return_value = Mock(
146 | returncode=0,
147 | stdout=json.dumps(audit_result),
148 | stderr=''
149 | )
150 |
151 | runner = SimpleClaudeRunner()
152 | with patch('pathlib.Path.exists', return_value=True):
153 | success, error, results = runner.run_security_audit(
154 | Path('/tmp/test'),
155 | "test prompt"
156 | )
157 |
158 | assert success is True
159 | assert error == ''
160 | assert len(results['findings']) == 1
161 | assert results['findings'][0]['severity'] == 'HIGH'
162 |
163 | # Verify subprocess call
164 | mock_run.assert_called_once()
165 | call_args = mock_run.call_args
166 | assert call_args[0][0] == [
167 | 'claude',
168 | '--output-format', 'json',
169 | '--model', DEFAULT_CLAUDE_MODEL,
170 | '--disallowed-tools', 'Bash(ps:*)'
171 | ]
172 | assert call_args[1]['input'] == 'test prompt'
173 | assert call_args[1]['cwd'] == Path('/tmp/test')
174 |
175 | @patch('subprocess.run')
176 | def test_run_security_audit_large_prompt_warning(self, mock_run, capsys):
177 | """Test warning for large prompts."""
178 | mock_run.return_value = Mock(
179 | returncode=0,
180 | stdout='{"findings": []}',
181 | stderr=''
182 | )
183 |
184 | # Create a prompt larger than 1MB
185 | large_prompt = 'x' * (1024 * 1024 + 1000)
186 |
187 | runner = SimpleClaudeRunner()
188 | with patch('pathlib.Path.exists', return_value=True):
189 | success, error, results = runner.run_security_audit(
190 | Path('/tmp/test'),
191 | large_prompt
192 | )
193 |
194 | captured = capsys.readouterr()
195 | assert '[Warning] Large prompt size' in captured.err
196 | assert success is True
197 |
198 | @patch('subprocess.run')
199 | def test_run_security_audit_retry_on_failure(self, mock_run):
200 | """Test retry logic on failure."""
201 | # First call fails, second succeeds
202 | mock_run.side_effect = [
203 | Mock(returncode=1, stdout='', stderr='Temporary error'),
204 | Mock(returncode=0, stdout='{"findings": []}', stderr='')
205 | ]
206 |
207 | runner = SimpleClaudeRunner()
208 | with patch('pathlib.Path.exists', return_value=True):
209 | success, error, results = runner.run_security_audit(
210 | Path('/tmp/test'),
211 | "test prompt"
212 | )
213 |
214 | assert success is True
215 | assert error == ''
216 | assert mock_run.call_count == 2 # Retried once
217 |
218 | @patch('subprocess.run')
219 | def test_run_security_audit_retry_on_error_during_execution(self, mock_run):
220 | """Test retry on error_during_execution result."""
221 | error_result = {
222 | "type": "result",
223 | "subtype": "error_during_execution",
224 | "error": "Temporary execution error"
225 | }
226 |
227 | success_result = {
228 | "result": json.dumps({
229 | "findings": [{"file": "test.py", "line": 1, "severity": "LOW", "description": "Issue"}],
230 | "analysis_summary": {
231 | "files_reviewed": 1,
232 | "high_severity": 0,
233 | "medium_severity": 0,
234 | "low_severity": 1,
235 | "review_completed": True
236 | }
237 | })
238 | }
239 |
240 | mock_run.side_effect = [
241 | Mock(returncode=0, stdout=json.dumps(error_result), stderr=''),
242 | Mock(returncode=0, stdout=json.dumps(success_result), stderr='')
243 | ]
244 |
245 | runner = SimpleClaudeRunner()
246 | with patch('pathlib.Path.exists', return_value=True):
247 | success, error, results = runner.run_security_audit(
248 | Path('/tmp/test'),
249 | "test prompt"
250 | )
251 |
252 | assert success is True
253 | assert len(results['findings']) == 1
254 | assert mock_run.call_count == 2
255 |
256 | @patch('subprocess.run')
257 | def test_run_security_audit_timeout(self, mock_run):
258 | """Test timeout handling."""
259 | mock_run.side_effect = subprocess.TimeoutExpired(['claude'], 1200)
260 |
261 | runner = SimpleClaudeRunner()
262 | with patch('pathlib.Path.exists', return_value=True):
263 | success, error, results = runner.run_security_audit(
264 | Path('/tmp/test'),
265 | "test prompt"
266 | )
267 |
268 | assert success is False
269 | assert 'timed out after 20 minutes' in error
270 | assert results == {}
271 |
272 | @patch('subprocess.run')
273 | def test_run_security_audit_json_parse_failure_with_retry(self, mock_run):
274 | """Test JSON parse failure with retry."""
275 | mock_run.side_effect = [
276 | Mock(returncode=0, stdout='Invalid JSON', stderr=''),
277 | Mock(returncode=0, stdout='Still invalid', stderr='')
278 | ]
279 |
280 | runner = SimpleClaudeRunner()
281 | with patch('pathlib.Path.exists', return_value=True):
282 | success, error, results = runner.run_security_audit(
283 | Path('/tmp/test'),
284 | "test prompt"
285 | )
286 |
287 | assert success is False
288 | assert 'Failed to parse Claude output' in error
289 | assert mock_run.call_count == 2
290 |
291 | def test_extract_security_findings_claude_wrapper(self):
292 | """Test extraction from Claude Code wrapper format."""
293 | runner = SimpleClaudeRunner()
294 |
295 | # Test with result field containing JSON string
296 | claude_output = {
297 | "result": json.dumps({
298 | "findings": [
299 | {"file": "test.py", "line": 10, "severity": "HIGH"}
300 | ]
301 | })
302 | }
303 |
304 | result = runner._extract_security_findings(claude_output)
305 | assert len(result['findings']) == 1
306 | assert result['findings'][0]['file'] == 'test.py'
307 |
308 | def test_extract_security_findings_direct_format(self):
309 | """Test that direct findings format was removed - only wrapped format is supported."""
310 | runner = SimpleClaudeRunner()
311 |
312 | # Direct format (without 'result' wrapper) should return empty
313 | claude_output = {
314 | "findings": [
315 | {"file": "main.py", "line": 20, "severity": "MEDIUM"}
316 | ],
317 | "analysis_summary": {
318 | "files_reviewed": 3,
319 | "high_severity": 0,
320 | "medium_severity": 1,
321 | "low_severity": 0
322 | }
323 | }
324 |
325 | result = runner._extract_security_findings(claude_output)
326 | # Should return empty structure since direct format is not supported
327 | assert len(result['findings']) == 0
328 | assert result['analysis_summary']['review_completed'] is False
329 |
330 | def test_extract_security_findings_text_fallback(self):
331 | """Test that text fallback was removed - only JSON is supported."""
332 | runner = SimpleClaudeRunner()
333 |
334 | # Test with result containing text (not JSON)
335 | claude_output = {
336 | "result": "Found SQL injection vulnerability in database.py line 45"
337 | }
338 |
339 | # Should return empty findings since we don't parse text anymore
340 | result = runner._extract_security_findings(claude_output)
341 | assert len(result['findings']) == 0
342 | assert result['analysis_summary']['review_completed'] is False
343 |
344 | def test_extract_security_findings_empty(self):
345 | """Test extraction with no findings."""
346 | runner = SimpleClaudeRunner()
347 |
348 | # Various empty formats
349 | for output in [None, {}, {"result": ""}, {"other": "data"}]:
350 | result = runner._extract_security_findings(output)
351 | assert result['findings'] == []
352 | assert result['analysis_summary']['review_completed'] is False
353 |
354 | def test_create_findings_from_text(self):
355 | """Test that _create_findings_from_text was removed."""
356 | runner = SimpleClaudeRunner()
357 |
358 | # Method should not exist
359 | assert not hasattr(runner, '_create_findings_from_text')
360 |
361 | def test_create_findings_from_text_no_issues(self):
362 | """Test that _create_findings_from_text was removed."""
363 | runner = SimpleClaudeRunner()
364 |
365 | # Method should not exist
366 | assert not hasattr(runner, '_create_findings_from_text')
367 |
368 |
class TestClaudeRunnerEdgeCases:
    """Edge cases and error scenarios for SimpleClaudeRunner."""

    @patch('subprocess.run')
    def test_claude_output_formats(self, mock_run):
        """Findings serialized as a JSON string inside 'result' are extracted."""
        runner = SimpleClaudeRunner()

        # The inner payload is a JSON string held by the wrapper's 'result' key.
        inner = {
            "findings": [
                {"file": "test.py", "line": 1, "severity": "HIGH", "description": "Issue"}
            ]
        }
        wrapper = {"type": "result", "result": json.dumps(inner)}

        mock_run.return_value = Mock(
            returncode=0,
            stdout=json.dumps(wrapper),
            stderr=''
        )

        with patch('pathlib.Path.exists', return_value=True):
            ok, err, payload = runner.run_security_audit(
                Path('/tmp/test'), "test"
            )

        # The nested structure must be unwrapped into findings.
        assert ok is True
        assert len(payload['findings']) == 1

    @patch('subprocess.run')
    def test_partial_json_recovery(self, mock_run):
        """Truncated JSON output triggers exactly one retry."""
        truncated = '{"findings": [{"file": "test.py", "line": 10, "sev'
        mock_run.return_value = Mock(returncode=0, stdout=truncated, stderr='')

        runner = SimpleClaudeRunner()
        with patch('pathlib.Path.exists', return_value=True):
            runner.run_security_audit(Path('/tmp/test'), "test")

        # Parse failure on the first attempt causes a retry.
        assert mock_run.call_count == 2

    @patch('subprocess.run')
    def test_exception_handling(self, mock_run):
        """Unexpected exceptions are reported as a failed audit."""
        mock_run.side_effect = Exception("Unexpected error")

        runner = SimpleClaudeRunner()
        with patch('pathlib.Path.exists', return_value=True):
            ok, err, payload = runner.run_security_audit(
                Path('/tmp/test'), "test"
            )

        assert ok is False
        assert 'Unexpected error' in err
        assert payload == {}
440 |
--------------------------------------------------------------------------------
/claudecode/test_findings_conversion.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Unit tests for findings conversion and edge cases.
4 | """
5 |
6 | import pytest
7 | import json
8 |
9 | from claudecode.findings_filter import FindingsFilter, HardExclusionRules
10 | from claudecode.json_parser import parse_json_with_fallbacks, extract_json_from_text
11 |
12 |
def create_simple_filter():
    """Build a FindingsFilter restricted to hard-exclusion rules only."""
    return FindingsFilter(
        use_hard_exclusions=True,
        use_claude_filtering=False,
    )
16 |
17 |
def filter_findings_simple(filter_instance, findings):
    """Reduce FindingsFilter output to a simple (kept, excluded) pair.

    On filter failure, every input finding is kept and nothing is excluded.
    A None findings list raises TypeError, mirroring iteration semantics.
    """
    if findings is None:
        raise TypeError("'NoneType' object is not iterable")

    success, results, stats = filter_instance.filter_findings(findings)
    if not success:
        # Fail open: pass everything through untouched.
        return findings, []
    return (
        results.get('filtered_findings', []),
        results.get('excluded_findings', []),
    )
31 |
32 |
class TestFindingsConversionEdgeCases:
    """Edge cases in findings conversion and hard-exclusion filtering."""

    def test_empty_findings_list(self):
        """An empty input list produces empty kept/excluded lists."""
        flt = create_simple_filter()
        kept, excluded = filter_findings_simple(flt, [])
        assert kept == []
        assert excluded == []

    def test_none_findings_list(self):
        """None input raises TypeError."""
        flt = create_simple_filter()
        with pytest.raises(TypeError):
            filter_findings_simple(flt, None)

    def test_malformed_finding_missing_fields(self):
        """Findings missing required fields are still processed."""
        cases = [
            {'description': 'Issue 1'},  # no severity
            {'severity': 'HIGH'},  # no description
            {},  # completely empty
            {'severity': 'HIGH', 'description': 'Valid issue'},
        ]
        kept, excluded = filter_findings_simple(create_simple_filter(), cases)
        # Nothing here matches an exclusion pattern, so all four survive.
        assert len(kept) == 4
        assert len(excluded) == 0

    def test_finding_with_extra_fields(self):
        """Unexpected extra fields are preserved through filtering."""
        cases = [{
            'severity': 'HIGH',
            'description': 'SQL injection',
            'extra_field': 'value',
            'nested': {'data': 'here'},
            'array': [1, 2, 3],
        }]
        kept, _ = filter_findings_simple(create_simple_filter(), cases)
        assert len(kept) == 1
        assert kept[0]['extra_field'] == 'value'
        assert kept[0]['nested'] == {'data': 'here'}

    def test_unicode_in_findings(self):
        """Unicode text in any field is handled."""
        cases = [{
            'severity': 'HIGH',
            'description': 'SQL injection in 用户输入',
            'file': 'файл.py',
            'exploit_scenario': 'Attacker könnte dies ausnutzen',
        }]
        kept, _ = filter_findings_simple(create_simple_filter(), cases)
        assert len(kept) == 1
        assert '用户输入' in kept[0]['description']
        assert kept[0]['file'] == 'файл.py'

    def test_very_long_description(self):
        """A 10k-character description does not break filtering."""
        padding = 'A' * 10000  # 10k character description
        cases = [{
            'severity': 'HIGH',
            'description': f'SQL injection vulnerability. {padding}',
        }]
        kept, _ = filter_findings_simple(create_simple_filter(), cases)
        assert len(kept) == 1
        assert len(kept[0]['description']) > 10000

    def test_special_characters_in_description(self):
        """Regex metacharacters in descriptions do not break matching."""
        cases = [
            {'severity': 'HIGH', 'description': 'Issue with [brackets] and (parens)'},
            {'severity': 'HIGH', 'description': 'Path: C:\\Users\\test\\file.py'},
            {'severity': 'HIGH', 'description': 'Regex pattern: .*$^[]{}'},
            {'severity': 'HIGH', 'description': 'Missing rate limiting for API'},
        ]
        kept, excluded = filter_findings_simple(create_simple_filter(), cases)
        # Only the rate-limiting finding matches an exclusion rule.
        assert len(kept) == 3
        assert len(excluded) == 1

    def test_case_sensitivity_in_exclusions(self):
        """DOS exclusions match case-insensitively but at word boundaries."""
        cases = [
            {'severity': 'HIGH', 'description': 'DENIAL OF SERVICE attack'},
            {'severity': 'HIGH', 'description': 'Denial Of Service issue'},
            {'severity': 'HIGH', 'description': 'dos vulnerability'},
            {'severity': 'HIGH', 'description': 'DoS attack vector'},
        ]
        kept, excluded = filter_findings_simple(create_simple_filter(), cases)
        # 'dos vulnerability' alone fails to match the DOS word-boundary patterns.
        assert len(kept) == 1
        assert len(excluded) == 3
        assert kept[0]['description'] == 'dos vulnerability'

    def test_severity_normalization(self):
        """Every severity spelling (or its absence) is processed."""
        cases = [
            {'severity': 'high', 'description': 'Issue 1'},
            {'severity': 'HIGH', 'description': 'Issue 2'},
            {'severity': 'High', 'description': 'Issue 3'},
            {'severity': 'CRITICAL', 'description': 'Issue 4'},
            {'severity': 'unknown', 'description': 'Issue 5'},
            {'severity': '', 'description': 'Issue 6'},
            {'severity': None, 'description': 'Issue 7'},
        ]
        kept, _ = filter_findings_simple(create_simple_filter(), cases)
        assert len(kept) == 7

    def test_json_injection_in_findings(self):
        """JSON-like payloads inside descriptions are treated as plain text."""
        cases = [
            {
                'severity': 'HIGH',
                'description': '{"injected": "json", "description": "fake"}'
            },
            {
                'severity': 'HIGH',
                'description': 'Issue with "}]} payload'
            }
        ]
        kept, _ = filter_findings_simple(create_simple_filter(), cases)
        assert len(kept) == 2
192 |
193 |
class TestJsonParserEdgeCases:
    """Edge cases for parse_json_with_fallbacks and extract_json_from_text."""

    def test_parse_empty_string(self):
        """An empty string yields failure with an error payload."""
        ok, payload = parse_json_with_fallbacks('', 'test')
        assert ok is False
        assert 'error' in payload
        assert "Invalid JSON response" in payload['error']

    def test_parse_whitespace_only(self):
        """Whitespace-only input also yields the error payload."""
        ok, payload = parse_json_with_fallbacks(' \n\t ', 'test')
        assert ok is False
        assert 'error' in payload
        assert "Invalid JSON response" in payload['error']

    def test_parse_truncated_json(self):
        """Truncated JSON cannot be parsed."""
        ok, _ = parse_json_with_fallbacks('{"findings": [{"severity": "HIGH", "desc', 'test')
        assert ok is False

    def test_parse_json_with_comments(self):
        """// comments make the document invalid JSON."""
        doc = """{
            "findings": [
                // This is a comment
                {"severity": "HIGH", "description": "Issue"}
            ]
        }"""
        ok, _ = parse_json_with_fallbacks(doc, 'test')
        assert ok is False

    def test_parse_json_with_trailing_comma(self):
        """A trailing comma makes the document invalid JSON."""
        ok, _ = parse_json_with_fallbacks('{"findings": [{"severity": "HIGH"},]}', 'test')
        assert ok is False

    def test_parse_nested_json_string(self):
        """A JSON string embedded as a value parses at the outer level."""
        nested = '{"result": "{\\"findings\\": [{\\"severity\\": \\"HIGH\\"}]}"}'
        ok, payload = parse_json_with_fallbacks(nested, 'test')
        assert ok is True
        assert 'result' in payload

    def test_extract_json_from_text_edge_cases(self):
        """JSON extraction from assorted text formats."""
        # Plain text contains no JSON at all.
        assert extract_json_from_text('Just plain text') is None

        # With several JSON objects present, the first one wins.
        parsed = extract_json_from_text('First: {"a": 1} Second: {"b": 2}')
        assert parsed == {"a": 1}

        # JSON inside a fenced markdown block is still found.
        fenced = '''```json
{"findings": [{"severity": "HIGH"}]}
```'''
        parsed = extract_json_from_text(fenced)
        assert parsed is not None
        assert 'findings' in parsed

        # Malformed JSON-ish text yields None.
        assert extract_json_from_text('Result: {invalid json}') is None

        # A very large embedded object is handled.
        big = {"data": ["x" * 100 for _ in range(1000)]}
        parsed = extract_json_from_text(f"Result: {json.dumps(big)}")
        assert parsed is not None
        assert len(parsed['data']) == 1000

    def test_extract_json_with_unicode(self):
        """Unicode inside extracted JSON survives intact."""
        parsed = extract_json_from_text('Result: {"message": "Error: 文件未找到"}')
        assert parsed is not None
        assert parsed['message'] == "Error: 文件未找到"

    def test_parse_json_arrays(self):
        """Top-level arrays parse both bare and as finding lists."""
        # Direct array
        ok, payload = parse_json_with_fallbacks('[1, 2, 3]', 'test')
        assert ok is True
        assert payload == [1, 2, 3]

        # Array of findings
        ok, payload = parse_json_with_fallbacks('[{"severity": "HIGH", "description": "Issue"}]', 'test')
        assert ok is True
        assert isinstance(payload, list)
        assert len(payload) == 1
292 |
293 |
class TestHardExclusionRulesEdgeCases:
    """Edge cases for the hard exclusion rules."""

    def test_overlapping_patterns(self):
        """A finding matching several patterns reports the first (DOS)."""
        candidate = {
            'severity': 'HIGH',
            'description': 'Denial of service via rate limiting bypass allows brute force attack'
        }

        # Matches both the DOS and the rate-limiting patterns; DOS wins.
        why = HardExclusionRules.get_exclusion_reason(candidate)
        assert why is not None
        assert "DOS" in why

    def test_pattern_boundary_matching(self):
        """Substrings inside larger words do not trigger exclusions."""
        cases = [
            {'severity': 'HIGH', 'description': 'dosomething() function'},
            {'severity': 'HIGH', 'description': 'windows path issue'},
            {'severity': 'HIGH', 'description': 'pseudorandom number'},
        ]
        kept, excluded = filter_findings_simple(create_simple_filter(), cases)
        # No word-boundary match, so nothing is excluded.
        assert len(kept) == 3
        assert len(excluded) == 0

    def test_html_entities_in_description(self):
        """HTML entities do not prevent pattern matching."""
        cases = [
            {'severity': 'HIGH', 'description': 'XSS via <script> tag'},
            {'severity': 'HIGH', 'description': 'Missing rate limiting & throttling'},
        ]
        kept, excluded = filter_findings_simple(create_simple_filter(), cases)
        # The rate-limiting finding is excluded even with the entity present.
        assert len(kept) == 1
        assert len(excluded) == 1
        assert 'XSS' in kept[0]['description']

    def test_multiline_descriptions(self):
        """Exclusion patterns are found across line breaks."""
        cases = [
            {
                'severity': 'HIGH',
                'description': '''SQL injection vulnerability
                in user input handling.
                This could lead to data exposure.'''
            },
            {
                'severity': 'HIGH',
                'description': '''Performance issue that could
                cause denial of service under
                heavy load conditions.'''
            }
        ]
        kept, excluded = filter_findings_simple(create_simple_filter(), cases)
        # The DOS phrase spans lines yet is still matched.
        assert len(kept) == 1
        assert len(excluded) == 1
        assert 'SQL injection' in kept[0]['description']
363 |
364 |
class TestFilteringCombinations:
    """Combinations of filtering scenarios."""

    def test_mixed_valid_invalid_findings(self):
        """A mix of valid, malformed, and excludable findings is partitioned."""
        cases = [
            {'severity': 'HIGH', 'description': 'SQL injection'},  # kept
            {'description': 'Missing severity'},  # kept (no exclusion pattern)
            {'severity': 'HIGH', 'description': 'Missing rate limiting'},  # excluded
            {'severity': 'MEDIUM', 'description': 'XSS vulnerability'},  # kept
            {'severity': 'LOW', 'description': 'Denial of service attack'},  # excluded
            {'severity': '', 'description': ''},  # kept (no exclusion pattern)
            {'severity': 'HIGH', 'description': 'RCE possibility'},  # kept
        ]
        kept, excluded = filter_findings_simple(create_simple_filter(), cases)

        assert len(kept) == 5  # everything except rate limiting and DOS
        assert len(excluded) == 2

        # The excluded entries wrap the original finding.
        dropped = [entry['finding']['description'] for entry in excluded]
        assert 'Missing rate limiting' in dropped
        assert 'Denial of service attack' in dropped

    def test_duplicate_findings(self):
        """Duplicates are not deduplicated — that is not the filter's job."""
        same = {'severity': 'HIGH', 'description': 'Same issue'}
        kept, _ = filter_findings_simple(create_simple_filter(), [same, same, same])
        assert len(kept) == 3

    def test_similar_but_different_findings(self):
        """Near-duplicate findings are all retained."""
        cases = [
            {'severity': 'HIGH', 'description': 'SQL injection in login'},
            {'severity': 'HIGH', 'description': 'SQL injection in login()'},
            {'severity': 'HIGH', 'description': 'sql injection in login'},
            {'severity': 'MEDIUM', 'description': 'SQL injection in login'},
        ]
        kept, _ = filter_findings_simple(create_simple_filter(), cases)
        assert len(kept) == 4
416 |
--------------------------------------------------------------------------------
/claudecode/test_hard_exclusion_rules.py:
--------------------------------------------------------------------------------
1 | """Unit tests for HardExclusionRules in findings_filter module."""
2 |
3 | from claudecode.findings_filter import HardExclusionRules
4 |
5 |
6 | class TestHardExclusionRules:
7 | """Test the HardExclusionRules class for filtering false positives."""
8 |
9 | def test_dos_pattern_exclusion(self):
10 | """Test exclusion of DOS-related findings."""
11 | dos_findings = [
12 | {
13 | "title": "Potential Denial of Service",
14 | "description": "This could lead to resource exhaustion"
15 | },
16 | {
17 | "title": "Resource consumption issue",
18 | "description": "Unbounded loop could exhaust CPU resources"
19 | },
20 | {
21 | "title": "Memory exhaustion",
22 | "description": "This function could overwhelm memory with large inputs"
23 | },
24 | {
25 | "title": "Stack overflow vulnerability",
26 | "description": "Infinite recursion detected"
27 | }
28 | ]
29 |
30 | for finding in dos_findings:
31 | reason = HardExclusionRules.get_exclusion_reason(finding)
32 | assert reason is not None
33 | assert "DOS/resource exhaustion" in reason
34 |
35 | def test_dos_pattern_not_excluded_with_exploit(self):
36 | """Test that stack overflow with exploit mention is not excluded."""
37 | finding = {
38 | "title": "Stack overflow exploit",
39 | "description": "This stack overflow can be exploited to execute arbitrary code",
40 | "file": "exploit.c" # Add C file so it's not excluded by memory safety rule
41 | }
42 |
43 | reason = HardExclusionRules.get_exclusion_reason(finding)
44 | assert reason is None # Should not be excluded
45 |
46 | def test_generic_validation_pattern_exclusion(self):
47 | """Test that generic validation findings are NOT excluded anymore."""
48 | validation_findings = [
49 | {
50 | "title": "Security Issue",
51 | "description": "Missing input validation"
52 | },
53 | {
54 | "title": "Security Issue",
55 | "description": "Input validation required"
56 | },
57 | {
58 | "title": "Security Issue",
59 | "description": "Validate parameters"
60 | },
61 | {
62 | "title": "Security Issue",
63 | "description": "Add input validation"
64 | }
65 | ]
66 |
67 | # Since we removed generic validation patterns, these should NOT be excluded
68 | for finding in validation_findings:
69 | reason = HardExclusionRules.get_exclusion_reason(finding)
70 | assert reason is None
71 |
72 | def test_specific_validation_not_excluded(self):
73 | """Test that specific validation issues are not excluded."""
74 | specific_findings = [
75 | {
76 | "title": "Missing input validation",
77 | "description": "SQL injection possible due to missing validation"
78 | },
79 | {
80 | "title": "No validation",
81 | "description": "Command injection vulnerability - validate shell commands"
82 | },
83 | {
84 | "title": "Missing validation",
85 | "description": "Path traversal - validate file paths"
86 | },
87 | {
88 | "title": "Add validation",
89 | "description": "Eval() used without input validation"
90 | }
91 | ]
92 |
93 | for finding in specific_findings:
94 | reason = HardExclusionRules.get_exclusion_reason(finding)
95 | assert reason is None # Should not be excluded due to specific context
96 |
97 | def test_secrets_pattern_exclusion(self):
98 | """Test that generic secrets warnings are NOT excluded anymore."""
99 | secrets_findings = [
100 | {
101 | "title": "Hardcoded password detected",
102 | "description": "Avoid hardcoding credentials in source code"
103 | },
104 | {
105 | "title": "Plaintext secrets",
106 | "description": "Credentials stored in plaintext"
107 | },
108 | {
109 | "title": "Embedded token",
110 | "description": "API key in source code"
111 | },
112 | {
113 | "title": "Password storage",
114 | "description": "Password stored in clear text"
115 | }
116 | ]
117 |
118 | # Since we removed secrets patterns, these should NOT be excluded
119 | for finding in secrets_findings:
120 | reason = HardExclusionRules.get_exclusion_reason(finding)
121 | assert reason is None
122 |
123 | def test_actual_secrets_not_excluded(self):
124 | """Test that actual exposed secrets are not excluded."""
125 | actual_secrets = [
126 | {
127 | "title": "Hardcoded password",
128 | "description": "Found actual password: 'admin123' in config file"
129 | },
130 | {
131 | "title": "API key exposed",
132 | "description": "Discovered API key in source: sk-1234567890"
133 | },
134 | {
135 | "title": "Plaintext password",
136 | "description": "Database password 'mypass' found in code"
137 | }
138 | ]
139 |
140 | for finding in actual_secrets:
141 | reason = HardExclusionRules.get_exclusion_reason(finding)
142 | assert reason is None # Should not be excluded
143 |
144 | def test_rate_limiting_pattern_exclusion(self):
145 | """Test exclusion of rate limiting recommendations."""
146 | rate_limit_findings = [
147 | {
148 | "title": "Missing rate limit",
149 | "description": "API endpoint has no rate limiting"
150 | },
151 | {
152 | "title": "Rate limiting required",
153 | "description": "Implement rate limiting for this endpoint"
154 | },
155 | {
156 | "title": "No rate limit",
157 | "description": "Unlimited requests allowed"
158 | },
159 | {
160 | "title": "Add rate limiting",
161 | "description": "This API needs rate limits"
162 | }
163 | ]
164 |
165 | for finding in rate_limit_findings:
166 | reason = HardExclusionRules.get_exclusion_reason(finding)
167 | assert reason is not None
168 | assert "rate limiting recommendation" in reason
169 |
170 | def test_resource_pattern_exclusion(self):
171 | """Test exclusion of generic resource management findings."""
172 | resource_findings = [
173 | {
174 | "title": "Security Issue",
175 | "description": "Potential memory leak detected"
176 | },
177 | {
178 | "title": "Security Issue",
179 | "description": "Resource leak potential in file handling"
180 | },
181 | {
182 | "title": "Security Issue",
183 | "description": "Unclosed resource detected in function"
184 | },
185 | {
186 | "title": "Security Issue",
187 | "description": "File cleanup required - close resource"
188 | }
189 | ]
190 |
191 | for finding in resource_findings:
192 | reason = HardExclusionRules.get_exclusion_reason(finding)
193 | assert reason is not None
194 | assert "Resource management finding" in reason
195 |
196 | def test_specific_resource_also_excluded(self):
197 | """Test that ALL resource issues are now excluded (including specific ones)."""
198 | specific_resources = [
199 | {
200 | "title": "Database connection leak",
201 | "description": "PostgreSQL connections not returned to pool"
202 | },
203 | {
204 | "title": "Thread leak",
205 | "description": "Thread pool exhaustion due to unclosed threads"
206 | },
207 | {
208 | "title": "Socket leak",
209 | "description": "TCP sockets remain open after errors"
210 | }
211 | ]
212 |
213 | # All resource issues should be excluded now
214 | for finding in specific_resources:
215 | reason = HardExclusionRules.get_exclusion_reason(finding)
216 | assert reason is not None
217 | assert "Resource management finding" in reason # Should not be excluded
218 |
219 | def test_open_redirect_pattern_exclusion(self):
220 | """Test exclusion of open redirect findings."""
221 | redirect_findings = [
222 | {
223 | "title": "Open redirect vulnerability",
224 | "description": "User input used in redirect without validation"
225 | },
226 | {
227 | "title": "Unvalidated redirect",
228 | "description": "Redirect URL not validated"
229 | },
230 | {
231 | "title": "Redirect vulnerability",
232 | "description": "Possible redirect attack"
233 | },
234 | {
235 | "title": "Malicious redirect possible",
236 | "description": "User-controlled redirect parameter"
237 | }
238 | ]
239 |
240 | for finding in redirect_findings:
241 | reason = HardExclusionRules.get_exclusion_reason(finding)
242 | assert reason is not None
243 | assert "Open redirect" in reason
244 |
245 | def test_mixed_case_handling(self):
246 | """Test that pattern matching handles mixed case correctly."""
247 | mixed_case_findings = [
248 | {
249 | "title": "DENIAL OF SERVICE",
250 | "description": "RESOURCE EXHAUSTION POSSIBLE"
251 | },
252 | {
253 | "title": "Security Issue",
254 | "description": "ADD INPUT VALIDATION"
255 | },
256 | {
257 | "title": "Security Issue",
258 | "description": "HARDCODED PASSWORD DETECTED"
259 | }
260 | ]
261 |
262 | # First finding should be excluded (DOS)
263 | reason = HardExclusionRules.get_exclusion_reason(mixed_case_findings[0])
264 | assert reason is not None
265 |
266 | # Second finding should NOT be excluded (we removed generic validation patterns)
267 | reason = HardExclusionRules.get_exclusion_reason(mixed_case_findings[1])
268 | assert reason is None
269 |
270 | # Third finding should NOT be excluded (we removed secrets patterns)
271 | reason = HardExclusionRules.get_exclusion_reason(mixed_case_findings[2])
272 | assert reason is None
273 |
274 | def test_empty_finding_handling(self):
275 | """Test handling of empty or malformed findings."""
276 | empty_findings = [
277 | {},
278 | {"title": "", "description": ""},
279 | {"title": "Some title"}, # Missing description
280 | {"description": "Some description"}, # Missing title
281 | {"title": None, "description": None}
282 | ]
283 |
284 | for finding in empty_findings:
285 | reason = HardExclusionRules.get_exclusion_reason(finding)
286 | assert reason is None # Should not crash, just return None
287 |
288 | def test_combined_patterns(self):
289 | """Test findings that match multiple patterns."""
290 | finding = {
291 | "title": "DOS and validation issue",
292 | "description": "Missing rate limit leads to resource exhaustion"
293 | }
294 |
295 | reason = HardExclusionRules.get_exclusion_reason(finding)
296 | assert reason is not None
297 | # Should match at least one pattern (DOS or rate limiting)
298 |
299 | def test_regex_special_characters(self):
300 | """Test that regex special characters in findings don't cause issues."""
301 | findings_with_special_chars = [
302 | {
303 | "title": "Issue with $pecial ch@rs",
304 | "description": "Contains [brackets] and (parentheses)"
305 | },
306 | {
307 | "title": "Path: C:\\Windows\\System32",
308 | "description": "Backslashes \\ and dots ..."
309 | },
310 | {
311 | "title": "Regex chars: .* + ? ^ $ { } ( ) [ ] \\ |",
312 | "description": "All the special regex characters"
313 | }
314 | ]
315 |
316 | for finding in findings_with_special_chars:
317 | # Should not raise regex errors
318 | reason = HardExclusionRules.get_exclusion_reason(finding)
319 | # These don't match any patterns, so should return None
320 | assert reason is None
321 |
322 | def test_performance_with_long_text(self):
323 | """Test performance with very long descriptions."""
324 | long_text = "A" * 10000 # 10k characters
325 | finding = {
326 | "title": "Long finding",
327 | "description": long_text + " denial of service " + long_text
328 | }
329 |
330 | # Should handle long text efficiently
331 | reason = HardExclusionRules.get_exclusion_reason(finding)
332 | assert reason is not None # Should find DOS pattern
333 | assert "DOS/resource exhaustion" in reason
334 |
335 | def test_memory_safety_exclusion_non_cpp_files(self):
336 | """Test that memory safety issues are excluded in non-C/C++ files."""
337 | memory_safety_findings = [
338 | {
339 | "title": "Buffer overflow vulnerability",
340 | "description": "Potential buffer overflow in string handling",
341 | "file": "app.py"
342 | },
343 | {
344 | "title": "Out of bounds access",
345 | "description": "Array out of bounds write detected",
346 | "file": "server.js"
347 | },
348 | {
349 | "title": "Memory corruption",
350 | "description": "Use after free vulnerability found",
351 | "file": "Main.java"
352 | },
353 | {
354 | "title": "Segmentation fault",
355 | "description": "Null pointer dereference causes segfault",
356 | "file": "handler.go"
357 | },
358 | {
359 | "title": "Integer overflow",
360 | "description": "Integer overflow in calculation",
361 | "file": "calc.rb"
362 | }
363 | ]
364 |
365 | for finding in memory_safety_findings:
366 | reason = HardExclusionRules.get_exclusion_reason(finding)
367 | assert reason is not None
368 | assert "Memory safety finding in non-C/C++ code" in reason
369 |
370 | def test_memory_safety_not_excluded_cpp_files(self):
371 | """Test that memory safety issues are NOT excluded in C/C++ files."""
372 | cpp_memory_findings = [
373 | {
374 | "title": "Buffer overflow",
375 | "description": "Stack buffer overflow in strcpy",
376 | "file": "main.c"
377 | },
378 | {
379 | "title": "Out of bounds write",
380 | "description": "Array index out of bounds",
381 | "file": "parser.cc"
382 | },
383 | {
384 | "title": "Memory safety",
385 | "description": "Use after free in destructor",
386 | "file": "object.cpp"
387 | },
388 | {
389 | "title": "Bounds check missing",
390 | "description": "No bounds checking on user input",
391 | "file": "input.h"
392 | }
393 | ]
394 |
395 | for finding in cpp_memory_findings:
396 | reason = HardExclusionRules.get_exclusion_reason(finding)
397 | assert reason is None # Should NOT be excluded
398 |
399 | def test_memory_safety_exclusion_case_insensitive(self):
400 | """Test that file extension checking is case insensitive."""
401 | findings = [
402 | {
403 | "title": "Buffer overflow",
404 | "description": "Buffer overflow detected",
405 | "file": "App.PY" # Uppercase extension
406 | },
407 | {
408 | "title": "Memory corruption",
409 | "description": "Memory corruption issue",
410 | "file": "SERVER.JS" # All uppercase
411 | }
412 | ]
413 |
414 | for finding in findings:
415 | reason = HardExclusionRules.get_exclusion_reason(finding)
416 | assert reason is not None
417 | assert "Memory safety finding in non-C/C++ code" in reason
418 |
419 | def test_memory_safety_no_file_extension(self):
420 | """Test handling of files without extensions."""
421 | findings = [
422 | {
423 | "title": "Buffer overflow",
424 | "description": "Buffer overflow detected",
425 | "file": "Makefile" # No extension
426 | },
427 | {
428 | "title": "Memory corruption",
429 | "description": "Memory corruption issue",
430 | "file": "" # Empty file path
431 | }
432 | ]
433 |
434 | for finding in findings:
435 | reason = HardExclusionRules.get_exclusion_reason(finding)
436 | # Should be excluded since they're not C/C++ files
437 | assert reason is not None
438 | assert "Memory safety finding in non-C/C++ code" in reason
--------------------------------------------------------------------------------