├── .ccignore ├── .github └── workflows │ ├── publish.yml │ ├── run-static.yml │ └── run-tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── AGENTS.md ├── README.md ├── pyproject.toml ├── src └── copychat │ ├── __init__.py │ ├── cli.py │ ├── core.py │ ├── format.py │ ├── patterns.py │ └── sources.py ├── tests ├── conftest.py ├── data │ ├── test1.txt │ └── test2.md ├── fixtures │ ├── .gitignore │ ├── __init__.py │ ├── config │ │ └── settings.yml │ ├── db │ │ └── schema.sql │ ├── docs │ │ └── README.md │ └── src │ │ ├── app.js │ │ ├── main.py │ │ ├── styles │ │ └── main.css │ │ ├── types.ts │ │ └── utils │ │ └── helpers.py ├── test_ccignore.py ├── test_cli.py ├── test_core.py ├── test_format.py ├── test_github_item.py ├── test_integration.py ├── test_patterns.py ├── test_sources.py └── tests │ └── data │ ├── test1.txt │ └── test2.md └── uv.lock /.ccignore: -------------------------------------------------------------------------------- 1 | # CopyChat default ignore patterns 2 | # This file uses the same syntax as .gitignore 3 | # It applies to the current directory and all subdirectories 4 | 5 | # Build artifacts 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | *.so 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | *.egg-info/ 16 | .mypy_cache/ 17 | 18 | # Editor and IDE files 19 | .idea/ 20 | .vscode/ 21 | *.swp 22 | *.swo 23 | *~ 24 | 25 | # Local development 26 | .env 27 | .venv 28 | env/ 29 | venv/ 30 | 31 | # Dependencies 32 | node_modules/ 33 | 34 | # Project-specific 35 | # Add patterns specific to your project here -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Copychat to PyPI 2 | on: 3 | release: 4 | types: [published] 5 | workflow_dispatch: 6 | 7 | jobs: 8 | publish-pypi-release: 9 | runs-on: ubuntu-latest 10 | environment: release 11 | permissions: 12 | contents: write 13 | id-token: write 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v4 17 | - name: Set up Python 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: "3.11" 21 | cache: pip 22 | cache-dependency-path: "**/pyproject.toml" 23 | - name: Install dependencies 24 | run: | 25 | pip install setuptools wheel build 26 | - name: Build 27 | run: | 28 | python -m build 29 | - name: Publish 30 | uses: pypa/gh-action-pypi-publish@release/v1 31 | with: 32 | verbose: true 33 | -------------------------------------------------------------------------------- /.github/workflows/run-static.yml: -------------------------------------------------------------------------------- 1 | name: Run static analysis 2 | 3 | env: 4 | # enable colored output 5 | # https://github.com/pytest-dev/pytest/issues/7443 6 | PY_COLORS: 1 7 | 8 | on: 9 | push: 10 | branches: ["main"] 11 | paths: 12 | - "src/**" 13 | - "tests/**" 14 | - "uv.lock" 15 | - "pyproject.toml" 16 | - ".github/workflows/**" 17 | 18 | # run on all pull requests because these checks are required and will block merges otherwise 19 | pull_request: 20 | 21 | workflow_dispatch: 22 | 23 | permissions: 24 | contents: read 25 | 26 | jobs: 27 | static_analysis: 28 | timeout-minutes: 2 29 | 30 | runs-on: ubuntu-latest 31 | 32 | steps: 33 | - uses: actions/checkout@v4 34 | - name: Install uv 35 | uses: astral-sh/setup-uv@v5 36 | with: 37 | enable-cache: true 38 | cache-dependency-glob: "uv.lock" 39 | - name: Set up Python 40 | uses: actions/setup-python@v5 41 | with: 42 | python-version: "3.12" 43 | - name: Install dependencies 44 | run: uv sync --dev 45 | - name: Run pre-commit 46 | uses: pre-commit/action@v3.0.1 47 | -------------------------------------------------------------------------------- /.github/workflows/run-tests.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | env: 4 | # enable colored output 5 | PY_COLORS: 1 6 | 7 | on: 8 | push: 9 | branches: ["main"] 10 | paths: 11 | - "src/**" 12 | - "tests/**" 13 | - "uv.lock" 14 | - "pyproject.toml" 15 | - ".github/workflows/**" 16 | 17 | # run on all pull requests because these checks are required and will block merges otherwise 18 | pull_request: 19 | 20 | workflow_dispatch: 21 | 22 | permissions: 23 | contents: read 24 | 25 | jobs: 26 | run_tests: 27 | name: "Run tests: Python ${{ matrix.python-version }} on ${{ matrix.os }}" 28 | runs-on: ${{ matrix.os }} 29 | strategy: 30 | matrix: 31 | os: [ubuntu-latest] 32 | python-version: ["3.10", "3.12"] 33 | fail-fast: false 34 | timeout-minutes: 5 35 | 36 | steps: 37 | - uses: actions/checkout@v4 38 | 39 | - name: Install uv 40 | uses: astral-sh/setup-uv@v5 41 | with: 42 | enable-cache: true 43 | cache-dependency-glob: "uv.lock" 44 | python-version: ${{ matrix.python-version }} 45 | 46 | - name: Install CopyChat 47 | run: uv sync --dev --locked 48 | 49 | - name: Run tests 50 | run: uv run pytest 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Virtual environments 24 | .env 25 | .venv 26 | env/ 27 | venv/ 28 | ENV/ 29 | env.bak/ 30 | venv.bak/ 31 | 32 | # IDE 33 | .idea/ 34 | .vscode/ 35 | *.swp 36 | *.swo 37 | *~ 38 | .project 39 | .classpath 40 | .settings/ 41 | *.sublime-workspace 42 | *.sublime-project 43 | 44 | # OS 45 | .DS_Store 46 | .DS_Store? 47 | ._* 48 | .Spotlight-V100 49 | .Trashes 50 | ehthumbs.db 51 | Thumbs.db 52 | Desktop.ini 53 | 54 | # Logs and databases 55 | *.log 56 | *.sqlite 57 | *.db 58 | 59 | # Coverage and test reports 60 | htmlcov/ 61 | .tox/ 62 | .nox/ 63 | .coverage 64 | .coverage.* 65 | .cache 66 | nosetests.xml 67 | coverage.xml 68 | *.cover 69 | *.py,cover 70 | .hypothesis/ 71 | .pytest_cache/ 72 | cover/ 73 | 74 | # Build and packaging 75 | *.manifest 76 | *.spec 77 | pip-log.txt 78 | pip-delete-this-directory.txt 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # Local development 84 | local_settings.py 85 | db.sqlite3 86 | db.sqlite3-journal 87 | 88 | # Node (in case of docs or frontend components) 89 | node_modules/ 90 | npm-debug.log* 91 | yarn-debug.log* 92 | yarn-error.log* 93 | .pnpm-debug.log* 94 | src/copychat/_version.py 95 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: true 2 | 3 | repos: 4 | - repo: https://github.com/abravalheri/validate-pyproject 5 | rev: v0.23 6 | hooks: 7 | - id: validate-pyproject 8 | 9 | - repo: https://github.com/astral-sh/ruff-pre-commit 10 | # Ruff version. 11 | rev: v0.11.4 12 | hooks: 13 | # Run the linter. 14 | - id: ruff 15 | args: [--fix, --exit-non-zero-on-fix] 16 | # Run the formatter. 17 | - id: ruff-format 18 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- 1 | # AGENTS 2 | 3 | Copychat converts project code into LLM-friendly context. This is a guide to help LLMs quickly understand and navigate the codebase. The repo is maintained by [@jlowin](https://github.com/jlowin) on GitHub[https://github.com/jlowin/copychat]. 4 | 5 | ## Project Overview 6 | 7 | Copychat is a CLI tool that prepares source code for LLM context windows by: 8 | 1. Scanning directories/files based on inclusion/exclusion patterns 9 | 2. Respecting `.gitignore` and `.ccignore` patterns 10 | 3. Formatting code with proper language tags 11 | 4. Including git diff information when requested 12 | 5. Estimating token counts for context planning 13 | 14 | ## Repository Layout 15 | 16 | * `README.md` – overview and documentation 17 | * `src/copychat/` – CLI and library implementation 18 | * `tests/` – pytest suite 19 | * `pyproject.toml` – PEP-621 metadata; build is managed by **uv** 20 | * `.github/workflows/` – CI that lints, runs tests, and publishes to PyPI 21 | * `.ccignore` – custom ignore patterns for copychat itself 22 | 23 | ## Core Components 24 | 25 | * `core.py` - Main scanning functionality, git integration, and file handling 26 | - `scan_directory()` - Primary function for finding and processing files 27 | - `DiffMode` - Enum defining different git diff display modes 28 | - Handles `.gitignore` and `.ccignore` patterns 29 | 30 | * `format.py` - Formats code for LLM consumption 31 | - `format_files()` - Formats file content with metadata 32 | - `estimate_tokens()` - Calculates approximate token usage 33 | 34 | * `cli.py` - Command-line interface 35 | - Main entry point for user interaction 36 | - Parses arguments and handles output (clipboard/file) 37 | 38 | * `sources.py` - Handles different source types (filesystem, GitHub) 39 | - `GitHubSource` - Fetches code from GitHub repositories 40 | 41 | * `patterns.py` - Defines file patterns and exclusions 42 | 43 | ## Key Workflows 44 | 45 | 1. **Basic Usage**: `copychat` scans the current directory and copies formatted code to clipboard 46 | 2. **Filtered Scanning**: `copychat --include py,js` only processes specified file types 47 | 3. **Git Integration**: `copychat --diff-mode full-with-diff` shows changes with context 48 | 4. **GitHub**: `copychat --source github:user/repo` fetches remote code 49 | 50 | ## Common CLI Flags 51 | 52 | * `--include py,js` - restrict scanned extensions 53 | * `--exclude "**/*.test.js"` - exclude specific patterns 54 | * `--diff-mode full-with-diff` - embed git diff chunks 55 | * `--diff-branch main` - compare against specific branch 56 | * `--source github:/` - pull remote code via GitHub 57 | * `--out file.md` - write to file instead of clipboard 58 | * `--depth 2` - limit directory recursion depth 59 | 60 | ## Data Flow 61 | 62 | 1. CLI parses arguments → determines source type 63 | 2. `scan_directory()` finds matching files → applies filters 64 | 3. Git diff information is added if requested 65 | 4. `format_files()` processes content → calculates tokens 66 | 5. Formatted output is sent to clipboard/file/stdout 67 | 68 | ## Common Patterns 69 | 70 | - Path handling uses `pathlib.Path` throughout 71 | - File content is processed as `(path, content)` tuples 72 | - Git operations use subprocess to call git commands 73 | - `.ccignore` supports hierarchical pattern inheritance 74 | 75 | ## Testing 76 | 77 | - `tests/test_integration.py` has examples of all major functionality 78 | - `sample_project` fixture provides test files of various types 79 | 80 | ## Development 81 | 82 | ```bash 83 | uv sync # install runtime + dev deps 84 | uv run pre-commit run --all-files 85 | uv run pytest 86 | ``` 87 | 88 | ## Release Process 89 | 90 | Pushing a semantic-version tag to `main` triggers the **Publish Copychat to PyPI** workflow, building and uploading the wheel. 91 | 92 | ## Development Guidelines 93 | 94 | 1. Fix all Ruff/Pyright warnings (`pre-commit` enforces) 95 | 2. Respect `.ccignore` and `.gitignore` patterns 96 | 3. Keep CLI flags backward-compatible 97 | 4. Document changes in `CHANGELOG.md` 98 | 99 | ## Extending the Tool 100 | 101 | - Add new file types in `patterns.py` 102 | - Add new source types in `sources.py` 103 | - Add new formatting options in `format.py` 104 | - `GitHubItem` in `sources.py` fetches issues and PRs. Pass an issue/PR URL 105 | (e.g. `owner/repo#123` or `https://github.com/owner/repo/issues/123`) directly 106 | to the main `copychat` command. 107 | 108 | **Note to LLMs**: When working on this repository, keep this AGENTS.md file up to date with new insights that would help future LLMs quickly understand the codebase structure and functionality. This file should serve as a quick reference that reduces the need for extensive code exploration. 109 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Copychat 📋🐈‍⬛ 2 | 3 | **Simple code-to-context.** 4 | 5 | Copychat is a lightweight CLI tool that prepares your code for conversations with LLMs. It intelligently formats your source files into chat-ready context, handling everything from file selection to git diffs. 6 | 7 | ## Features 8 | 9 | - 🎯 **Smart file selection**: Automatically identifies relevant source files while respecting `.gitignore` 10 | - 🔍 **Git-aware**: Can include diffs and focus on changed files 11 | - 📦 **GitHub integration**: Pull directly from repositories 12 | - 🎨 **Clean output**: Formats code with proper language tags and metadata 13 | - 📋 **Clipboard ready**: Results go straight to your clipboard 14 | - 🔢 **Token smart**: Estimates token count for context planning 15 | 16 | ## Running Copychat 17 | 18 | You can use [uv](https://docs.astral.sh/uv/) to run copychat directly from the command line, without needing to install it first: 19 | ```bash 20 | uvx copychat 21 | ``` 22 | 23 | Frequent users may want to add the following alias to their `.zshrc` or `.bashrc`: 24 | ```bash 25 | alias cc="uvx copychat" 26 | ``` 27 | 28 | This permits you to quickly copy context by running e.g. `cc docs/getting-started/ src/core/` from any directory, in any environment. 29 | 30 | If you want to save a few milliseconds, you can install copychat globally with `uv tool install copychat` or add it to your environment with `uv add copychat`. And of course, `pip install copychat` works too. 31 | 32 | ## Quick Start 33 | Collect, format, and copy all source code in the current directory (and subdirectories) to the clipboard: 34 | ```bash 35 | copychat 36 | ``` 37 | 38 | Copy only Python files to clipboard: 39 | ```bash 40 | copychat -i py 41 | ``` 42 | 43 | Copy specific files, including any git diffs: 44 | ```bash 45 | copychat src/ tests/test_api.py --diff-mode full-with-diff 46 | ``` 47 | 48 | Use GitHub as a source instead of the local filesystem: 49 | ```bash 50 | copychat src/ -s github:prefecthq/controlflow 51 | ``` 52 | 53 | ## Usage Guide 54 | 55 | Copychat is designed to be intuitive while offering powerful options for more complex needs. Let's walk through common use cases: 56 | 57 | ### Basic Directory Scanning 58 | 59 | At its simplest, run `copychat` in any directory to scan and format all recognized source files: 60 | 61 | ```bash 62 | copychat 63 | ``` 64 | 65 | This will scan the current directory, format all supported files, and copy the result to your clipboard. The output includes metadata like character and token counts to help you stay within LLM context limits. 66 | 67 | ### Targeting Specific Files 68 | 69 | You can specify exactly what you want to include: 70 | 71 | ```bash 72 | # Single file 73 | copychat src/main.py 74 | 75 | # Multiple specific files and directories 76 | copychat src/api.py tests/test_api.py docs/ 77 | 78 | # Glob patterns 79 | copychat src/*.py tests/**/*.md 80 | ``` 81 | 82 | ### Filtering by Language 83 | 84 | When you only want specific file types, use the `--include` flag with comma-separated extensions: 85 | 86 | ```bash 87 | # Just Python files 88 | copychat --include py 89 | 90 | # Python and JavaScript 91 | copychat --include py,js,jsx 92 | ``` 93 | 94 | ### Working with Git 95 | 96 | Copychat shines when working with git repositories. Use different diff modes to focus on what matters: 97 | 98 | ```bash 99 | # Show only files that have changed, with their diffs 100 | copychat --diff-mode changed-with-diff 101 | 102 | # Show all files, but include diffs for changed ones 103 | copychat --diff-mode full-with-diff 104 | 105 | # Show only the git diff chunks themselves 106 | copychat --diff-mode diff-only 107 | 108 | # See what changed since branching from develop 109 | copychat --diff-mode diff-only --diff-branch develop 110 | ``` 111 | 112 | The `-diff-mode` and `--diff-branch` options are particularly useful when you want to: 113 | - Review any changes you've made, either in isolation or in context 114 | - Compare changes against a specific branch 115 | 116 | ### Excluding Files 117 | 118 | You can exclude files that match certain patterns: 119 | 120 | ```bash 121 | # Skip test files 122 | copychat --exclude "**/*.test.js,**/*.spec.py" 123 | 124 | # Skip specific directories 125 | copychat --exclude "build/*,dist/*" 126 | ``` 127 | 128 | Copychat automatically respects your `.gitignore` file and common ignore patterns (like `node_modules`). 129 | 130 | ### GitHub Integration 131 | 132 | #### Reading GitHub Repositories 133 | 134 | Pull directly from GitHub repositories: 135 | 136 | ```bash 137 | # Using the github: prefix 138 | copychat --source github:username/repo 139 | 140 | # Or just paste a GitHub URL 141 | copychat --source https://github.com/username/repo 142 | 143 | # Process specific paths within the repository 144 | copychat --source github:username/repo src/main.py tests/ 145 | ``` 146 | 147 | The `--source` flag specifies where to look (GitHub, filesystem, etc.), and then any additional arguments specify which paths within that source to process. This means you can target specific files or directories within a GitHub repository just like you would with local files. 148 | 149 | #### Reading GitHub Issues, PRs & Discussions 150 | 151 | Copy the full text and comment history of a GitHub issue, pull request, or discussion by 152 | passing the identifier directly to the main command: 153 | 154 | ```bash 155 | # Issues and PRs 156 | copychat owner/repo#123 157 | copychat https://github.com/owner/repo/issues/123 158 | copychat https://github.com/owner/repo/pull/456 159 | 160 | # Discussions 161 | copychat https://github.com/owner/repo/discussions/789 162 | ``` 163 | 164 | For pull requests, the diff is included by default, giving you complete context of the proposed changes. 165 | 166 | Set `GITHUB_TOKEN` or use `--token` if you need to access private content or want higher rate limits. 167 | 168 | #### Reading Individual GitHub Files 169 | 170 | You can fetch individual files directly from GitHub without cloning the entire repository by using blob URLs: 171 | 172 | ```bash 173 | # Fetch a specific file from a commit/branch/tag 174 | copychat https://github.com/owner/repo/blob/main/src/api.py 175 | copychat https://github.com/owner/repo/blob/v1.2.3/config/settings.yaml 176 | copychat https://github.com/owner/repo/blob/abc123def/docs/README.md 177 | ``` 178 | 179 | This is perfect for quickly grabbing specific files for context without the overhead of repository cloning. 180 | 181 | The output is formatted like other files, with XML-style tags and proper language detection. 182 | 183 | ### Output Options 184 | 185 | By default, Copychat copies to your clipboard, but you have other options: 186 | 187 | ```bash 188 | # Append to clipboard 189 | copychat --append 190 | 191 | # Write to a file 192 | copychat --out context.md 193 | 194 | # Append to existing file 195 | copychat --out context.md --append 196 | 197 | # Print to screen 198 | copychat --print 199 | 200 | # Both copy to clipboard and save to file 201 | copychat --out context.md 202 | ``` 203 | 204 | ### Verbose Output 205 | 206 | Use the `--verbose` flag (or `-v`) to include detailed file information in the output, including token counts: 207 | 208 | ```bash 209 | copychat -v 210 | ``` 211 | 212 | ### Limiting Directory Depth 213 | 214 | Control how deep copychat scans subdirectories: 215 | 216 | ```bash 217 | # Only files in current directory 218 | copychat --depth 0 219 | 220 | # Current directory and immediate subdirectories only 221 | copychat --depth 1 222 | 223 | # Scan up to 3 levels deep 224 | copychat --depth 3 225 | ``` 226 | 227 | ## Options 228 | 229 | ```bash 230 | copychat [OPTIONS] [PATHS]... 231 | 232 | Options: 233 | -s, --source TEXT Source to scan (filesystem path, github:owner/repo, or URL) 234 | -o, --out PATH Write output to file 235 | -a, --append Append output instead of overwriting 236 | -p, --print Print output to screen 237 | -v, --verbose Show detailed file information in output 238 | -i, --include TEXT Extensions to include (comma-separated, e.g. 'py,js,ts') 239 | -x, --exclude TEXT Glob patterns to exclude 240 | -d, --depth INTEGER Maximum directory depth to scan (0 = current dir only) 241 | --diff-mode TEXT How to handle git diffs 242 | --diff-branch TEXT Compare changes against specified branch 243 | --debug Debug mode for development 244 | --help Show this message and exit 245 | ``` 246 | 247 | ## Supported File Types 248 | 249 | Copychat automatically recognizes and properly formats many common file types, including: 250 | 251 | - Python (`.py`, `.pyi`) 252 | - JavaScript/TypeScript (`.js`, `.ts`, `.jsx`, `.tsx`) 253 | - Web (`.html`, `.css`, `.scss`) 254 | - Systems (`.c`, `.cpp`, `.rs`, `.go`) 255 | - Config (`.yaml`, `.toml`, `.json`) 256 | - Documentation (`.md`, `.rst`, `.txt`) 257 | - And [many more](https://github.com/username/copychat/blob/main/copychat/patterns.py) 258 | 259 | ## Output Format 260 | 261 | Copychat generates clean, structured output with: 262 | - File paths and language tags 263 | - Token count estimates 264 | - Git diff information (when requested) 265 | - Proper syntax highlighting markers 266 | 267 | ## Using `.ccignore` Files 268 | 269 | CopyChat supports hierarchical ignore patterns through `.ccignore` files. These files work similarly to `.gitignore` files but with an important difference: they apply to all directories and subdirectories where they're located. 270 | 271 | ### Key Features 272 | 273 | - `.ccignore` files use the same syntax as `.gitignore` files 274 | - Each `.ccignore` file applies to its directory and all subdirectories 275 | - Patterns from multiple `.ccignore` files are inherited, with more specific directories taking precedence 276 | 277 | ### Example 278 | 279 | ``` 280 | project/ 281 | ├── .ccignore # Contains "*.log" - excludes log files in all directories 282 | ├── src/ 283 | │ ├── .ccignore # Contains "*.tmp" - excludes tmp files in src/ and below 284 | │ └── ... 285 | └── tests/ 286 | ├── .ccignore # Contains "*.fixture" - excludes fixture files in tests/ and below 287 | └── ... 288 | ``` 289 | 290 | In this example: 291 | - `*.log` files are excluded everywhere 292 | - `*.tmp` files are only excluded in `src/` and its subdirectories 293 | - `*.fixture` files are only excluded in `tests/` and its subdirectories 294 | 295 | ### Creating a `.ccignore` File 296 | 297 | Create a `.ccignore` file in your project root or any subdirectory: 298 | 299 | ``` 300 | # Comment lines start with # 301 | # Blank lines are ignored 302 | 303 | # Ignore all files with .log extension 304 | *.log 305 | 306 | # Ignore specific files 307 | secrets.json 308 | credentials.yaml 309 | 310 | # Ignore directories 311 | node_modules/ 312 | __pycache__/ 313 | ``` 314 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "copychat" 3 | dynamic = ["version"] 4 | description = "Easily copy code for LLM context" 5 | authors = [] 6 | dependencies = [ 7 | "typer>=0.9.0", 8 | "rich>=13.7.0", 9 | "pyperclip>=1.8.2", 10 | "pathspec>=0.12.1", 11 | "tiktoken>=0.6.0", 12 | "gitpython>=3.1.42", 13 | ] 14 | requires-python = ">=3.9" 15 | readme = "README.md" 16 | license = { text = "MIT" } 17 | 18 | [project.scripts] 19 | copychat = "copychat.cli:app" 20 | 21 | [build-system] 22 | requires = ["setuptools>=64", "setuptools_scm>=8"] 23 | build-backend = "setuptools.build_meta" 24 | 25 | [tool.setuptools_scm] 26 | version_file = "src/copychat/_version.py" 27 | 28 | [tool.ruff] 29 | line-length = 88 30 | target-version = "py39" 31 | 32 | [tool.pytest.ini_options] 33 | testpaths = ["tests"] 34 | addopts = "-v --tb=short" 35 | 36 | [dependency-groups] 37 | dev = [ 38 | "pre-commit>=4.2.0", 39 | "pytest>=8.3.3", 40 | "pytest-asyncio>=0.24.0", 41 | "pytest-cov>=5.0.0", 42 | ] 43 | -------------------------------------------------------------------------------- /src/copychat/__init__.py: -------------------------------------------------------------------------------- 1 | """Convert source code directories into markdown for LLM context.""" 2 | 3 | 4 | # --- Version --- 5 | 6 | try: 7 | from ._version import version as __version__ # type: ignore 8 | except ImportError: 9 | __version__ = "unknown" 10 | -------------------------------------------------------------------------------- /src/copychat/cli.py: -------------------------------------------------------------------------------- 1 | import typer 2 | from pathlib import Path 3 | from typing import Optional, List 4 | from rich.console import Console 5 | import pyperclip 6 | from enum import Enum 7 | from importlib.metadata import version as get_version 8 | import atexit 9 | import shutil 10 | 11 | from .core import ( 12 | scan_directory, 13 | DiffMode, 14 | get_file_content, 15 | ) 16 | from .format import ( 17 | format_files as format_files_xml, 18 | create_display_header, 19 | ) 20 | from .sources import GitHubSource, GitHubItem, GitHubFile 21 | 22 | 23 | # Register cleanup of temporary GitHub directory 24 | def _cleanup_github_temp(): 25 | from .sources import _github_temp_dir 26 | 27 | if _github_temp_dir is not None and _github_temp_dir.exists(): 28 | try: 29 | shutil.rmtree(_github_temp_dir) 30 | except Exception: 31 | pass # Ignore cleanup errors 32 | 33 | 34 | atexit.register(_cleanup_github_temp) 35 | 36 | 37 | class SourceType(Enum): 38 | """Type of source to scan.""" 39 | 40 | FILESYSTEM = "filesystem" # Default 41 | GITHUB = "github" 42 | WEB = "web" # For future use 43 | 44 | 45 | def parse_source(source: str) -> tuple[SourceType, str]: 46 | """Parse source string into type and location.""" 47 | import re 48 | 49 | if source.startswith(("github:", "gh:")): 50 | return SourceType.GITHUB, source.split(":", 1)[1] 51 | 52 | # Handle GitHub URLs with issues/pulls/discussions 53 | if source and source.startswith(("http://", "https://")) and "github.com" in source: 54 | pr_issue_match = re.search( 55 | r"github\.com/([^/]+/[^/]+)/(?:issues|pull|discussions)/([0-9]+)", source 56 | ) 57 | if pr_issue_match: 58 | # This is a PR, issue, or discussion URL, keep it as FILESYSTEM type so it's processed directly 59 | return SourceType.FILESYSTEM, source 60 | 61 | # Handle GitHub blob URLs (individual files) 62 | blob_match = re.search(r"github\.com/([^/]+/[^/]+)/blob/([^/]+)/(.*)", source) 63 | if blob_match: 64 | # This is a GitHub blob URL, keep it as FILESYSTEM type so it's processed directly 65 | return SourceType.FILESYSTEM, source 66 | 67 | # Regular GitHub repo URL 68 | if source and "github.com" in source: 69 | parts = source.split("github.com/", 1) 70 | if len(parts) == 2: 71 | return SourceType.GITHUB, parts[1] 72 | 73 | if source and source.startswith(("http://", "https://")): 74 | return SourceType.WEB, source 75 | 76 | return SourceType.FILESYSTEM, source 77 | 78 | 79 | def parse_github_item(item: str) -> tuple[str, int, str]: 80 | """Parse issue, PR, or discussion identifier into repo, number, and type.""" 81 | import re 82 | 83 | if item.startswith("http://") or item.startswith("https://"): 84 | m = re.search( 85 | r"github\.com/([^/]+/[^/]+)/(issues|pull|discussions)/([0-9]+)", item 86 | ) 87 | if not m: 88 | raise typer.BadParameter("Invalid GitHub URL") 89 | return ( 90 | m.group(1), 91 | int(m.group(3)), 92 | m.group(2).rstrip("s"), 93 | ) # Remove 's' from 'issues' -> 'issue' 94 | 95 | if "#" in item: 96 | repo, num = item.split("#", 1) 97 | return ( 98 | repo.strip(), 99 | int(num), 100 | "issue", 101 | ) # Default to issue for backward compatibility 102 | 103 | raise typer.BadParameter("Item must be in owner/repo#number format or URL") 104 | 105 | 106 | def parse_github_blob(item: str) -> tuple[str, str, str]: 107 | """Parse GitHub blob URL into repo, ref, and file path.""" 108 | import re 109 | 110 | match = re.search(r"github\.com/([^/]+/[^/]+)/blob/([^/]+)/(.*)", item) 111 | if not match: 112 | raise typer.BadParameter("Invalid GitHub blob URL") 113 | 114 | return match.group(1), match.group(2), match.group(3) # repo, ref, file_path 115 | 116 | 117 | def diff_mode_callback(value: str) -> DiffMode: 118 | """Convert string value to DiffMode enum.""" 119 | try: 120 | if isinstance(value, DiffMode): 121 | return value 122 | return DiffMode(value) 123 | except ValueError: 124 | valid_values = [mode.value for mode in DiffMode] 125 | raise typer.BadParameter(f"Must be one of: {', '.join(valid_values)}") 126 | 127 | 128 | app = typer.Typer( 129 | no_args_is_help=True, # Show help when no args provided 130 | add_completion=False, # Disable shell completion for simplicity 131 | ) 132 | console = Console() 133 | error_console = Console(stderr=True) 134 | 135 | 136 | @app.command() 137 | def main( 138 | paths: list[str] = typer.Argument( 139 | None, 140 | help="Paths to process within the source (defaults to current directory)", 141 | ), 142 | version: bool = typer.Option( 143 | None, 144 | "--version", 145 | help="Show version and exit.", 146 | is_eager=True, 147 | ), 148 | source: Optional[str] = typer.Option( 149 | None, 150 | "--source", 151 | "-s", 152 | help="Source to scan (filesystem path, github:owner/repo, or URL)", 153 | ), 154 | outfile: Optional[Path] = typer.Option( 155 | None, 156 | "--out", 157 | "-o", 158 | help="Write output to file. If provided, output will not be copied to clipboard.", 159 | ), 160 | append: bool = typer.Option( 161 | False, 162 | "--append", 163 | "-a", 164 | help="Append output instead of overwriting", 165 | ), 166 | print_output: bool = typer.Option( 167 | False, 168 | "--print", 169 | "-p", 170 | help="Print output to screen", 171 | ), 172 | verbose: bool = typer.Option( 173 | False, 174 | "--verbose", 175 | "-v", 176 | help="Show detailed file information in output", 177 | ), 178 | include: Optional[str] = typer.Option( 179 | None, 180 | "--include", 181 | "-i", 182 | help="Extensions to include (comma-separated, e.g. 'py,js,ts')", 183 | ), 184 | exclude: Optional[List[str]] = typer.Option( 185 | None, 186 | "--exclude", 187 | "-x", 188 | help="Glob patterns to exclude", 189 | ), 190 | diff_mode: str = typer.Option( 191 | "full", # Pass the string value instead of enum 192 | "--diff-mode", 193 | help="How to handle git diffs", 194 | callback=diff_mode_callback, 195 | ), 196 | depth: Optional[int] = typer.Option( 197 | None, 198 | "--depth", 199 | "-d", 200 | help="Maximum directory depth to scan (0 = current dir only)", 201 | ), 202 | debug: bool = typer.Option( 203 | False, 204 | "--debug", 205 | help="Debug mode for development", 206 | ), 207 | compare_branch: Optional[str] = typer.Option( 208 | None, 209 | "--diff-branch", 210 | help="Compare changes against specified branch instead of working directory", 211 | ), 212 | token: Optional[str] = typer.Option( 213 | None, 214 | "--token", 215 | envvar="GITHUB_TOKEN", 216 | help="GitHub token for issue and PR access", 217 | ), 218 | ) -> None: 219 | """Convert source code files to markdown format for LLM context.""" 220 | if version: 221 | console.print(f"copychat version {get_version('copychat')}") 222 | raise typer.Exit() 223 | 224 | try: 225 | # Parse source type and location 226 | source_type, source_loc = ( 227 | parse_source(source) if source else (SourceType.FILESYSTEM, ".") 228 | ) 229 | 230 | if debug: 231 | error_console.print( 232 | f"[magenta]Source type:[/] {source_type}, location: {source_loc}" 233 | ) 234 | error_console.print(f"[magenta]Paths to process:[/] {paths}") 235 | 236 | # Handle different source types 237 | if source_type == SourceType.GITHUB: 238 | try: 239 | github_source = GitHubSource(source_loc) 240 | source_dir = github_source.fetch() 241 | except Exception as e: 242 | if debug: 243 | raise 244 | error_console.print( 245 | f"[red]Error fetching GitHub repository:[/] {str(e)}" 246 | ) 247 | raise typer.Exit(1) 248 | elif source_type == SourceType.WEB: 249 | error_console.print("[red]Web sources not yet implemented[/]") 250 | raise typer.Exit(1) 251 | else: 252 | source_dir = Path(source_loc) 253 | 254 | # Handle file vs directory source 255 | if source_dir.is_file(): 256 | content = get_file_content( 257 | source_dir, diff_mode, compare_branch=compare_branch 258 | ) 259 | all_files = {source_dir: content} if content is not None else {} 260 | else: 261 | # For directories, scan all paths 262 | if not paths: 263 | paths = ["."] 264 | 265 | # Handle paths 266 | all_files = {} 267 | for path in paths: 268 | if debug: 269 | error_console.print(f"[cyan]Processing path:[/] {path}") 270 | 271 | # Allow GitHub issues/PRs/discussions as direct arguments 272 | try: 273 | repo, num, item_type = parse_github_item(path) 274 | if debug: 275 | error_console.print( 276 | f"[blue]Processing GitHub {item_type}:[/] {repo}#{num}" 277 | ) 278 | gh_item = GitHubItem(repo, num, token, item_type) 279 | p, content = gh_item.fetch() 280 | all_files[p] = content 281 | if debug: 282 | error_console.print( 283 | f"[green]Successfully fetched GitHub {item_type}[/]" 284 | ) 285 | continue 286 | except Exception as e: 287 | if debug: 288 | error_console.print( 289 | f"[yellow]Failed to process as GitHub item:[/] {str(e)}" 290 | ) 291 | pass 292 | 293 | # Allow GitHub blob URLs (individual files) 294 | try: 295 | repo, ref, file_path = parse_github_blob(path) 296 | if debug: 297 | error_console.print( 298 | f"[blue]Processing GitHub file:[/] {repo}/{file_path}@{ref}" 299 | ) 300 | gh_file = GitHubFile(path, token) 301 | p, content = gh_file.fetch() 302 | all_files[p] = content 303 | if debug: 304 | error_console.print( 305 | "[green]Successfully fetched GitHub file[/]" 306 | ) 307 | continue 308 | except Exception as e: 309 | if debug: 310 | error_console.print( 311 | f"[yellow]Failed to process as GitHub blob:[/] {str(e)}" 312 | ) 313 | pass 314 | 315 | target = Path(path) 316 | if target.is_absolute(): 317 | # Use absolute paths as-is 318 | if target.is_file(): 319 | content = get_file_content( 320 | target, diff_mode, compare_branch=compare_branch 321 | ) 322 | if content is not None: 323 | all_files[target] = content 324 | else: 325 | files = scan_directory( 326 | target, 327 | include=include.split(",") if include else None, 328 | exclude_patterns=exclude, 329 | diff_mode=diff_mode, 330 | max_depth=depth, 331 | compare_branch=compare_branch, 332 | ) 333 | all_files.update(files) 334 | else: 335 | # For relative paths, try source dir first, then current dir 336 | targets = [] 337 | if source_type == SourceType.GITHUB: 338 | # For GitHub sources, only look in the source directory 339 | targets = [source_dir / path] 340 | else: 341 | # For filesystem sources, try both but prefer source dir 342 | if source_dir != Path("."): 343 | targets.append(source_dir / path) 344 | targets.append(Path.cwd() / path) 345 | 346 | for target in targets: 347 | if target.exists(): 348 | if target.is_file(): 349 | content = get_file_content( 350 | target, diff_mode, compare_branch=compare_branch 351 | ) 352 | if content is not None: 353 | all_files[target] = content 354 | break 355 | else: 356 | files = scan_directory( 357 | target, 358 | include=include.split(",") if include else None, 359 | exclude_patterns=exclude, 360 | diff_mode=diff_mode, 361 | max_depth=depth, 362 | compare_branch=compare_branch, 363 | ) 364 | all_files.update(files) 365 | break 366 | if not all_files: 367 | error_console.print("Found [red]0[/] matching files") 368 | return 369 | 370 | # Separate GitHub issues/PRs from regular files for better reporting 371 | github_items = [] 372 | filesystem_files = [] 373 | 374 | for path, content in all_files.items(): 375 | if ( 376 | str(path).endswith((".md", ".issue.md", ".pr.md", ".discussion.md")) 377 | and isinstance(path, Path) 378 | and not path.exists() 379 | ) or ( 380 | # Also detect GitHub files by checking if the filename contains repo info and doesn't exist locally 381 | isinstance(path, Path) 382 | and not path.exists() 383 | and "_" 384 | in str( 385 | path.name 386 | ) # GitHub files have underscores from repo/ref/path formatting 387 | and any(part in str(path.name) for part in ["github", "blob", "_"]) 388 | ): 389 | github_items.append((path, content)) 390 | else: 391 | filesystem_files.append((path, content)) 392 | 393 | # Format files - pass both paths and content 394 | format_result = format_files_xml( 395 | [(path, content) for path, content in all_files.items()] 396 | ) 397 | 398 | # Get the formatted content, conditionally including header 399 | if verbose: 400 | result = str(format_result) 401 | # Print the display header to stderr for visibility 402 | error_console.print( 403 | "\nFile summary:", 404 | style="bold blue", 405 | ) 406 | # Use the display-friendly header 407 | error_console.print(create_display_header(format_result)) 408 | error_console.print() # Add blank line after header 409 | else: 410 | # Skip the header by taking only the formatted files 411 | result = "\n".join(f.formatted_content for f in format_result.files) 412 | 413 | # Custom message based on content types 414 | if github_items and filesystem_files: 415 | error_console.print( 416 | f"Downloaded [green]{len(github_items)}[/] GitHub items and found [green]{len(filesystem_files)}[/] matching files" 417 | ) 418 | elif github_items: 419 | error_console.print( 420 | f"Downloaded [green]{len(github_items)}[/] GitHub {'item' if len(github_items) == 1 else 'items'}" 421 | ) 422 | else: 423 | error_console.print( 424 | f"Found [green]{len(format_result.files)}[/] matching files" 425 | ) 426 | 427 | # Handle outputs 428 | if outfile: 429 | if append and outfile.exists(): 430 | existing_content = outfile.read_text() 431 | result = existing_content + "\n\n" + result 432 | outfile.write_text(result) 433 | error_console.print( 434 | f"Output {'appended' if append else 'written'} to [green]{outfile}[/]" 435 | ) 436 | # Only use clipboard if not writing to file AND not just printing to stdout 437 | elif not print_output or append: 438 | if append: 439 | try: 440 | existing_clipboard = pyperclip.paste() 441 | result = existing_clipboard + "\n\n" + result 442 | except Exception: 443 | error_console.print( 444 | "[yellow]Warning: Could not read clipboard for append[/]" 445 | ) 446 | 447 | try: 448 | pyperclip.copy(result) 449 | # Calculate total lines outside the f-string 450 | total_lines = sum( 451 | f.content.count("\n") + 1 for f in format_result.files 452 | ) 453 | error_console.print( 454 | f"{'Appended' if append else 'Copied'} to clipboard " 455 | f"(~{format_result.total_tokens:,} tokens, {total_lines:,} lines)" 456 | ) 457 | except Exception as e: 458 | error_console.print( 459 | f"[yellow]Warning: Could not copy to clipboard: {str(e)}[/]" 460 | ) 461 | if not print_output: 462 | # If clipboard failed and we're not printing, show the content 463 | error_console.print("[cyan]Content would have been:[/]") 464 | print(result) 465 | 466 | # Print to stdout only if explicitly requested 467 | if print_output: 468 | print(result) 469 | 470 | except Exception as e: 471 | if debug: 472 | raise 473 | error_console.print(f"[red]Error:[/] {str(e)}") 474 | raise typer.Exit(1) 475 | -------------------------------------------------------------------------------- /src/copychat/core.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Optional 3 | import pathspec 4 | import subprocess 5 | from enum import Enum 6 | import os 7 | 8 | from .patterns import DEFAULT_EXTENSIONS, EXCLUDED_DIRS, EXCLUDED_PATTERNS 9 | 10 | 11 | class DiffMode(Enum): 12 | FULL = "full" # All files as-is 13 | FULL_WITH_DIFF = "full-with-diff" # All files with diff markers 14 | CHANGED_WITH_DIFF = "changed-with-diff" # Only changed files with diff markers 15 | DIFF_ONLY = "diff-only" # Only the diff chunks 16 | 17 | 18 | def is_glob_pattern(path: str) -> bool: 19 | """Check if a path contains glob patterns.""" 20 | return "*" in path 21 | 22 | 23 | def resolve_paths(paths: list[str], base_path: Path = Path(".")) -> list[Path]: 24 | """Resolve a mix of glob patterns and regular paths.""" 25 | resolved = [] 26 | base_path = base_path.resolve() 27 | 28 | # Get gitignore and ccignore specs once for all paths 29 | git_spec = get_gitignore_spec(base_path) 30 | cc_spec = get_ccignore_spec(base_path) 31 | 32 | for path in paths: 33 | if is_glob_pattern(path): 34 | matches = list(base_path.glob(path)) 35 | # Filter matches through gitignore and ccignore 36 | for match in matches: 37 | try: 38 | # Check if under base path 39 | rel_path = match.relative_to(base_path) 40 | # Skip if matches gitignore or ccignore patterns 41 | rel_path_str = str(rel_path) 42 | if git_spec.match_file(rel_path_str) or cc_spec.match_file( 43 | rel_path_str 44 | ): 45 | continue 46 | resolved.append(match) 47 | except ValueError: 48 | # If path is not relative to base_path, just use it as-is 49 | resolved.append(match) 50 | else: 51 | # For non-glob paths, use them as-is 52 | path_obj = Path(path) 53 | if path_obj.is_absolute(): 54 | resolved.append(path_obj) 55 | else: 56 | resolved.append(base_path / path) 57 | return resolved 58 | 59 | 60 | def find_gitignore(start_path: Path) -> Optional[Path]: 61 | """Search for .gitignore file in current and parent directories.""" 62 | current = start_path.absolute() 63 | while current != current.parent: 64 | gitignore = current / ".gitignore" 65 | if gitignore.is_file(): 66 | return gitignore 67 | current = current.parent 68 | return None 69 | 70 | 71 | def find_ccignore_files(start_path: Path) -> list[tuple[Path, Path]]: 72 | """ 73 | Find all .ccignore files that apply to the given path. 74 | 75 | Returns a list of tuples (ccignore_file, directory) where: 76 | - ccignore_file is the path to the .ccignore file 77 | - directory is the directory containing the .ccignore file 78 | 79 | The list is ordered from most specific (closest to start_path) to most general. 80 | """ 81 | ccignore_files = [] 82 | current = start_path.absolute() 83 | 84 | # Start from the given path and traverse up to the root 85 | while current != current.parent: 86 | ccignore = current / ".ccignore" 87 | if ccignore.is_file(): 88 | ccignore_files.append((ccignore, current)) 89 | current = current.parent 90 | 91 | return ccignore_files 92 | 93 | 94 | def get_gitignore_spec( 95 | path: Path, extra_patterns: Optional[list[str]] = None 96 | ) -> pathspec.PathSpec: 97 | """Load .gitignore patterns and combine with our default exclusions.""" 98 | patterns = list(EXCLUDED_PATTERNS) 99 | 100 | # Add directory exclusions 101 | dir_patterns = [f"{d}/" for d in EXCLUDED_DIRS] 102 | patterns.extend(dir_patterns) 103 | 104 | # Add any extra patterns provided 105 | if extra_patterns: 106 | patterns.extend(extra_patterns) 107 | 108 | # Add patterns from .gitignore if found 109 | gitignore_path = find_gitignore(path) 110 | if gitignore_path: 111 | with open(gitignore_path) as f: 112 | gitignore_patterns = [ 113 | line.strip() for line in f if line.strip() and not line.startswith("#") 114 | ] 115 | patterns.extend(gitignore_patterns) 116 | 117 | return pathspec.PathSpec.from_lines("gitwildmatch", patterns) 118 | 119 | 120 | def get_ccignore_spec( 121 | path: Path, extra_patterns: Optional[list[str]] = None 122 | ) -> pathspec.PathSpec: 123 | """ 124 | Load .ccignore patterns from all applicable directories. 125 | 126 | This function finds all .ccignore files that apply to the given path, 127 | from the most specific (closest to the path) to the most general (root). 128 | Patterns from more specific .ccignore files take precedence over more general ones. 129 | """ 130 | patterns = [] 131 | 132 | # Add any extra patterns provided 133 | if extra_patterns: 134 | patterns.extend(extra_patterns) 135 | 136 | # Get all applicable .ccignore files 137 | ccignore_files = find_ccignore_files(path) 138 | 139 | # Process files from most general to most specific 140 | # This way, more specific patterns override more general ones 141 | for ccignore_path, dir_path in reversed(ccignore_files): 142 | with open(ccignore_path) as f: 143 | ccignore_patterns = [ 144 | line.strip() for line in f if line.strip() and not line.startswith("#") 145 | ] 146 | patterns.extend(ccignore_patterns) 147 | 148 | return pathspec.PathSpec.from_lines("gitwildmatch", patterns) 149 | 150 | 151 | def get_git_diff(path: Path, compare_branch: Optional[str] = None) -> str: 152 | """Get git diff for the given path, optionally comparing against a specific branch.""" 153 | try: 154 | # First check if file is tracked by git 155 | result = subprocess.run( 156 | ["git", "ls-files", "--error-unmatch", str(path)], 157 | capture_output=True, 158 | text=True, 159 | check=False, # Don't raise error for untracked files 160 | ) 161 | if result.returncode != 0: 162 | return "" # File is not tracked by git 163 | 164 | # Get the diff, either against the index (default) or specified branch 165 | if compare_branch: 166 | # First get the merge base 167 | merge_base = subprocess.run( 168 | ["git", "merge-base", "HEAD", compare_branch], 169 | capture_output=True, 170 | text=True, 171 | check=True, 172 | ).stdout.strip() 173 | 174 | # Then do the diff against the merge base 175 | result = subprocess.run( 176 | ["git", "diff", merge_base, "--", str(path)], 177 | capture_output=True, 178 | text=True, 179 | check=False, 180 | ) 181 | else: 182 | result = subprocess.run( 183 | ["git", "diff", "--", str(path)], # Removed --exit-code 184 | capture_output=True, 185 | text=True, 186 | check=False, 187 | ) 188 | return result.stdout # Return output regardless of return code 189 | 190 | except subprocess.CalledProcessError: 191 | return "" 192 | 193 | 194 | def get_changed_files(compare_branch: Optional[str] = None) -> set[Path]: 195 | """Get set of files that have changes according to git.""" 196 | try: 197 | # First get the git root directory 198 | git_root = subprocess.run( 199 | ["git", "rev-parse", "--show-toplevel"], 200 | capture_output=True, 201 | text=True, 202 | check=True, 203 | ).stdout.strip() 204 | git_root_path = Path(git_root) 205 | 206 | if compare_branch: 207 | # Get all changes between current branch and compare branch 208 | result = subprocess.run( 209 | [ 210 | "git", 211 | "diff", 212 | "--name-status", 213 | f"{compare_branch}...HEAD", # Use triple dot to compare branches 214 | ], 215 | capture_output=True, 216 | text=True, 217 | check=True, 218 | ) 219 | # Also get any unstaged/uncommitted changes 220 | unstaged_result = subprocess.run( 221 | ["git", "status", "--porcelain"], 222 | capture_output=True, 223 | text=True, 224 | check=True, 225 | ) 226 | # Combine both results 227 | combined_output = result.stdout + unstaged_result.stdout 228 | else: 229 | # Get both staged and unstaged changes (current behavior) 230 | combined_output = subprocess.run( 231 | ["git", "status", "--porcelain"], 232 | capture_output=True, 233 | text=True, 234 | check=True, 235 | ).stdout 236 | 237 | changed = set() 238 | for line in combined_output.splitlines(): 239 | if not line.strip(): 240 | continue 241 | 242 | # Split on tab or space to handle both formats 243 | parts = line.split(None, 1) # Split on whitespace, max 1 split 244 | if len(parts) < 2: 245 | continue 246 | 247 | status, filepath = parts 248 | 249 | # Handle renamed files (they have arrow notation) 250 | if " -> " in filepath: 251 | filepath = filepath.split(" -> ")[-1] 252 | 253 | # Convert relative path to absolute using git root 254 | abs_path = (git_root_path / filepath).resolve() 255 | changed.add(abs_path) 256 | 257 | return changed 258 | except subprocess.CalledProcessError: 259 | return set() 260 | 261 | 262 | def get_file_content( 263 | path: Path, 264 | diff_mode: DiffMode, 265 | changed_files: Optional[set[Path]] = None, 266 | compare_branch: Optional[str] = None, 267 | ) -> Optional[str]: 268 | """Get file content based on diff mode.""" 269 | if not path.is_file(): 270 | return None 271 | 272 | # Get content 273 | content = path.read_text() 274 | 275 | # Return full content immediately if that's what we want 276 | if diff_mode == DiffMode.FULL: 277 | return content 278 | 279 | # Check if file has changes and get diff if needed 280 | if changed_files is not None: 281 | has_changes = path in changed_files 282 | # Get diff here so we can use it for all diff modes 283 | diff = get_git_diff(path, compare_branch) if has_changes else "" 284 | else: 285 | # Get diff first, then check if there are changes 286 | diff = get_git_diff(path, compare_branch) 287 | has_changes = bool(diff) 288 | 289 | # Handle different modes 290 | if diff_mode == DiffMode.DIFF_ONLY: 291 | return diff if has_changes else None 292 | elif diff_mode == DiffMode.CHANGED_WITH_DIFF: 293 | if not has_changes: 294 | return None 295 | return f"{content}\n\n# Git Diff:\n{diff}" 296 | elif diff_mode == DiffMode.FULL_WITH_DIFF: 297 | if not has_changes: 298 | return content 299 | return f"{content}\n\n# Git Diff:\n{diff}" 300 | 301 | return None 302 | 303 | 304 | def scan_directory( 305 | path: Path, 306 | include: Optional[list[str]] = None, 307 | exclude_patterns: Optional[list[str]] = None, 308 | diff_mode: DiffMode = DiffMode.FULL, 309 | max_depth: Optional[int] = None, 310 | compare_branch: Optional[str] = None, 311 | ) -> dict[Path, str]: 312 | """Scan directory for files to process.""" 313 | # Get changed files upfront if we're using a diff mode 314 | changed_files = ( 315 | get_changed_files(compare_branch) if diff_mode != DiffMode.FULL else None 316 | ) 317 | 318 | # Convert string paths to Path objects and handle globs 319 | if isinstance(path, str): 320 | if is_glob_pattern(path): 321 | paths = resolve_paths([path]) 322 | else: 323 | paths = [Path(path)] 324 | else: 325 | paths = [path] 326 | 327 | result = {} 328 | 329 | # Pre-compute extension set 330 | include_set = {f".{ext.lstrip('.')}" for ext in (include or DEFAULT_EXTENSIONS)} 331 | 332 | for current_path in paths: 333 | if current_path.is_file(): 334 | # For single files, just check if it matches filters 335 | if include and current_path.suffix.lstrip(".") not in include: 336 | continue 337 | content = get_file_content( 338 | current_path, diff_mode, changed_files, compare_branch 339 | ) 340 | if content is not None: 341 | result[current_path] = content 342 | continue 343 | 344 | # Convert to absolute path once 345 | abs_path = current_path.resolve() 346 | if not abs_path.exists(): 347 | continue 348 | 349 | # Get gitignore spec once for the starting directory 350 | git_spec = get_gitignore_spec(abs_path, exclude_patterns) 351 | 352 | # Use os.walk for better performance than rglob 353 | for root, _, files in os.walk(abs_path): 354 | root_path = Path(root) 355 | 356 | # Check depth if max_depth is specified 357 | if max_depth is not None: 358 | try: 359 | # Calculate current depth relative to the starting path 360 | rel_path = root_path.relative_to(abs_path) 361 | current_depth = len(rel_path.parts) 362 | if current_depth > max_depth: 363 | continue 364 | except ValueError: 365 | continue 366 | 367 | # Get relative path once per directory 368 | try: 369 | rel_root = str(root_path.relative_to(abs_path)) 370 | if rel_root == ".": 371 | rel_root = "" 372 | except ValueError: 373 | continue 374 | 375 | # Get ccignore spec for the current directory (to handle hierarchical patterns) 376 | cc_spec = get_ccignore_spec(root_path, exclude_patterns) 377 | 378 | # Check if directory should be skipped (via gitignore or ccignore) 379 | if rel_root: 380 | dir_path = rel_root + "/" 381 | if git_spec.match_file(dir_path) or cc_spec.match_file(dir_path): 382 | continue 383 | 384 | for filename in files: 385 | # Quick extension check before more expensive operations 386 | ext = Path(filename).suffix.lower() 387 | if ext not in include_set: 388 | continue 389 | 390 | # Build relative path string directly 391 | rel_path_str = ( 392 | os.path.join(rel_root, filename) if rel_root else filename 393 | ) 394 | 395 | # Check both gitignore and ccignore patterns 396 | if git_spec.match_file(rel_path_str) or cc_spec.match_file( 397 | rel_path_str 398 | ): 399 | continue 400 | 401 | # Only create Path object if file passes all filters 402 | file_path = root_path / filename 403 | 404 | # Get content based on diff mode 405 | content = get_file_content( 406 | file_path, diff_mode, changed_files, compare_branch 407 | ) 408 | if content is not None: 409 | result[file_path] = content 410 | 411 | return result 412 | 413 | 414 | def scan_files(patterns: list[str], root: Path) -> set[Path]: 415 | """Scan directory for files matching glob patterns.""" 416 | files = set() 417 | for pattern in patterns: 418 | files.update(root.glob(pattern)) 419 | return files 420 | -------------------------------------------------------------------------------- /src/copychat/format.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Optional 3 | from os.path import commonpath 4 | from datetime import datetime, timezone 5 | import tiktoken 6 | from dataclasses import dataclass 7 | 8 | 9 | @dataclass 10 | class FileStats: 11 | """Statistics for a single file.""" 12 | 13 | chars: int 14 | tokens: int 15 | 16 | 17 | @dataclass 18 | class FormattedFile: 19 | """A formatted file with its stats.""" 20 | 21 | path: Path 22 | content: str 23 | stats: FileStats 24 | formatted_content: str 25 | 26 | 27 | @dataclass 28 | class FormatResult: 29 | """Result of formatting one or more files.""" 30 | 31 | files: list[FormattedFile] 32 | root_path: Path 33 | timestamp: datetime 34 | formatted_content: str 35 | total_chars: int = 0 36 | total_tokens: int = 0 37 | has_header: bool = True 38 | 39 | def __str__(self) -> str: 40 | """Return the formatted content.""" 41 | return self.formatted_content 42 | 43 | 44 | def format_file( 45 | file_path: Path, root_path: Path, content: Optional[str] = None 46 | ) -> FormattedFile: 47 | """Format a single file as XML-style markdown and return structured result.""" 48 | try: 49 | # Use provided content or read from file 50 | if content is None: 51 | content = file_path.read_text() 52 | 53 | # Calculate stats 54 | stats = FileStats(chars=len(content), tokens=estimate_tokens(content)) 55 | 56 | # Use string paths for comparison to handle symlinks and different path formats 57 | file_str = str(file_path.resolve()) 58 | root_str = str(root_path.resolve()) 59 | 60 | # Remove the root path and any leading slashes 61 | if file_str.startswith(root_str): 62 | rel_path = file_str[len(root_str) :].lstrip("/\\") 63 | else: 64 | rel_path = file_str # Fallback to full path if not a subpath 65 | 66 | language = guess_language(file_path) 67 | 68 | # Build the XML tag with attributes 69 | tag_attrs = [f'path="{rel_path}"'] 70 | if language: 71 | tag_attrs.append(f'language="{language}"') 72 | 73 | attrs_str = " ".join(tag_attrs) 74 | 75 | formatted_content = f""" 76 | {content} 77 | """ 78 | 79 | return FormattedFile( 80 | path=file_path, 81 | content=content, 82 | stats=stats, 83 | formatted_content=formatted_content, 84 | ) 85 | 86 | except Exception as e: 87 | # Return empty stats for failed files 88 | return FormattedFile( 89 | path=file_path, 90 | content=f"", 91 | stats=FileStats(chars=0, tokens=0), 92 | formatted_content=f"", 93 | ) 94 | 95 | 96 | def create_header(result: FormatResult) -> str: 97 | """Create a header with metadata about the export.""" 98 | timestamp = result.timestamp.strftime("%Y-%m-%d %H:%M:%S UTC") 99 | 100 | # Create a table-like format for files 101 | rel_paths = [] 102 | for f in result.files: 103 | try: 104 | rel_path = str(f.path.relative_to(result.root_path)) 105 | # Make sure path is not empty or just "." 106 | if not rel_path or rel_path == ".": 107 | # For GitHub items, use a more descriptive name 108 | if ( 109 | isinstance(f.path, Path) 110 | and f.path.name 111 | and ( 112 | "_pr_" in f.path.name 113 | or "_issue_" in f.path.name 114 | or "_discussion_" in f.path.name 115 | ) 116 | ) or ( 117 | # Also check for GitHub blob files (they have repo_ref_filepath pattern) 118 | isinstance(f.path, Path) 119 | and f.path.name 120 | and "_" in f.path.name 121 | and len(f.path.name.split("_")) 122 | >= 3 # repo_ref_filepath has at least 3 parts 123 | ): 124 | # This appears to be a GitHub item, use a more descriptive name 125 | rel_path = f.path.name 126 | else: 127 | rel_path = f.path.name or str(f.path) 128 | except ValueError: 129 | rel_path = str(f.path) # Fallback to full path if not a subpath 130 | rel_paths.append(rel_path) 131 | 132 | # Use the minimum of the longest path or 50 chars 133 | max_path_len = ( 134 | max(len(path) for path in rel_paths) if rel_paths else 4 135 | ) # Min "Path" header width 136 | max_path_len = max(max_path_len, 4) # Ensure min width for "Path" header 137 | max_path_len = min(max_path_len, 50) # Cap path length for readability 138 | 139 | # Calculate line counts 140 | file_lines = {f.path: f.content.count("\n") + 1 for f in result.files} 141 | total_lines = sum(file_lines.values()) 142 | 143 | header = [ 144 | "", 171 | "", 172 | ] 173 | ) 174 | 175 | return "\n".join(header) 176 | 177 | 178 | def create_display_header(result: FormatResult) -> str: 179 | """Create a display-friendly header without XML comments.""" 180 | timestamp = result.timestamp.strftime("%Y-%m-%d %H:%M:%S UTC") 181 | 182 | # Create a table-like format for files 183 | rel_paths = [] 184 | for f in result.files: 185 | try: 186 | rel_path = str(f.path.relative_to(result.root_path)) 187 | # Make sure path is not empty or just "." 188 | if not rel_path or rel_path == ".": 189 | # For GitHub items, use a more descriptive name 190 | if ( 191 | isinstance(f.path, Path) 192 | and f.path.name 193 | and ( 194 | "_pr_" in f.path.name 195 | or "_issue_" in f.path.name 196 | or "_discussion_" in f.path.name 197 | ) 198 | ) or ( 199 | # Also check for GitHub blob files (they have repo_ref_filepath pattern) 200 | isinstance(f.path, Path) 201 | and f.path.name 202 | and "_" in f.path.name 203 | and len(f.path.name.split("_")) 204 | >= 3 # repo_ref_filepath has at least 3 parts 205 | ): 206 | # This appears to be a GitHub item, use a more descriptive name 207 | rel_path = f.path.name 208 | else: 209 | rel_path = f.path.name or str(f.path) 210 | except ValueError: 211 | rel_path = str(f.path) # Fallback to full path if not a subpath 212 | rel_paths.append(rel_path) 213 | 214 | # Use the minimum of the longest path or 50 chars 215 | max_path_len = ( 216 | max(len(path) for path in rel_paths) if rel_paths else 4 217 | ) # Min "Path" header width 218 | max_path_len = max(max_path_len, 4) # Ensure min width for "Path" header 219 | max_path_len = min(max_path_len, 50) # Cap path length for readability 220 | 221 | # Calculate line counts 222 | file_lines = {f.path: f.content.count("\n") + 1 for f in result.files} 223 | total_lines = sum(file_lines.values()) 224 | 225 | header = [ 226 | f"Generated by copychat on {timestamp}", 227 | f"Root path: {result.root_path}", 228 | f"Summary: {len(result.files)} files, ~{result.total_tokens:,} tokens, {total_lines:,} lines", 229 | "", 230 | "┌" + "─" * (max_path_len + 2) + "┬" + "─" * 12 + "┬" + "─" * 10 + "┐", 231 | f"│ {'Path':<{max_path_len}} │ {'Tokens':>10} │ {'Lines':>8} │", 232 | "├" + "─" * (max_path_len + 2) + "┼" + "─" * 12 + "┼" + "─" * 10 + "┤", 233 | ] 234 | 235 | # Format each file as a table row 236 | for i, f in enumerate(sorted(result.files, key=lambda x: str(x.path))): 237 | rel_path = rel_paths[i] 238 | if len(rel_path) > max_path_len: 239 | trunc_len = max_path_len - 3 240 | rel_path = "..." + rel_path[-trunc_len:] 241 | 242 | lines = file_lines[f.path] 243 | header.append( 244 | f"│ {rel_path:<{max_path_len}} │ {f.stats.tokens:>10,} │ {lines:>8,} │" 245 | ) 246 | 247 | header.append( 248 | "└" + "─" * (max_path_len + 2) + "┴" + "─" * 12 + "┴" + "─" * 10 + "┘" 249 | ) 250 | 251 | return "\n".join(header) 252 | 253 | 254 | def format_files(files: list[tuple[Path, str]]) -> FormatResult: 255 | """Format files into markdown with XML-style tags. 256 | 257 | Args: 258 | files: List of (path, content) tuples to format 259 | 260 | Returns: 261 | FormatResult containing all formatting information 262 | """ 263 | if not files: 264 | return FormatResult( 265 | files=[], 266 | root_path=Path("."), 267 | timestamp=datetime.now(timezone.utc), 268 | formatted_content="\n", 269 | has_header=False, 270 | ) 271 | 272 | # Find common root path using os.path.commonpath 273 | paths = [f[0] for f in files] 274 | str_paths = [str(f.absolute()) for f in paths] 275 | root_path = Path(commonpath(str_paths)) 276 | 277 | # Format each file 278 | formatted_files = [] 279 | total_chars = 0 280 | total_tokens = 0 281 | 282 | for file_path, content in files: 283 | formatted = format_file(file_path, root_path, content) 284 | formatted_files.append(formatted) 285 | total_chars += formatted.stats.chars 286 | total_tokens += formatted.stats.tokens 287 | 288 | result = FormatResult( 289 | files=formatted_files, 290 | root_path=root_path, 291 | timestamp=datetime.now(timezone.utc), 292 | total_chars=total_chars, 293 | total_tokens=total_tokens, 294 | formatted_content="", # Will be set after header 295 | ) 296 | 297 | # Create header and combine all parts 298 | header = create_header(result) 299 | formatted_content = "\n".join( 300 | [header] + [f.formatted_content for f in formatted_files] 301 | ) 302 | 303 | # Update the formatted content 304 | result.formatted_content = formatted_content 305 | 306 | return result 307 | 308 | 309 | # Keep existing helper functions unchanged 310 | def estimate_tokens(text: str) -> int: 311 | """Estimate the number of tokens in the text using GPT tokenizer.""" 312 | try: 313 | # Using cl100k_base (used by GPT-4, Claude) 314 | encoding = tiktoken.get_encoding("cl100k_base") 315 | return len(encoding.encode(text)) 316 | except Exception: 317 | # Fallback to rough estimate if tiktoken fails 318 | return len(text) // 4 # Rough estimate: ~4 chars per token 319 | 320 | 321 | def guess_language(file_path: Path) -> Optional[str]: 322 | """Guess the programming language based on file extension.""" 323 | ext = file_path.suffix.lower() 324 | 325 | # Common language mappings 326 | language_map = { 327 | ".py": "python", 328 | ".js": "javascript", 329 | ".ts": "typescript", 330 | ".jsx": "jsx", 331 | ".tsx": "tsx", 332 | ".html": "html", 333 | ".css": "css", 334 | ".scss": "scss", 335 | ".rs": "rust", 336 | ".go": "go", 337 | ".java": "java", 338 | ".cpp": "cpp", 339 | ".c": "c", 340 | ".h": "c", 341 | ".hpp": "cpp", 342 | ".rb": "ruby", 343 | ".php": "php", 344 | ".sh": "bash", 345 | ".yaml": "yaml", 346 | ".yml": "yaml", 347 | ".json": "json", 348 | ".md": "markdown", 349 | ".sql": "sql", 350 | ".r": "r", 351 | ".swift": "swift", 352 | ".kt": "kotlin", 353 | ".kts": "kotlin", 354 | ".scala": "scala", 355 | ".pl": "perl", 356 | ".pm": "perl", 357 | } 358 | 359 | return language_map.get(ext) 360 | -------------------------------------------------------------------------------- /src/copychat/patterns.py: -------------------------------------------------------------------------------- 1 | """Default patterns and extensions for file filtering.""" 2 | 3 | # Default extensions we care about (without dots) 4 | DEFAULT_EXTENSIONS = { 5 | # Web 6 | "html", 7 | "css", 8 | "scss", 9 | "js", 10 | "jsx", 11 | "ts", 12 | "tsx", 13 | "json", 14 | # Python 15 | "py", 16 | "pyi", 17 | "pyw", 18 | # Ruby 19 | "rb", 20 | "erb", 21 | # JVM 22 | "java", 23 | "kt", 24 | "scala", 25 | "gradle", 26 | # Systems 27 | "c", 28 | "h", 29 | "cpp", 30 | "hpp", 31 | "rs", 32 | "go", 33 | # Shell 34 | "sh", 35 | "bash", 36 | "zsh", 37 | "fish", 38 | # Config 39 | "yaml", 40 | "yml", 41 | "toml", 42 | "ini", 43 | "conf", 44 | # Docs 45 | "md", 46 | "mdx", 47 | "rst", 48 | "txt", 49 | # Other 50 | "sql", 51 | "graphql", 52 | "xml", 53 | "dockerfile", 54 | "gitignore", 55 | } 56 | 57 | # Directories that should always be excluded 58 | EXCLUDED_DIRS = { 59 | # Version Control 60 | ".git", 61 | ".svn", 62 | ".hg", 63 | # Dependencies 64 | "node_modules", 65 | "venv", 66 | ".venv", 67 | "env", 68 | "__pycache__", 69 | ".pytest_cache", 70 | ".ruff_cache", 71 | "target", 72 | "build", 73 | "dist", 74 | # IDE 75 | ".idea", 76 | ".vscode", 77 | # Other 78 | ".next", 79 | ".nuxt", 80 | ".output", 81 | "coverage", 82 | } 83 | 84 | # Files or patterns that should always be excluded 85 | EXCLUDED_PATTERNS = { 86 | # Build artifacts 87 | "*.pyc", 88 | "*.pyo", 89 | "*.pyd", 90 | "*.so", 91 | "*.dll", 92 | "*.dylib", 93 | "*.class", 94 | "*.jar", 95 | "*.war", 96 | "*.min.js", 97 | "*.min.css", 98 | # Logs and databases 99 | "*.log", 100 | "*.sqlite", 101 | "*.db", 102 | # OS files 103 | ".DS_Store", 104 | "Thumbs.db", 105 | "desktop.ini", 106 | # Package files 107 | "package-lock.json", 108 | "yarn.lock", 109 | "poetry.lock", 110 | # Environment and secrets 111 | ".env", 112 | ".env.*", 113 | "*.env", 114 | # Other 115 | "*.bak", 116 | "*.swp", 117 | "*.swo", 118 | "*~", 119 | } 120 | -------------------------------------------------------------------------------- /src/copychat/sources.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | from typing import Optional 4 | import git 5 | from rich.console import Console 6 | import tempfile 7 | 8 | error_console = Console(stderr=True) 9 | 10 | # Shared temporary directory for GitHub items 11 | _github_temp_dir = None 12 | 13 | 14 | def get_github_temp_dir() -> Path: 15 | """Get a temporary directory for GitHub items that persists for the process.""" 16 | global _github_temp_dir 17 | if _github_temp_dir is None: 18 | _github_temp_dir = Path(tempfile.mkdtemp(prefix="copychat_github_")) 19 | return _github_temp_dir 20 | 21 | 22 | class GitHubSource: 23 | """Handle GitHub repositories as sources.""" 24 | 25 | def __init__(self, repo_path: str, cache_dir: Optional[Path] = None): 26 | """Initialize GitHub source.""" 27 | self.repo_path = repo_path.strip("/") 28 | self.cache_dir = cache_dir or Path.home() / ".cache" / "copychat" / "github" 29 | self.cache_dir.mkdir(parents=True, exist_ok=True) 30 | 31 | @property 32 | def clone_url(self) -> str: 33 | """Get HTTPS clone URL for repository.""" 34 | return f"https://github.com/{self.repo_path}.git" 35 | 36 | @property 37 | def repo_dir(self) -> Path: 38 | """Get path to cached repository.""" 39 | return self.cache_dir / self.repo_path.replace("/", "_") 40 | 41 | def fetch(self) -> Path: 42 | """Fetch repository and return path to files.""" 43 | try: 44 | if self.repo_dir.exists(): 45 | # Update existing repo 46 | repo = git.Repo(self.repo_dir) 47 | repo.remotes.origin.fetch() 48 | repo.remotes.origin.pull() 49 | else: 50 | # Clone new repo 51 | git.Repo.clone_from(self.clone_url, self.repo_dir, depth=1) 52 | 53 | return self.repo_dir 54 | 55 | except git.GitCommandError as e: 56 | error_console.print(f"[red]Error accessing repository:[/] {str(e)}") 57 | raise 58 | 59 | def cleanup(self) -> None: 60 | """Remove cached repository.""" 61 | if self.repo_dir.exists(): 62 | shutil.rmtree(self.repo_dir) 63 | 64 | 65 | class GitHubItem: 66 | """Fetch a GitHub issue, pull request, or discussion with comments.""" 67 | 68 | def __init__( 69 | self, 70 | repo_path: str, 71 | number: int, 72 | token: Optional[str] = None, 73 | item_type: str = "issue", 74 | ): 75 | self.repo_path = repo_path.strip("/") 76 | self.number = number 77 | self.token = token 78 | self.item_type = item_type # 'issue', 'pull', or 'discussion' 79 | self.api_base = "https://api.github.com" 80 | 81 | def _headers(self) -> dict[str, str]: 82 | headers = {"Accept": "application/vnd.github+json"} 83 | if self.token: 84 | headers["Authorization"] = f"Bearer {self.token}" 85 | return headers 86 | 87 | def _graphql_headers(self) -> dict[str, str]: 88 | headers = {"Content-Type": "application/json"} 89 | if self.token: 90 | headers["Authorization"] = f"Bearer {self.token}" 91 | return headers 92 | 93 | def _fetch_discussion(self) -> tuple[dict, list]: 94 | """Fetch discussion data using GraphQL API.""" 95 | import requests 96 | 97 | if not self.token: 98 | error_console.print( 99 | "[yellow]Warning: GitHub token recommended for discussions. Some rate limits may apply.[/]" 100 | ) 101 | 102 | # GraphQL query to fetch discussion 103 | query = """ 104 | query($owner: String!, $name: String!, $number: Int!) { 105 | repository(owner: $owner, name: $name) { 106 | discussion(number: $number) { 107 | title 108 | body 109 | url 110 | createdAt 111 | updatedAt 112 | author { 113 | login 114 | } 115 | category { 116 | name 117 | } 118 | comments(first: 100) { 119 | nodes { 120 | body 121 | createdAt 122 | author { 123 | login 124 | } 125 | replies(first: 50) { 126 | nodes { 127 | body 128 | createdAt 129 | author { 130 | login 131 | } 132 | } 133 | } 134 | } 135 | } 136 | } 137 | } 138 | } 139 | """ 140 | 141 | owner, repo = self.repo_path.split("/") 142 | variables = {"owner": owner, "name": repo, "number": self.number} 143 | 144 | try: 145 | resp = requests.post( 146 | "https://api.github.com/graphql", 147 | headers=self._graphql_headers(), 148 | json={"query": query, "variables": variables}, 149 | timeout=30, 150 | ) 151 | resp.raise_for_status() 152 | data = resp.json() 153 | 154 | if "errors" in data: 155 | error_console.print(f"[red]GraphQL errors:[/] {data['errors']}") 156 | raise Exception(f"GraphQL errors: {data['errors']}") 157 | 158 | discussion = data["data"]["repository"]["discussion"] 159 | if not discussion: 160 | raise Exception(f"Discussion #{self.number} not found") 161 | 162 | # Flatten comments and replies 163 | comments = [] 164 | for comment in discussion["comments"]["nodes"]: 165 | comments.append(comment) 166 | # Add replies as nested comments 167 | for reply in comment["replies"]["nodes"]: 168 | comments.append(reply) 169 | 170 | return discussion, comments 171 | 172 | except Exception as e: 173 | error_console.print( 174 | f"[yellow]Warning: Failed to fetch discussion: {str(e)}[/]" 175 | ) 176 | raise 177 | 178 | def _fetch_pr_diff(self) -> Optional[str]: 179 | """Fetch the PR diff from GitHub.""" 180 | import requests 181 | 182 | if not self.token: 183 | error_console.print( 184 | "[yellow]Warning: GitHub token not provided. Some rate limits may apply.[/]" 185 | ) 186 | 187 | # Get the diff using the GitHub API 188 | diff_url = f"{self.api_base}/repos/{self.repo_path}/pulls/{self.number}" 189 | headers = self._headers() 190 | headers["Accept"] = "application/vnd.github.diff" 191 | try: 192 | diff_resp = requests.get(diff_url, headers=headers, timeout=30) 193 | diff_resp.raise_for_status() 194 | return diff_resp.text 195 | except Exception as e: 196 | error_console.print( 197 | f"[yellow]Warning: Failed to fetch PR diff: {str(e)}[/]" 198 | ) 199 | return None 200 | 201 | def fetch(self) -> tuple[Path, str]: 202 | """Return (path, content) for the issue, PR, or discussion.""" 203 | if self.item_type == "discussion": 204 | return self._fetch_discussion_content() 205 | else: 206 | return self._fetch_issue_or_pr_content() 207 | 208 | def _fetch_discussion_content(self) -> tuple[Path, str]: 209 | """Fetch and format discussion content.""" 210 | discussion, comments = self._fetch_discussion() 211 | 212 | lines = [f"# {discussion.get('title', '')} (#{self.number})", ""] 213 | 214 | # Add metadata section 215 | html_url = discussion.get( 216 | "url", f"https://github.com/{self.repo_path}/discussions/{self.number}" 217 | ) 218 | user = discussion.get("author", {}).get("login", "unknown") 219 | created_at = discussion.get("createdAt", "") 220 | updated_at = discussion.get("updatedAt", "") 221 | category = discussion.get("category", {}).get("name", "") 222 | 223 | lines.extend( 224 | [ 225 | f"> **Discussion**: [{self.repo_path}#{self.number}]({html_url})", 226 | f"> **Category**: {category}", 227 | f"> **Author**: {user}", 228 | f"> **Created**: {created_at}", 229 | f"> **Updated**: {updated_at}", 230 | "", 231 | ] 232 | ) 233 | 234 | body = discussion.get("body") or "" 235 | if body: 236 | lines.append(body) 237 | lines.append("") 238 | 239 | # Add comments 240 | for comment in comments: 241 | user = comment.get("author", {}).get("login", "unknown") 242 | created = comment.get("createdAt", "") 243 | lines.append(f"## {user} - {created}") 244 | if comment.get("body"): 245 | lines.append(comment["body"]) 246 | lines.append("") 247 | 248 | content = "\n".join(lines).strip() + "\n" 249 | 250 | # Use temporary directory 251 | filename = f"{self.repo_path.replace('/', '_')}_discussion_{self.number}.md" 252 | temp_dir = get_github_temp_dir() 253 | path = temp_dir / filename 254 | 255 | return path, content 256 | 257 | def _fetch_issue_or_pr_content(self) -> tuple[Path, str]: 258 | """Fetch and format issue or PR content.""" 259 | import requests 260 | 261 | issue_url = f"{self.api_base}/repos/{self.repo_path}/issues/{self.number}" 262 | resp = requests.get(issue_url, headers=self._headers(), timeout=30) 263 | resp.raise_for_status() 264 | data = resp.json() 265 | 266 | comments_resp = requests.get( 267 | data.get("comments_url"), headers=self._headers(), timeout=30 268 | ) 269 | comments_resp.raise_for_status() 270 | comments = comments_resp.json() 271 | 272 | review_comments = [] 273 | is_pr = "pull_request" in data 274 | diff_content = None 275 | 276 | if is_pr: 277 | # Fetch review comments 278 | review_url = ( 279 | f"{self.api_base}/repos/{self.repo_path}/pulls/{self.number}/comments" 280 | ) 281 | rc = requests.get(review_url, headers=self._headers(), timeout=30) 282 | if rc.ok: 283 | review_comments = rc.json() 284 | 285 | # Get the PR diff 286 | diff_content = self._fetch_pr_diff() 287 | 288 | lines = [f"# {data.get('title', '')} (#{self.number})", ""] 289 | body = data.get("body") or "" 290 | 291 | # Add metadata section 292 | item_type = "Pull Request" if is_pr else "Issue" 293 | html_url = data.get( 294 | "html_url", f"https://github.com/{self.repo_path}/issues/{self.number}" 295 | ) 296 | user = data.get("user", {}).get("login", "unknown") 297 | created_at = data.get("created_at", "") 298 | updated_at = data.get("updated_at", "") 299 | state = data.get("state", "").upper() 300 | 301 | # Create a metadata header 302 | lines.extend( 303 | [ 304 | f"> **{item_type}**: [{self.repo_path}#{self.number}]({html_url})", 305 | f"> **Status**: {state}", 306 | f"> **Author**: {user}", 307 | f"> **Created**: {created_at}", 308 | f"> **Updated**: {updated_at}", 309 | "", 310 | ] 311 | ) 312 | 313 | if body: 314 | lines.append(body) 315 | lines.append("") 316 | 317 | # Add PR diff if available 318 | if is_pr and diff_content: 319 | lines.extend( 320 | [ 321 | "## PR Diff", 322 | "", 323 | "```diff", 324 | diff_content, 325 | "```", 326 | "", 327 | ] 328 | ) 329 | 330 | for c in comments: 331 | user = c.get("user", {}).get("login", "unknown") 332 | created = c.get("created_at", "") 333 | lines.append(f"## {user} - {created}") 334 | if c.get("body"): 335 | lines.append(c["body"]) 336 | lines.append("") 337 | 338 | for c in review_comments: 339 | user = c.get("user", {}).get("login", "unknown") 340 | created = c.get("created_at", "") 341 | path = c.get("path", "") 342 | lines.append(f"## Review by {user} on {path} - {created}") 343 | if c.get("body"): 344 | lines.append(c["body"]) 345 | lines.append("") 346 | 347 | content = "\n".join(lines).strip() + "\n" 348 | item_type_filename = "pr" if is_pr else "issue" 349 | 350 | # Use temporary directory 351 | filename = ( 352 | f"{self.repo_path.replace('/', '_')}_{item_type_filename}_{self.number}.md" 353 | ) 354 | temp_dir = get_github_temp_dir() 355 | path = temp_dir / filename 356 | 357 | return path, content 358 | 359 | 360 | class GitHubFile: 361 | """Fetch a single file from GitHub via blob URL.""" 362 | 363 | def __init__(self, blob_url: str, token: Optional[str] = None): 364 | self.blob_url = blob_url 365 | self.token = token 366 | 367 | # Parse the blob URL to extract repo, ref, and file path 368 | import re 369 | 370 | match = re.search(r"github\.com/([^/]+/[^/]+)/blob/([^/]+)/(.*)", blob_url) 371 | if not match: 372 | raise ValueError(f"Invalid GitHub blob URL: {blob_url}") 373 | 374 | self.repo_path = match.group(1) 375 | self.ref = match.group(2) 376 | self.file_path = match.group(3) 377 | 378 | def _headers(self) -> dict[str, str]: 379 | headers = {"Accept": "application/vnd.github+json"} 380 | if self.token: 381 | headers["Authorization"] = f"Bearer {self.token}" 382 | return headers 383 | 384 | def fetch(self) -> tuple[Path, str]: 385 | """Fetch the file content and return (path, content).""" 386 | import requests 387 | 388 | # Use the raw.githubusercontent.com URL for direct file access 389 | raw_url = f"https://raw.githubusercontent.com/{self.repo_path}/{self.ref}/{self.file_path}" 390 | 391 | try: 392 | resp = requests.get(raw_url, timeout=30) 393 | resp.raise_for_status() 394 | content = resp.text 395 | except Exception as e: 396 | error_console.print( 397 | f"[yellow]Warning: Failed to fetch from raw URL, trying API:[/] {str(e)}" 398 | ) 399 | 400 | # Fallback to GitHub API 401 | api_url = f"https://api.github.com/repos/{self.repo_path}/contents/{self.file_path}" 402 | params = {"ref": self.ref} 403 | 404 | try: 405 | resp = requests.get( 406 | api_url, headers=self._headers(), params=params, timeout=30 407 | ) 408 | resp.raise_for_status() 409 | data = resp.json() 410 | 411 | if data.get("type") != "file": 412 | raise Exception( 413 | f"URL points to a {data.get('type', 'unknown')}, not a file" 414 | ) 415 | 416 | # Decode base64 content 417 | import base64 418 | 419 | content = base64.b64decode(data["content"]).decode("utf-8") 420 | except Exception as api_error: 421 | error_console.print(f"[red]Failed to fetch file:[/] {str(api_error)}") 422 | raise 423 | 424 | # Create a meaningful filename in temp directory 425 | filename = f"{self.repo_path.replace('/', '_')}_{self.ref}_{self.file_path.replace('/', '_')}" 426 | temp_dir = get_github_temp_dir() 427 | path = temp_dir / filename 428 | 429 | return path, content 430 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pytest 3 | import shutil 4 | 5 | 6 | @pytest.fixture 7 | def sample_project(tmp_path) -> Path: 8 | """Create a copy of the fixture project in a temporary directory.""" 9 | fixtures_dir = Path(__file__).parent / "fixtures" 10 | project_dir = tmp_path / "test_project" 11 | 12 | # Copy all fixtures to temporary directory 13 | shutil.copytree(fixtures_dir, project_dir, dirs_exist_ok=True) 14 | 15 | return project_dir 16 | 17 | 18 | @pytest.fixture 19 | def sample_project_files(sample_project) -> list[Path]: 20 | """Get a list of all files in the sample project.""" 21 | return list(sample_project.rglob("*")) 22 | 23 | 24 | def test_fixture_structure(sample_project): 25 | """Verify the fixture structure is correct.""" 26 | assert (sample_project / "src" / "main.py").exists() 27 | assert (sample_project / "src" / "app.js").exists() 28 | assert (sample_project / "src" / "styles" / "main.css").exists() 29 | assert (sample_project / "docs" / "README.md").exists() 30 | assert (sample_project / "config" / "settings.yml").exists() 31 | assert (sample_project / "db" / "schema.sql").exists() 32 | assert (sample_project / ".gitignore").exists() 33 | assert (sample_project / ".env").exists() 34 | -------------------------------------------------------------------------------- /tests/data/test1.txt: -------------------------------------------------------------------------------- 1 | This is a test file -------------------------------------------------------------------------------- /tests/data/test2.md: -------------------------------------------------------------------------------- 1 | This is another test file -------------------------------------------------------------------------------- /tests/fixtures/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__/ 3 | .env 4 | node_modules/ 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | """Test fixtures package.""" 2 | -------------------------------------------------------------------------------- /tests/fixtures/config/settings.yml: -------------------------------------------------------------------------------- 1 | app: 2 | name: TestApp 3 | version: 1.0.0 4 | 5 | database: 6 | host: localhost 7 | port: 5432 8 | -------------------------------------------------------------------------------- /tests/fixtures/db/schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE users ( 2 | id SERIAL PRIMARY KEY, 3 | username VARCHAR(50) NOT NULL, 4 | email VARCHAR(255) NOT NULL 5 | ); 6 | 7 | CREATE INDEX idx_username ON users(username); -------------------------------------------------------------------------------- /tests/fixtures/docs/README.md: -------------------------------------------------------------------------------- 1 | # Test Project 2 | 3 | This is a test project with various file types. 4 | 5 | ## Structure 6 | - src/ 7 | - main.py 8 | - app.js 9 | - styles/ 10 | - utils/ 11 | - docs/ 12 | - tests/ 13 | -------------------------------------------------------------------------------- /tests/fixtures/src/app.js: -------------------------------------------------------------------------------- 1 | function App() { 2 | return ( 3 |
4 |

Hello World

5 |

This is a test component

6 |
7 | ); 8 | } 9 | 10 | export default App; 11 | -------------------------------------------------------------------------------- /tests/fixtures/src/main.py: -------------------------------------------------------------------------------- 1 | def main(): 2 | """Example main function.""" 3 | print("Hello from main!") 4 | return True 5 | 6 | 7 | if __name__ == "__main__": 8 | main() 9 | -------------------------------------------------------------------------------- /tests/fixtures/src/styles/main.css: -------------------------------------------------------------------------------- 1 | .app { 2 | margin: 0; 3 | padding: 20px; 4 | font-family: sans-serif; 5 | } 6 | 7 | .header { 8 | color: #333; 9 | font-size: 24px; 10 | } 11 | -------------------------------------------------------------------------------- /tests/fixtures/src/types.ts: -------------------------------------------------------------------------------- 1 | interface User { 2 | id: number; 3 | name: string; 4 | email: string; 5 | } 6 | 7 | type UserRole = "admin" | "user" | "guest"; 8 | 9 | export { User, UserRole }; 10 | -------------------------------------------------------------------------------- /tests/fixtures/src/utils/helpers.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | 4 | def format_string(value: Any) -> str: 5 | """Format any value as a string.""" 6 | return str(value).strip() 7 | 8 | 9 | def calculate_total(numbers: list[float]) -> float: 10 | """Calculate sum of numbers.""" 11 | return sum(numbers) 12 | -------------------------------------------------------------------------------- /tests/test_ccignore.py: -------------------------------------------------------------------------------- 1 | """Tests for .ccignore functionality.""" 2 | 3 | import pytest 4 | from copychat.core import ( 5 | find_ccignore_files, 6 | get_ccignore_spec, 7 | scan_directory, 8 | ) 9 | 10 | 11 | @pytest.fixture 12 | def ccignore_test_dir(tmp_path): 13 | """Create a test directory structure with .ccignore files.""" 14 | # Root directory with .ccignore 15 | root_ccignore = tmp_path / ".ccignore" 16 | root_ccignore.write_text("*.log\n") 17 | 18 | # Create subdirectory with its own .ccignore 19 | subdir = tmp_path / "subdir" 20 | subdir.mkdir() 21 | subdir_ccignore = subdir / ".ccignore" 22 | subdir_ccignore.write_text("*.json\n") 23 | 24 | # Create nested subdirectory with its own .ccignore 25 | nested_subdir = subdir / "nested" 26 | nested_subdir.mkdir() 27 | nested_ccignore = nested_subdir / ".ccignore" 28 | nested_ccignore.write_text("*.md\n") 29 | 30 | # Create test files 31 | (tmp_path / "root.txt").write_text("root text file") 32 | (tmp_path / "root.log").write_text("root log file") 33 | (tmp_path / "root.json").write_text("root json file") 34 | (tmp_path / "root.md").write_text("root md file") 35 | 36 | (subdir / "subdir.txt").write_text("subdir text file") 37 | (subdir / "subdir.log").write_text("subdir log file") 38 | (subdir / "subdir.json").write_text("subdir json file") 39 | (subdir / "subdir.md").write_text("subdir md file") 40 | 41 | (nested_subdir / "nested.txt").write_text("nested text file") 42 | (nested_subdir / "nested.log").write_text("nested log file") 43 | (nested_subdir / "nested.json").write_text("nested json file") 44 | (nested_subdir / "nested.md").write_text("nested md file") 45 | 46 | return tmp_path 47 | 48 | 49 | def test_find_ccignore_files(ccignore_test_dir): 50 | """Test finding all .ccignore files that apply to a path.""" 51 | nested_dir = ccignore_test_dir / "subdir" / "nested" 52 | 53 | # Should find 3 .ccignore files, from most specific to most general 54 | result = find_ccignore_files(nested_dir) 55 | assert len(result) == 3 56 | 57 | # Check the order - should be from most specific to most general 58 | assert result[0][0] == nested_dir / ".ccignore" 59 | assert result[1][0] == ccignore_test_dir / "subdir" / ".ccignore" 60 | assert result[2][0] == ccignore_test_dir / ".ccignore" 61 | 62 | # Test with path that has no .ccignore 63 | empty_dir = ccignore_test_dir / "empty_dir" 64 | empty_dir.mkdir() 65 | result = find_ccignore_files(empty_dir) 66 | assert len(result) == 1 67 | assert result[0][0] == ccignore_test_dir / ".ccignore" 68 | 69 | 70 | def test_get_ccignore_spec(ccignore_test_dir): 71 | """Test generating PathSpec from .ccignore files.""" 72 | # Root directory should only exclude .log files 73 | root_spec = get_ccignore_spec(ccignore_test_dir) 74 | assert root_spec.match_file("test.log") 75 | assert not root_spec.match_file("test.json") 76 | assert not root_spec.match_file("test.md") 77 | 78 | # Subdirectory should exclude .log and .json files 79 | subdir_spec = get_ccignore_spec(ccignore_test_dir / "subdir") 80 | assert subdir_spec.match_file("test.log") 81 | assert subdir_spec.match_file("test.json") 82 | assert not subdir_spec.match_file("test.md") 83 | 84 | # Nested subdirectory should exclude .log, .json, and .md files 85 | nested_spec = get_ccignore_spec(ccignore_test_dir / "subdir" / "nested") 86 | assert nested_spec.match_file("test.log") 87 | assert nested_spec.match_file("test.json") 88 | assert nested_spec.match_file("test.md") 89 | 90 | 91 | def test_scan_directory_with_ccignore(ccignore_test_dir): 92 | """Test that scan_directory respects .ccignore patterns.""" 93 | # Scan the root directory - should exclude .log files 94 | files = scan_directory(ccignore_test_dir, include=["txt", "json", "md", "log"]) 95 | paths = {str(f) for f in files} 96 | 97 | # Root dir - .log should be excluded, others included 98 | assert not any(p.endswith("root.log") for p in paths) 99 | assert any(p.endswith("root.txt") for p in paths) 100 | assert any(p.endswith("root.json") for p in paths) 101 | assert any(p.endswith("root.md") for p in paths) 102 | 103 | # Subdir - .log and .json should be excluded, others included 104 | assert not any(p.endswith("subdir.log") for p in paths) 105 | assert not any(p.endswith("subdir.json") for p in paths) 106 | assert any(p.endswith("subdir.txt") for p in paths) 107 | assert any(p.endswith("subdir.md") for p in paths) 108 | 109 | # Nested subdir - .log, .json, and .md should be excluded, others included 110 | assert not any(p.endswith("nested.log") for p in paths) 111 | assert not any(p.endswith("nested.json") for p in paths) 112 | assert not any(p.endswith("nested.md") for p in paths) 113 | assert any(p.endswith("nested.txt") for p in paths) 114 | 115 | 116 | def test_ccignore_with_extra_patterns(ccignore_test_dir): 117 | """Test that extra exclude patterns work with .ccignore.""" 118 | # Add extra exclude pattern for .txt files 119 | spec = get_ccignore_spec(ccignore_test_dir, extra_patterns=["*.txt"]) 120 | 121 | # Should exclude both .log files (from .ccignore) and .txt files (from extra patterns) 122 | assert spec.match_file("test.log") 123 | assert spec.match_file("test.txt") 124 | assert not spec.match_file("test.json") 125 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | from copychat.cli import app 3 | import pyperclip 4 | import re 5 | from pathlib import Path 6 | 7 | runner = CliRunner() 8 | 9 | 10 | def strip_ansi(text: str) -> str: 11 | """Remove ANSI escape codes from text.""" 12 | ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") 13 | return ansi_escape.sub("", text) 14 | 15 | 16 | def test_cli_default_behavior(tmp_path, monkeypatch): 17 | """Test that default behavior copies to clipboard.""" 18 | # Create a test file 19 | test_file = tmp_path / "test.py" 20 | test_file.write_text("print('hello')") 21 | 22 | # Mock pyperclip.copy 23 | copied_content = [] 24 | 25 | def mock_copy(text): 26 | copied_content.append(text) 27 | 28 | monkeypatch.setattr(pyperclip, "copy", mock_copy) 29 | 30 | # Run CLI 31 | result = runner.invoke(app, [str(tmp_path)]) 32 | 33 | assert result.exit_code == 0 34 | assert len(copied_content) == 1 35 | assert 'language="python"' in copied_content[0] 36 | assert "print('hello')" in copied_content[0] 37 | 38 | 39 | def test_cli_output_file(tmp_path, monkeypatch): 40 | """Test writing output to file.""" 41 | # Create a test file 42 | test_file = tmp_path / "test.py" 43 | test_file.write_text("print('hello')") 44 | 45 | # Create output file path 46 | out_file = tmp_path / "output.md" 47 | 48 | # Mock pyperclip.copy 49 | monkeypatch.setattr(pyperclip, "copy", lambda x: None) 50 | 51 | # Run CLI 52 | result = runner.invoke(app, [str(tmp_path), "--out", str(out_file)]) 53 | 54 | assert result.exit_code == 0 55 | assert out_file.exists() 56 | content = out_file.read_text() 57 | assert 'language="python"' in content 58 | assert "print('hello')" in content 59 | 60 | 61 | def test_cli_print_output(tmp_path, monkeypatch): 62 | """Test printing output to screen.""" 63 | # Create a test file 64 | test_file = tmp_path / "test.py" 65 | test_file.write_text("print('hello')") 66 | 67 | # Mock pyperclip.copy 68 | monkeypatch.setattr(pyperclip, "copy", lambda x: None) 69 | 70 | # Run CLI 71 | result = runner.invoke(app, [str(tmp_path), "--print"]) 72 | 73 | assert result.exit_code == 0 74 | assert 'language="python"' in result.stdout 75 | assert "print('hello')" in result.stdout 76 | 77 | 78 | def test_cli_no_files_found(tmp_path): 79 | """Test behavior when no matching files are found.""" 80 | # Create a non-matching file 81 | test_file = tmp_path / "test.txt" 82 | test_file.write_text("hello") 83 | 84 | # Run CLI with filter for .py files only 85 | result = runner.invoke(app, [str(tmp_path), "--include", "py"]) 86 | 87 | # Since this is expected behavior, CLI should exit with code 0 rather than 1 88 | assert result.exit_code == 0 89 | assert "Found 0 matching files" in strip_ansi(result.stderr) 90 | 91 | 92 | def test_cli_multiple_outputs(tmp_path, monkeypatch): 93 | """Test combining output options.""" 94 | # Create a test file 95 | test_file = tmp_path / "test.py" 96 | test_file.write_text("print('hello')") 97 | 98 | # Create output file path 99 | out_file = tmp_path / "output.md" 100 | 101 | # Mock pyperclip.copy and paste 102 | copied_content = [] 103 | 104 | def mock_copy(text): 105 | copied_content.append(text) 106 | 107 | # Since we're using output file, clipboard copy won't happen 108 | # Instead just check the file output and stdout 109 | monkeypatch.setattr(pyperclip, "copy", mock_copy) 110 | 111 | # Run CLI with both file output and print 112 | result = runner.invoke(app, [str(tmp_path), "--out", str(out_file), "--print"]) 113 | 114 | assert result.exit_code == 0 115 | 116 | # Check file 117 | assert out_file.exists() 118 | file_content = out_file.read_text() 119 | assert 'language="python"' in file_content 120 | 121 | # Check stdout 122 | assert 'language="python"' in result.stdout 123 | 124 | 125 | def test_cli_append_file(tmp_path, monkeypatch): 126 | """Test appending output to an existing file.""" 127 | # Create a test file to scan 128 | test_file = tmp_path / "test.py" 129 | test_file.write_text("print('hello')") 130 | 131 | # Create existing output file with content 132 | out_file = tmp_path / "output.md" 133 | out_file.write_text("existing content\n") 134 | 135 | # Mock pyperclip.copy 136 | monkeypatch.setattr(pyperclip, "copy", lambda x: None) 137 | 138 | # Run CLI with append flag 139 | result = runner.invoke(app, [str(tmp_path), "--out", str(out_file), "--append"]) 140 | 141 | assert result.exit_code == 0 142 | content = out_file.read_text() 143 | assert "existing content" in content 144 | assert 'language="python"' in content 145 | assert "print('hello')" in content 146 | 147 | 148 | def test_cli_append_clipboard(tmp_path, monkeypatch): 149 | """Test appending output to clipboard content.""" 150 | # Create a test file 151 | test_file = tmp_path / "test.py" 152 | test_file.write_text("print('new content')") 153 | 154 | # Mock clipboard content and operations 155 | clipboard_content = ["existing clipboard content"] 156 | 157 | def mock_copy(text): 158 | clipboard_content[0] = text 159 | 160 | def mock_paste(): 161 | return clipboard_content[0] 162 | 163 | monkeypatch.setattr(pyperclip, "copy", mock_copy) 164 | monkeypatch.setattr(pyperclip, "paste", mock_paste) 165 | 166 | # Run CLI with append flag 167 | result = runner.invoke(app, [str(tmp_path), "--append"]) 168 | 169 | assert result.exit_code == 0 170 | assert "existing clipboard content" in clipboard_content[0] 171 | assert 'language="python"' in clipboard_content[0] 172 | assert "print('new content')" in clipboard_content[0] 173 | 174 | 175 | def test_cli_exclude_pattern(tmp_path, monkeypatch): 176 | """Test excluding files with patterns.""" 177 | # Create test files 178 | py_file = tmp_path / "code.py" 179 | py_file.write_text("print('include me')") 180 | 181 | js_file = tmp_path / "script.js" 182 | js_file.write_text("console.log('exclude me')") 183 | 184 | # Mock pyperclip.copy 185 | copied_content = [] 186 | 187 | def mock_copy(text): 188 | copied_content.append(text) 189 | 190 | monkeypatch.setattr(pyperclip, "copy", mock_copy) 191 | 192 | # Run CLI with exclude pattern for JS files 193 | result = runner.invoke(app, [str(tmp_path), "--exclude", "*.js"]) 194 | 195 | assert result.exit_code == 0 196 | assert len(copied_content) == 1 197 | assert "print('include me')" in copied_content[0] 198 | assert "console.log('exclude me')" not in copied_content[0] 199 | 200 | 201 | def test_cli_directory_depth(tmp_path, monkeypatch): 202 | """Test limiting directory scan depth.""" 203 | # Create nested directory structure 204 | level1 = tmp_path / "level1" 205 | level1.mkdir() 206 | level1_file = level1 / "level1.py" 207 | level1_file.write_text("print('level1')") 208 | 209 | level2 = level1 / "level2" 210 | level2.mkdir() 211 | level2_file = level2 / "level2.py" 212 | level2_file.write_text("print('level2')") 213 | 214 | # Mock pyperclip.copy 215 | copied_content = [] 216 | 217 | def mock_copy(text): 218 | copied_content.append(text) 219 | 220 | monkeypatch.setattr(pyperclip, "copy", mock_copy) 221 | 222 | # Run CLI with depth=1 (should only include level1 directory) 223 | result = runner.invoke(app, [str(tmp_path), "--depth", "1"]) 224 | 225 | assert result.exit_code == 0 226 | assert len(copied_content) == 1 227 | assert "print('level1')" in copied_content[0] 228 | assert "print('level2')" not in copied_content[0] 229 | 230 | 231 | def test_cli_verbose_output(tmp_path, monkeypatch): 232 | """Test verbose output includes file metadata.""" 233 | # Create a test file 234 | test_file = tmp_path / "test.py" 235 | test_file.write_text("print('hello')") 236 | 237 | # Mock pyperclip.copy 238 | copied_content = [] 239 | 240 | def mock_copy(text): 241 | copied_content.append(text) 242 | 243 | monkeypatch.setattr(pyperclip, "copy", mock_copy) 244 | 245 | # Run CLI with verbose flag 246 | result = runner.invoke(app, [str(tmp_path), "--verbose"]) 247 | 248 | assert result.exit_code == 0 249 | assert len(copied_content) == 1 250 | 251 | # Verbose output should include file metadata header with summary 252 | # header_content = copied_content[0].split("```")[0] 253 | assert "File summary" in strip_ansi(result.stderr) 254 | assert ( 255 | "Files: 1" in strip_ansi(result.stderr) 256 | or "1 file" in strip_ansi(result.stderr).lower() 257 | ) 258 | 259 | 260 | def test_cli_github_item_basic(monkeypatch): 261 | """Basic test for GitHub item handling that doesn't rely on internal implementation.""" 262 | runner = CliRunner() 263 | 264 | # Instead of mocking complex internals, just provide a simple mock for the scan_directory function 265 | # so it returns a known result when the CLI processes a GitHub item 266 | def mock_scan_empty(directory, **kwargs): 267 | """Return empty dict to ensure our mock item is the only one processed.""" 268 | return {} 269 | 270 | # Mock clipboard operations 271 | copied = [] 272 | monkeypatch.setattr(pyperclip, "copy", lambda x: copied.append(x)) 273 | 274 | # Replace scan_directory with our mock to avoid file system dependencies 275 | monkeypatch.setattr("copychat.cli.scan_directory", mock_scan_empty) 276 | 277 | # Run the CLI with a mocked item 278 | # The exact format doesn't matter as we're not testing the GitHub API integration 279 | result = runner.invoke(app, ["owner/repo#123"], catch_exceptions=False) 280 | 281 | # We expect either: 282 | # 1. Success (exit_code=0) if the mock returns results, or 283 | # 2. "Found 0 matching files" message (exit_code=0) if mocking couldn't succeed 284 | # Either way, we've tested that the CLI can handle the GitHub item format 285 | assert result.exit_code == 0 or "No module named 'requests'" in result.stderr 286 | 287 | # If we failed to fetch anything due to missing requests library 288 | # at least make sure we attempted to parse the GitHub item format 289 | if result.exit_code != 0: 290 | assert "owner/repo#123" in result.stderr or "GitHub" in result.stderr 291 | 292 | 293 | def test_table_alignment_with_dot_path(tmp_path, monkeypatch): 294 | """Test table alignment when path resolves to '.'""" 295 | # Create a test file 296 | test_file = tmp_path / "test.md" 297 | test_file.write_text("# Test content") 298 | 299 | # Mock relative_to so it returns "." path 300 | original_relative_to = Path.relative_to 301 | 302 | def mock_relative_to(self, other): 303 | # Always return a path that is just "." 304 | if str(self) == str(test_file): 305 | return Path(".") 306 | return original_relative_to(self, other) 307 | 308 | monkeypatch.setattr(Path, "relative_to", mock_relative_to) 309 | 310 | # Mock pyperclip.copy 311 | copied_content = [] 312 | 313 | def mock_copy(text): 314 | copied_content.append(text) 315 | 316 | monkeypatch.setattr(pyperclip, "copy", mock_copy) 317 | 318 | # Run CLI with verbose flag 319 | result = runner.invoke(app, [str(test_file), "--verbose"]) 320 | 321 | assert result.exit_code == 0 322 | 323 | # Ensure table is properly aligned in the output 324 | table_output = strip_ansi(result.stderr) 325 | 326 | # The "Path" header and first column content should be aligned 327 | path_header_idx = table_output.find("│ Path") 328 | assert path_header_idx > 0, "Path header not found in table" 329 | 330 | # Extract the table rows by looking for lines with │ characters 331 | table_lines = [line for line in table_output.split("\n") if "│" in line] 332 | 333 | # Verify there are at least a header row and a data row 334 | assert len(table_lines) >= 2, "Table should have header and data rows" 335 | 336 | # Check that columns align vertically - the first │ should be at the same position in each row 337 | positions = [line.find("│") for line in table_lines] 338 | assert len(set(positions)) == 1, "Misaligned table columns (first pipe)" 339 | 340 | # Check that second │ (after Path column) aligns in all rows 341 | positions = [line.find("│", positions[0] + 1) for line in table_lines] 342 | assert len(set(positions)) == 1, "Misaligned table columns (second pipe)" 343 | 344 | # Confirm test.md appears in the table with proper alignment 345 | assert "test.md" in table_output, "Filename should appear in table output" 346 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from copychat.core import ( 3 | find_gitignore, 4 | DiffMode, 5 | is_glob_pattern, 6 | resolve_paths, 7 | scan_directory, 8 | scan_files, 9 | ) 10 | from pathlib import Path 11 | 12 | 13 | def test_diff_mode_enum(): 14 | """Test DiffMode enum values.""" 15 | assert DiffMode.FULL.value == "full" 16 | assert DiffMode.FULL_WITH_DIFF.value == "full-with-diff" 17 | assert DiffMode.CHANGED_WITH_DIFF.value == "changed-with-diff" 18 | assert DiffMode.DIFF_ONLY.value == "diff-only" 19 | 20 | 21 | def test_is_glob_pattern(): 22 | """Test glob pattern detection.""" 23 | assert is_glob_pattern("*.py") 24 | assert is_glob_pattern("src/**/*.js") 25 | assert is_glob_pattern("test/*") 26 | assert not is_glob_pattern("src/main.py") 27 | assert not is_glob_pattern("path/to/file") 28 | 29 | 30 | def test_resolve_paths(tmp_path): 31 | """Test path resolution with glob patterns.""" 32 | # Create test files 33 | (tmp_path / "test1.py").touch() 34 | (tmp_path / "test2.py").touch() 35 | (tmp_path / "src").mkdir() 36 | (tmp_path / "src" / "main.py").touch() 37 | (tmp_path / "src" / "util.js").touch() 38 | 39 | # Test glob resolution 40 | paths = resolve_paths(["*.py", "src/**/*.py"], base_path=tmp_path) 41 | assert len(paths) == 3 42 | assert tmp_path / "test1.py" in paths 43 | assert tmp_path / "test2.py" in paths 44 | assert tmp_path / "src" / "main.py" in paths 45 | 46 | # Test mixed glob and regular paths 47 | paths = resolve_paths(["src", "*.py"], base_path=tmp_path) 48 | assert len(paths) == 3 49 | assert tmp_path / "src" in paths 50 | 51 | 52 | @pytest.fixture 53 | def git_repo(tmp_path): 54 | """Create a temporary git repository with a .gitignore file.""" 55 | gitignore = tmp_path / ".gitignore" 56 | gitignore.write_text("*.pyc\n__pycache__/\n") 57 | return tmp_path 58 | 59 | 60 | def test_scan_with_glob_patterns(): 61 | # Create test directory and files if they don't exist 62 | test_dir = Path("tests/data") 63 | test_dir.mkdir(parents=True, exist_ok=True) 64 | 65 | with open(test_dir / "test1.txt", "w") as f: 66 | f.write("This is a test file") 67 | with open(test_dir / "test2.md", "w") as f: 68 | f.write("This is another test file") 69 | 70 | files = scan_files(["*.txt", "*.md"], test_dir) 71 | assert len(files) == 2 72 | 73 | 74 | def test_find_gitignore_exists(git_repo): 75 | """Test finding .gitignore in current directory.""" 76 | result = find_gitignore(git_repo) 77 | assert result == git_repo / ".gitignore" 78 | 79 | 80 | def test_find_gitignore_parent(git_repo): 81 | """Test finding .gitignore in parent directory.""" 82 | child_dir = git_repo / "subdir" 83 | child_dir.mkdir() 84 | result = find_gitignore(child_dir) 85 | assert result == git_repo / ".gitignore" 86 | 87 | 88 | def test_find_gitignore_not_found(tmp_path): 89 | """Test behavior when no .gitignore is found.""" 90 | result = find_gitignore(tmp_path) 91 | assert result is None 92 | 93 | 94 | def test_scan_with_recursive_glob(tmp_path): 95 | """Test scanning with recursive glob patterns.""" 96 | # Create nested test files 97 | (tmp_path / "test1.py").write_text("print('test1')") 98 | deep_dir = tmp_path / "very" / "deep" / "nested" 99 | deep_dir.mkdir(parents=True) 100 | (deep_dir / "test2.py").write_text("print('test2')") 101 | (deep_dir / "test.js").write_text("console.log('test')") 102 | 103 | # Test recursive glob pattern 104 | files = scan_directory( 105 | tmp_path, include=["py"] 106 | ) # Changed from tmp_path / "**/*.py" 107 | assert len(files) == 2 108 | assert any("test1.py" in str(p) for p in files) 109 | assert any("test2.py" in str(p) for p in files) 110 | 111 | # Test from within subdirectory 112 | subdir_files = scan_directory( 113 | tmp_path / "very", include=["py"] 114 | ) # Changed from tmp_path / "very" / "**/*.py" 115 | assert len(subdir_files) == 1 116 | assert any("test2.py" in str(p) for p in subdir_files) 117 | 118 | 119 | def test_scan_single_file(tmp_path): 120 | """Test scanning a single file.""" 121 | # Create a test file 122 | test_file = tmp_path / "test.py" 123 | test_file.write_text("print('hello world')") 124 | 125 | # Create some other files that shouldn't be included 126 | (tmp_path / "other.py").write_text("print('other')") 127 | (tmp_path / "test.js").write_text("console.log('test')") 128 | 129 | # Test scanning just the single file 130 | files = scan_directory(test_file, include=["py"]) 131 | 132 | # Should only contain our specific file 133 | assert len(files) == 1 134 | assert test_file in files 135 | assert files[test_file] == "print('hello world')" 136 | 137 | # Test with non-matching extension filter 138 | files = scan_directory(test_file, include=["js"]) 139 | assert len(files) == 0 140 | 141 | # Test with non-existent file 142 | files = scan_directory(tmp_path / "nonexistent.py", include=["py"]) 143 | assert len(files) == 0 144 | -------------------------------------------------------------------------------- /tests/test_format.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pytest 3 | from copychat.format import ( 4 | guess_language, 5 | format_file, 6 | create_header, 7 | estimate_tokens, 8 | format_files, 9 | ) 10 | 11 | 12 | @pytest.fixture 13 | def temp_files(tmp_path): 14 | """Create temporary test files.""" 15 | # Create a python file 16 | py_file = tmp_path / "test.py" 17 | py_file.write_text("def hello():\n print('world')") 18 | 19 | # Create a javascript file 20 | js_file = tmp_path / "test.js" 21 | js_file.write_text("function hello() {\n console.log('world');\n}") 22 | 23 | return tmp_path, [py_file, js_file] 24 | 25 | 26 | def test_guess_language(): 27 | """Test language detection from file extensions.""" 28 | assert guess_language(Path("test.py")) == "python" 29 | assert guess_language(Path("test.js")) == "javascript" 30 | assert guess_language(Path("test.tsx")) == "tsx" 31 | assert guess_language(Path("test.unknown")) is None 32 | 33 | 34 | def test_format_file(temp_files): 35 | """Test single file formatting.""" 36 | root_path, (py_file, _) = temp_files 37 | 38 | formatted_file = format_file(py_file, root_path) 39 | result = formatted_file.formatted_content 40 | 41 | assert " 0 70 | assert isinstance(tokens, int) 71 | 72 | 73 | def test_format_files(temp_files): 74 | """Test formatting multiple files.""" 75 | root_path, files = temp_files 76 | 77 | # Pass a list of tuples (Path, str) to format_files 78 | file_contents = [(f, f.read_text()) for f in files] 79 | format_result = format_files(file_contents) 80 | result = str(format_result) 81 | 82 | # Check header 83 | assert "Generated by copychat" in result 84 | 85 | # Check both files are included 86 | assert 'path="test.py"' in result 87 | assert 'path="test.js"' in result 88 | 89 | # Check content 90 | assert "def hello():" in result 91 | assert "console.log('world');" in result 92 | 93 | 94 | def test_format_files_empty(): 95 | """Test formatting with no files.""" 96 | format_result = format_files([]) 97 | result = str(format_result) 98 | assert "No files found" in result 99 | 100 | 101 | def test_format_file_error(tmp_path): 102 | """Test handling of file read errors.""" 103 | non_existent = tmp_path / "does_not_exist.py" 104 | formatted_file = format_file(non_existent, tmp_path) 105 | result = formatted_file.formatted_content 106 | assert "Error processing" in result 107 | -------------------------------------------------------------------------------- /tests/test_github_item.py: -------------------------------------------------------------------------------- 1 | from copychat.sources import GitHubItem 2 | 3 | 4 | class DummyResponse: 5 | def __init__(self, data, status=200, is_text=False): 6 | self._data = data 7 | self.status_code = status 8 | self.ok = status == 200 9 | self._is_text = is_text 10 | 11 | def raise_for_status(self): 12 | if not self.ok: 13 | raise Exception("status") 14 | 15 | def json(self): 16 | return self._data 17 | 18 | @property 19 | def text(self): 20 | return self._data if self._is_text else "" 21 | 22 | 23 | def test_github_item_fetch(monkeypatch): 24 | """GitHubItem should format issue and comments.""" 25 | 26 | issue_data = { 27 | "title": "Test issue", 28 | "body": "Body text", 29 | "comments_url": "http://example.com/comments", 30 | "pull_request": {}, 31 | "html_url": "https://github.com/owner/repo/pull/1", 32 | "user": {"login": "testuser"}, 33 | "created_at": "2024-01-01", 34 | "updated_at": "2024-01-02", 35 | "state": "open", 36 | } 37 | comments = [{"user": {"login": "alice"}, "created_at": "2024-01-01", "body": "hi"}] 38 | reviews = [ 39 | { 40 | "user": {"login": "bob"}, 41 | "created_at": "2024-01-02", 42 | "path": "file.py", 43 | "body": "looks good", 44 | } 45 | ] 46 | 47 | calls = [] 48 | 49 | def fake_get(url, headers=None, timeout=0): 50 | calls.append(url) 51 | if "comments" in url and "pulls" in url: 52 | return DummyResponse(reviews) 53 | if "comments" in url: 54 | return DummyResponse(comments) 55 | return DummyResponse(issue_data) 56 | 57 | monkeypatch.setattr("requests.get", fake_get) 58 | 59 | item = GitHubItem("owner/repo", 1) 60 | path, content = item.fetch() 61 | 62 | assert path.name == "owner_repo_pr_1.md" 63 | assert "Test issue" in content 64 | assert "alice" in content 65 | assert "looks good" in content 66 | assert "**Pull Request**" in content 67 | assert "**Status**: OPEN" in content 68 | assert "**Author**: testuser" in content 69 | assert "https://github.com/owner/repo/pull/1" in content 70 | assert any("pulls" in c for c in calls) 71 | 72 | 73 | def test_github_item_fetch_with_diff(monkeypatch): 74 | """GitHubItem should include PR diff when available.""" 75 | 76 | issue_data = { 77 | "title": "Test PR", 78 | "body": "PR description", 79 | "comments_url": "http://example.com/comments", 80 | "pull_request": {}, 81 | "html_url": "https://github.com/owner/repo/pull/2", 82 | "user": {"login": "testuser"}, 83 | "created_at": "2024-01-01", 84 | "updated_at": "2024-01-02", 85 | "state": "open", 86 | } 87 | comments = [] 88 | reviews = [] 89 | diff_content = """diff --git a/file.txt b/file.txt 90 | index abc123..def456 100644 91 | --- a/file.txt 92 | +++ b/file.txt 93 | @@ -1,3 +1,3 @@ 94 | Line 1 95 | -Line 2 96 | +Line 2 modified 97 | Line 3""" 98 | 99 | calls = [] 100 | headers_received = {} 101 | 102 | def fake_get(url, headers=None, timeout=0): 103 | calls.append(url) 104 | if headers: 105 | headers_received[url] = headers 106 | 107 | if "diff" in headers.get("Accept", "") and "pulls" in url: 108 | return DummyResponse(diff_content, is_text=True) 109 | if "comments" in url and "pulls" in url: 110 | return DummyResponse(reviews) 111 | if "comments" in url: 112 | return DummyResponse(comments) 113 | return DummyResponse(issue_data) 114 | 115 | monkeypatch.setattr("requests.get", fake_get) 116 | 117 | item = GitHubItem("owner/repo", 2) 118 | path, content = item.fetch() 119 | 120 | assert path.name == "owner_repo_pr_2.md" 121 | assert "Test PR" in content 122 | assert "PR description" in content 123 | assert "**Pull Request**" in content 124 | assert "## PR Diff" in content 125 | assert "```diff" in content 126 | assert "+Line 2 modified" in content 127 | assert "application/vnd.github.diff" in headers_received.get( 128 | "https://api.github.com/repos/owner/repo/pulls/2", {} 129 | ).get("Accept", "") 130 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from copychat.core import scan_directory, DiffMode 3 | from copychat.format import format_files 4 | 5 | 6 | def test_basic_scan(sample_project): 7 | """Test basic file scanning functionality.""" 8 | files = scan_directory( 9 | sample_project, 10 | include=["py", "js", "css"], 11 | ) 12 | 13 | # Check we found the expected file types 14 | extensions = {f.suffix.lstrip(".") for f in files} 15 | assert extensions == {"py", "js", "css"} 16 | 17 | # Check we found files in nested directories 18 | assert any("utils" in str(f) for f in files) 19 | assert any("styles" in str(f) for f in files) 20 | 21 | 22 | def test_gitignore_handling(sample_project): 23 | """Test that .gitignore patterns are respected.""" 24 | files = scan_directory(sample_project, include=["py", "env"]) 25 | 26 | # These should be excluded by .gitignore 27 | paths = {str(f) for f in files} 28 | assert not any(f.endswith(".pyc") for f in paths) 29 | assert not any("__pycache__" in f for f in paths) 30 | assert not any(f.endswith(".env") for f in paths) 31 | 32 | 33 | def test_formatting_output(sample_project): 34 | """Test that output is formatted correctly.""" 35 | # Get files and format them 36 | files = scan_directory(sample_project, include=["py", "js"]) 37 | format_result = format_files([(f, f.read_text()) for f in files]) 38 | result = str(format_result) 39 | 40 | # Check for file content without line numbers 41 | assert "def main():" in result # Remove the "1|" prefix 42 | assert 'print("Hello from main!")' in result 43 | assert "function App()" in result 44 | assert "def calculate_total" in result 45 | 46 | 47 | def test_different_file_types(sample_project): 48 | """Test handling of different file types.""" 49 | files = scan_directory( 50 | sample_project, 51 | include=["yml", "sql", "ts", "md"], 52 | ) 53 | # Convert files to (path, content) tuples 54 | files_with_content = [(f, f.read_text()) for f in files] 55 | format_result = format_files(files_with_content) 56 | result = str(format_result) 57 | 58 | # Check various file types are properly formatted 59 | assert 'language="yaml"' in result 60 | assert 'language="sql"' in result 61 | assert 'language="typescript"' in result 62 | assert 'language="markdown"' in result 63 | 64 | # Check content snippets from each type 65 | assert "CREATE TABLE users" in result 66 | assert "interface User" in result 67 | assert "TestApp" in result 68 | assert "# Test Project" in result 69 | 70 | 71 | def test_exclusion_patterns(sample_project): 72 | """Test explicit exclusion patterns.""" 73 | files = scan_directory( 74 | sample_project, 75 | include=["py", "js"], 76 | exclude_patterns=["**/utils/*"], # Exclude utils directory 77 | ) 78 | 79 | paths = {str(f) for f in files} 80 | assert not any("utils" in p for p in paths) 81 | assert any("main.py" in p for p in paths) 82 | 83 | 84 | def test_empty_directory(tmp_path): 85 | """Test handling of empty directories.""" 86 | files = scan_directory(tmp_path) 87 | format_result = format_files([(f, f.read_text()) for f in list(files)]) 88 | result = str(format_result) 89 | assert "No files found" in result 90 | 91 | 92 | def test_header_metadata(sample_project): 93 | """Test header metadata in formatted output.""" 94 | files = scan_directory(sample_project, include=["py"]) 95 | # Convert files to (path, content) tuples 96 | files_with_content = [(f, f.read_text()) for f in files] 97 | format_result = format_files(files_with_content) 98 | result = str(format_result) 99 | 100 | # Check header contains important metadata 101 | assert "Generated by copychat on" in result 102 | assert "Root path:" in result 103 | assert "Summary:" in result # Changed from Files: 104 | 105 | # Check file path info in header table format 106 | assert "Path" in result 107 | assert "Tokens" in result 108 | assert "Lines" in result 109 | # Paths will be in table rows instead of list format 110 | 111 | 112 | @pytest.mark.parametrize( 113 | "diff_mode", 114 | [ 115 | DiffMode.FULL, 116 | DiffMode.FULL_WITH_DIFF, 117 | # Removing these modes for now as they require git setup 118 | # DiffMode.CHANGED_WITH_DIFF, 119 | # DiffMode.DIFF_ONLY, 120 | ], 121 | ) 122 | def test_diff_modes(sample_project, diff_mode): 123 | """Test different diff modes.""" 124 | files = scan_directory( 125 | sample_project, 126 | include=["py"], 127 | diff_mode=diff_mode, 128 | ) 129 | assert len(files) > 0 130 | 131 | 132 | def test_token_estimation(sample_project): 133 | """Test token estimation functionality.""" 134 | files = scan_directory(sample_project, include=["py", "js"]) 135 | # Convert files to (path, content) tuples 136 | files_with_content = [(f, f.read_text()) for f in files] 137 | format_result = format_files(files_with_content) 138 | result = str(format_result) 139 | 140 | # Result should include token info in header 141 | assert "tokens" in result.lower() 142 | 143 | # Basic sanity check - content should be non-empty 144 | assert len(result) > 0 145 | 146 | 147 | def test_error_handling(sample_project, tmp_path): 148 | """Test error handling for problematic files.""" 149 | try: 150 | # Create an unreadable file in the temporary directory 151 | bad_file = tmp_path / "bad.py" 152 | bad_file.write_text("def bad():\n pass\n") 153 | bad_file.chmod(0o000) # Remove read permissions 154 | 155 | # Include both the sample project and the tmp directory 156 | files = scan_directory(sample_project, include=["py"]) 157 | files = list(files) 158 | files.append(bad_file) 159 | 160 | # Convert files to (path, content) tuples, handling potential read errors 161 | files_with_content = [] 162 | for f in files: 163 | try: 164 | content = f.read_text() 165 | files_with_content.append((f, content)) 166 | except (PermissionError, OSError): 167 | # Still include the file, but with empty content 168 | files_with_content.append((f, "")) 169 | 170 | format_result = format_files(files_with_content) 171 | result = str(format_result) 172 | 173 | # Check that the bad file is mentioned in the result 174 | assert "bad.py" in result 175 | 176 | # Should still process good files 177 | assert "main.py" in result 178 | assert "def main():" in result 179 | finally: 180 | # Cleanup 181 | try: 182 | bad_file.chmod(0o666) 183 | bad_file.unlink() 184 | except Exception: 185 | pass 186 | -------------------------------------------------------------------------------- /tests/test_patterns.py: -------------------------------------------------------------------------------- 1 | from copychat.patterns import ( 2 | DEFAULT_EXTENSIONS, 3 | EXCLUDED_DIRS, 4 | EXCLUDED_PATTERNS, 5 | ) 6 | 7 | 8 | def test_default_extensions(): 9 | """Test default extensions are properly defined.""" 10 | assert isinstance(DEFAULT_EXTENSIONS, set) 11 | assert "py" in DEFAULT_EXTENSIONS 12 | assert "js" in DEFAULT_EXTENSIONS 13 | assert "md" in DEFAULT_EXTENSIONS 14 | 15 | 16 | def test_excluded_dirs(): 17 | """Test excluded directories are properly defined.""" 18 | assert isinstance(EXCLUDED_DIRS, set) 19 | assert ".git" in EXCLUDED_DIRS 20 | assert "node_modules" in EXCLUDED_DIRS 21 | assert "__pycache__" in EXCLUDED_DIRS 22 | 23 | 24 | def test_excluded_patterns(): 25 | """Test excluded patterns are properly defined.""" 26 | assert isinstance(EXCLUDED_PATTERNS, set) 27 | assert "*.pyc" in EXCLUDED_PATTERNS 28 | assert "*.log" in EXCLUDED_PATTERNS 29 | assert ".env" in EXCLUDED_PATTERNS 30 | -------------------------------------------------------------------------------- /tests/test_sources.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import shutil 3 | from copychat.sources import GitHubSource 4 | 5 | 6 | @pytest.fixture 7 | def temp_cache_dir(tmp_path): 8 | """Create temporary cache directory.""" 9 | cache_dir = tmp_path / "cache" 10 | cache_dir.mkdir() 11 | yield cache_dir 12 | # Cleanup 13 | if cache_dir.exists(): 14 | shutil.rmtree(cache_dir) 15 | 16 | 17 | def test_github_source_init(temp_cache_dir): 18 | """Test GitHubSource initialization.""" 19 | source = GitHubSource("owner/repo", cache_dir=temp_cache_dir) 20 | assert source.repo_path == "owner/repo" 21 | assert source.clone_url == "https://github.com/owner/repo.git" 22 | assert source.repo_dir == temp_cache_dir / "owner_repo" 23 | 24 | 25 | def test_github_source_fetch(temp_cache_dir): 26 | """Test fetching a real public repository.""" 27 | source = GitHubSource("prefecthq/prefect", cache_dir=temp_cache_dir) 28 | repo_dir = source.fetch() 29 | 30 | assert repo_dir.exists() 31 | assert (repo_dir / ".git").exists() 32 | assert (repo_dir / "README.md").exists() 33 | 34 | # Test update of existing repo 35 | repo_dir = source.fetch() # Should use cached version 36 | assert repo_dir.exists() 37 | 38 | 39 | def test_github_source_cleanup(temp_cache_dir): 40 | """Test repository cleanup.""" 41 | source = GitHubSource("prefecthq/prefect", cache_dir=temp_cache_dir) 42 | source.fetch() 43 | assert source.repo_dir.exists() 44 | 45 | source.cleanup() 46 | assert not source.repo_dir.exists() 47 | -------------------------------------------------------------------------------- /tests/tests/data/test1.txt: -------------------------------------------------------------------------------- 1 | This is a test file -------------------------------------------------------------------------------- /tests/tests/data/test2.md: -------------------------------------------------------------------------------- 1 | This is another test file --------------------------------------------------------------------------------