├── .python-version
├── tests
├── data
│ ├── test1.txt
│ └── test2.md
├── tests
│ └── data
│ │ ├── test1.txt
│ │ └── test2.md
├── fixtures
│ ├── __init__.py
│ ├── .gitignore
│ ├── config
│ │ └── settings.yml
│ ├── src
│ │ ├── main.py
│ │ ├── styles
│ │ │ └── main.css
│ │ ├── types.ts
│ │ ├── app.js
│ │ └── utils
│ │ │ └── helpers.py
│ ├── db
│ │ └── schema.sql
│ └── docs
│ │ └── README.md
├── test_patterns.py
├── conftest.py
├── test_sources.py
├── test_format.py
├── test_github_item.py
├── test_core.py
├── test_ccignore.py
├── test_integration.py
└── test_cli.py
├── src
└── copychat
│ ├── __init__.py
│ ├── patterns.py
│ ├── mcp_server.py
│ ├── cli_utilities.py
│ ├── format.py
│ ├── sources.py
│ ├── core.py
│ └── cli.py
├── .pre-commit-config.yaml
├── .ccignore
├── .github
└── workflows
│ ├── publish.yml
│ ├── run-static.yml
│ └── run-tests.yml
├── pyproject.toml
├── .gitignore
├── AGENTS.md
└── README.md
/.python-version:
--------------------------------------------------------------------------------
1 | 3.12
2 |
--------------------------------------------------------------------------------
/tests/data/test1.txt:
--------------------------------------------------------------------------------
1 | This is a test file
--------------------------------------------------------------------------------
/tests/data/test2.md:
--------------------------------------------------------------------------------
1 | This is another test file
--------------------------------------------------------------------------------
/tests/tests/data/test1.txt:
--------------------------------------------------------------------------------
1 | This is a test file
--------------------------------------------------------------------------------
/tests/tests/data/test2.md:
--------------------------------------------------------------------------------
1 | This is another test file
--------------------------------------------------------------------------------
/tests/fixtures/__init__.py:
--------------------------------------------------------------------------------
1 | """Test fixtures package."""
2 |
--------------------------------------------------------------------------------
/tests/fixtures/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | __pycache__/
3 | .env
4 | node_modules/
5 | .DS_Store
6 |
--------------------------------------------------------------------------------
/tests/fixtures/config/settings.yml:
--------------------------------------------------------------------------------
1 | app:
2 | name: TestApp
3 | version: 1.0.0
4 |
5 | database:
6 | host: localhost
7 | port: 5432
8 |
--------------------------------------------------------------------------------
/tests/fixtures/src/main.py:
--------------------------------------------------------------------------------
1 | def main():
2 | """Example main function."""
3 | print("Hello from main!")
4 | return True
5 |
6 |
7 | if __name__ == "__main__":
8 | main()
9 |
--------------------------------------------------------------------------------
/tests/fixtures/src/styles/main.css:
--------------------------------------------------------------------------------
1 | .app {
2 | margin: 0;
3 | padding: 20px;
4 | font-family: sans-serif;
5 | }
6 |
7 | .header {
8 | color: #333;
9 | font-size: 24px;
10 | }
11 |
--------------------------------------------------------------------------------
/tests/fixtures/src/types.ts:
--------------------------------------------------------------------------------
1 | interface User {
2 | id: number;
3 | name: string;
4 | email: string;
5 | }
6 |
7 | type UserRole = "admin" | "user" | "guest";
8 |
9 | export { User, UserRole };
10 |
--------------------------------------------------------------------------------
/tests/fixtures/db/schema.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE users (
2 | id SERIAL PRIMARY KEY,
3 | username VARCHAR(50) NOT NULL,
4 | email VARCHAR(255) NOT NULL
5 | );
6 |
7 | CREATE INDEX idx_username ON users(username);
--------------------------------------------------------------------------------
/tests/fixtures/docs/README.md:
--------------------------------------------------------------------------------
1 | # Test Project
2 |
3 | This is a test project with various file types.
4 |
5 | ## Structure
6 | - src/
7 | - main.py
8 | - app.js
9 | - styles/
10 | - utils/
11 | - docs/
12 | - tests/
13 |
--------------------------------------------------------------------------------
/tests/fixtures/src/app.js:
--------------------------------------------------------------------------------
1 | function App() {
2 | return (
3 | <div>
4 | <h1>Hello World</h1>
5 | <p>This is a test component</p>
6 | </div>
7 | );
8 | }
9 |
10 | export default App;
11 |
--------------------------------------------------------------------------------
/src/copychat/__init__.py:
--------------------------------------------------------------------------------
1 | """Convert source code directories into markdown for LLM context."""
2 |
3 |
4 | # --- Version ---
5 |
6 | try:
7 | from ._version import version as __version__ # type: ignore
8 | except ImportError:
9 | __version__ = "unknown"
10 |
--------------------------------------------------------------------------------
/tests/fixtures/src/utils/helpers.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 |
4 | def format_string(value: Any) -> str:
5 | """Format any value as a string."""
6 | return str(value).strip()
7 |
8 |
9 | def calculate_total(numbers: list[float]) -> float:
10 | """Calculate sum of numbers."""
11 | return sum(numbers)
12 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | fail_fast: true
2 |
3 | repos:
4 | - repo: https://github.com/abravalheri/validate-pyproject
5 | rev: v0.23
6 | hooks:
7 | - id: validate-pyproject
8 |
9 | - repo: https://github.com/astral-sh/ruff-pre-commit
10 | # Ruff version.
11 | rev: v0.11.4
12 | hooks:
13 | # Run the linter.
14 | - id: ruff
15 | args: [--fix, --exit-non-zero-on-fix]
16 | # Run the formatter.
17 | - id: ruff-format
18 |
--------------------------------------------------------------------------------
/.ccignore:
--------------------------------------------------------------------------------
1 | # CopyChat default ignore patterns
2 | # This file uses the same syntax as .gitignore
3 | # It applies to the current directory and all subdirectories
4 |
5 | # Build artifacts
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 | *.so
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | *.egg-info/
16 | .mypy_cache/
17 |
18 | # Editor and IDE files
19 | .idea/
20 | .vscode/
21 | *.swp
22 | *.swo
23 | *~
24 |
25 | # Local development
26 | .env
27 | .venv
28 | env/
29 | venv/
30 |
31 | # Dependencies
32 | node_modules/
33 |
34 | # Project-specific
35 | # Add patterns specific to your project here
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish Copychat to PyPI
2 | on:
3 | release:
4 | types: [published]
5 | workflow_dispatch:
6 |
7 | jobs:
8 | publish-pypi-release:
9 | runs-on: ubuntu-latest
10 | environment: release
11 | permissions:
12 | contents: write
13 | id-token: write
14 | steps:
15 | - name: Checkout
16 | uses: actions/checkout@v4
17 | - name: Set up Python
18 | uses: actions/setup-python@v5
19 | with:
20 | python-version: "3.11"
21 | cache: pip
22 | cache-dependency-path: "**/pyproject.toml"
23 | - name: Install dependencies
24 | run: |
25 | pip install setuptools wheel build
26 | - name: Build
27 | run: |
28 | python -m build
29 | - name: Publish
30 | uses: pypa/gh-action-pypi-publish@release/v1
31 | with:
32 | verbose: true
33 |
--------------------------------------------------------------------------------
/tests/test_patterns.py:
--------------------------------------------------------------------------------
1 | from copychat.patterns import (
2 | DEFAULT_EXTENSIONS,
3 | EXCLUDED_DIRS,
4 | EXCLUDED_PATTERNS,
5 | )
6 |
7 |
8 | def test_default_extensions():
9 | """Test default extensions are properly defined."""
10 | assert isinstance(DEFAULT_EXTENSIONS, set)
11 | assert "py" in DEFAULT_EXTENSIONS
12 | assert "js" in DEFAULT_EXTENSIONS
13 | assert "md" in DEFAULT_EXTENSIONS
14 |
15 |
16 | def test_excluded_dirs():
17 | """Test excluded directories are properly defined."""
18 | assert isinstance(EXCLUDED_DIRS, set)
19 | assert ".git" in EXCLUDED_DIRS
20 | assert "node_modules" in EXCLUDED_DIRS
21 | assert "__pycache__" in EXCLUDED_DIRS
22 |
23 |
24 | def test_excluded_patterns():
25 | """Test excluded patterns are properly defined."""
26 | assert isinstance(EXCLUDED_PATTERNS, set)
27 | assert "*.pyc" in EXCLUDED_PATTERNS
28 | assert "*.log" in EXCLUDED_PATTERNS
29 | assert ".env" in EXCLUDED_PATTERNS
30 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "copychat"
3 | dynamic = ["version"]
4 | description = "Easily copy code for LLM context"
5 | authors = []
6 | dependencies = [
7 | "typer>=0.9.0",
8 | "rich>=13.7.0",
9 | "pyperclip>=1.8.2",
10 | "pathspec>=0.12.1",
11 | "tiktoken>=0.6.0",
12 | "gitpython>=3.1.42",
13 | "fastmcp>=2.8.1",
14 | ]
15 | requires-python = ">=3.10"
16 | readme = "README.md"
17 | license = { text = "MIT" }
18 |
19 | [project.scripts]
20 | copychat = "copychat.cli:app"
21 |
22 | [build-system]
23 | requires = ["setuptools>=64", "setuptools_scm>=8"]
24 | build-backend = "setuptools.build_meta"
25 |
26 | [tool.setuptools_scm]
27 | version_file = "src/copychat/_version.py"
28 |
29 | [tool.ruff]
30 | line-length = 88
31 | target-version = "py310"
32 |
33 | [tool.pytest.ini_options]
34 | testpaths = ["tests"]
35 | addopts = "-v --tb=short"
36 |
37 | [dependency-groups]
38 | dev = [
39 | "pre-commit>=4.2.0",
40 | "pytest>=8.3.3",
41 | "pytest-asyncio>=0.24.0",
42 | "pytest-cov>=5.0.0",
43 | ]
44 |
--------------------------------------------------------------------------------
/.github/workflows/run-static.yml:
--------------------------------------------------------------------------------
1 | name: Run static analysis
2 |
3 | env:
4 | # enable colored output
5 | # https://github.com/pytest-dev/pytest/issues/7443
6 | PY_COLORS: 1
7 |
8 | on:
9 | push:
10 | branches: ["main"]
11 | paths:
12 | - "src/**"
13 | - "tests/**"
14 | - "uv.lock"
15 | - "pyproject.toml"
16 | - ".github/workflows/**"
17 |
18 | # run on all pull requests because these checks are required and will block merges otherwise
19 | pull_request:
20 |
21 | workflow_dispatch:
22 |
23 | permissions:
24 | contents: read
25 |
26 | jobs:
27 | static_analysis:
28 | timeout-minutes: 2
29 |
30 | runs-on: ubuntu-latest
31 |
32 | steps:
33 | - uses: actions/checkout@v4
34 | - name: Install uv
35 | uses: astral-sh/setup-uv@v5
36 | with:
37 | enable-cache: true
38 | cache-dependency-glob: "uv.lock"
39 | - name: Set up Python
40 | uses: actions/setup-python@v5
41 | with:
42 | python-version: "3.12"
43 | - name: Install dependencies
44 | run: uv sync --dev
45 | - name: Run pre-commit
46 | uses: pre-commit/action@v3.0.1
47 |
--------------------------------------------------------------------------------
/.github/workflows/run-tests.yml:
--------------------------------------------------------------------------------
1 | name: Run tests
2 |
3 | env:
4 | # enable colored output
5 | PY_COLORS: 1
6 |
7 | on:
8 | push:
9 | branches: ["main"]
10 | paths:
11 | - "src/**"
12 | - "tests/**"
13 | - "uv.lock"
14 | - "pyproject.toml"
15 | - ".github/workflows/**"
16 |
17 | # run on all pull requests because these checks are required and will block merges otherwise
18 | pull_request:
19 |
20 | workflow_dispatch:
21 |
22 | permissions:
23 | contents: read
24 |
25 | jobs:
26 | run_tests:
27 | name: "Run tests: Python ${{ matrix.python-version }} on ${{ matrix.os }}"
28 | runs-on: ${{ matrix.os }}
29 | strategy:
30 | matrix:
31 | os: [ubuntu-latest]
32 | python-version: ["3.10", "3.12"]
33 | fail-fast: false
34 | timeout-minutes: 5
35 |
36 | steps:
37 | - uses: actions/checkout@v4
38 |
39 | - name: Install uv
40 | uses: astral-sh/setup-uv@v5
41 | with:
42 | enable-cache: true
43 | cache-dependency-glob: "uv.lock"
44 | python-version: ${{ matrix.python-version }}
45 |
46 | - name: Install CopyChat
47 | run: uv sync --dev --locked
48 |
49 | - name: Run tests
50 | run: uv run pytest
51 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import pytest
3 | import shutil
4 |
5 |
6 | @pytest.fixture
7 | def sample_project(tmp_path) -> Path:
8 | """Create a copy of the fixture project in a temporary directory."""
9 | fixtures_dir = Path(__file__).parent / "fixtures"
10 | project_dir = tmp_path / "test_project"
11 |
12 | # Copy all fixtures to temporary directory
13 | shutil.copytree(fixtures_dir, project_dir, dirs_exist_ok=True)
14 |
15 | return project_dir
16 |
17 |
18 | @pytest.fixture
19 | def sample_project_files(sample_project) -> list[Path]:
20 | """Get a list of all files in the sample project."""
21 | return list(sample_project.rglob("*"))
22 |
23 |
24 | def test_fixture_structure(sample_project):
25 | """Verify the fixture structure is correct."""
26 | assert (sample_project / "src" / "main.py").exists()
27 | assert (sample_project / "src" / "app.js").exists()
28 | assert (sample_project / "src" / "styles" / "main.css").exists()
29 | assert (sample_project / "docs" / "README.md").exists()
30 | assert (sample_project / "config" / "settings.yml").exists()
31 | assert (sample_project / "db" / "schema.sql").exists()
32 | assert (sample_project / ".gitignore").exists()
33 | assert (sample_project / ".env").exists()
34 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.so
6 | .Python
7 | build/
8 | develop-eggs/
9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 |
23 | # Virtual environments
24 | .env
25 | .venv
26 | env/
27 | venv/
28 | ENV/
29 | env.bak/
30 | venv.bak/
31 |
32 | # IDE
33 | .idea/
34 | .vscode/
35 | *.swp
36 | *.swo
37 | *~
38 | .project
39 | .classpath
40 | .settings/
41 | *.sublime-workspace
42 | *.sublime-project
43 |
44 | # OS
45 | .DS_Store
46 | .DS_Store?
47 | ._*
48 | .Spotlight-V100
49 | .Trashes
50 | ehthumbs.db
51 | Thumbs.db
52 | Desktop.ini
53 |
54 | # Logs and databases
55 | *.log
56 | *.sqlite
57 | *.db
58 |
59 | # Coverage and test reports
60 | htmlcov/
61 | .tox/
62 | .nox/
63 | .coverage
64 | .coverage.*
65 | .cache
66 | nosetests.xml
67 | coverage.xml
68 | *.cover
69 | *.py,cover
70 | .hypothesis/
71 | .pytest_cache/
72 | cover/
73 |
74 | # Build and packaging
75 | *.manifest
76 | *.spec
77 | pip-log.txt
78 | pip-delete-this-directory.txt
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # Local development
84 | local_settings.py
85 | db.sqlite3
86 | db.sqlite3-journal
87 |
88 | # Node (in case of docs or frontend components)
89 | node_modules/
90 | npm-debug.log*
91 | yarn-debug.log*
92 | yarn-error.log*
93 | .pnpm-debug.log*
94 | src/copychat/_version.py
95 |
--------------------------------------------------------------------------------
/tests/test_sources.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import shutil
3 | from copychat.sources import GitHubSource
4 |
5 |
6 | @pytest.fixture
7 | def temp_cache_dir(tmp_path):
8 | """Create temporary cache directory."""
9 | cache_dir = tmp_path / "cache"
10 | cache_dir.mkdir()
11 | yield cache_dir
12 | # Cleanup
13 | if cache_dir.exists():
14 | shutil.rmtree(cache_dir)
15 |
16 |
17 | def test_github_source_init(temp_cache_dir):
18 | """Test GitHubSource initialization."""
19 | source = GitHubSource("owner/repo", cache_dir=temp_cache_dir)
20 | assert source.repo_path == "owner/repo"
21 | assert source.clone_url == "https://github.com/owner/repo.git"
22 | assert source.repo_dir == temp_cache_dir / "owner_repo"
23 |
24 |
25 | def test_github_source_fetch(temp_cache_dir):
26 | """Test fetching a real public repository."""
27 | source = GitHubSource("prefecthq/prefect", cache_dir=temp_cache_dir)
28 | repo_dir = source.fetch()
29 |
30 | assert repo_dir.exists()
31 | assert (repo_dir / ".git").exists()
32 | assert (repo_dir / "README.md").exists()
33 |
34 | # Test update of existing repo
35 | repo_dir = source.fetch() # Should use cached version
36 | assert repo_dir.exists()
37 |
38 |
39 | def test_github_source_cleanup(temp_cache_dir):
40 | """Test repository cleanup."""
41 | source = GitHubSource("prefecthq/prefect", cache_dir=temp_cache_dir)
42 | source.fetch()
43 | assert source.repo_dir.exists()
44 |
45 | source.cleanup()
46 | assert not source.repo_dir.exists()
47 |
--------------------------------------------------------------------------------
/src/copychat/patterns.py:
--------------------------------------------------------------------------------
1 | """Default patterns and extensions for file filtering."""
2 |
3 | # Default extensions we care about (without dots)
4 | DEFAULT_EXTENSIONS = {
5 | # Web
6 | "html",
7 | "css",
8 | "scss",
9 | "js",
10 | "jsx",
11 | "ts",
12 | "tsx",
13 | "json",
14 | # Python
15 | "py",
16 | "pyi",
17 | "pyw",
18 | # Ruby
19 | "rb",
20 | "erb",
21 | # JVM
22 | "java",
23 | "kt",
24 | "scala",
25 | "gradle",
26 | # Systems
27 | "c",
28 | "h",
29 | "cpp",
30 | "hpp",
31 | "rs",
32 | "go",
33 | # Shell
34 | "sh",
35 | "bash",
36 | "zsh",
37 | "fish",
38 | # Config
39 | "yaml",
40 | "yml",
41 | "toml",
42 | "ini",
43 | "conf",
44 | # Docs
45 | "md",
46 | "mdx",
47 | "rst",
48 | "txt",
49 | # Other
50 | "sql",
51 | "graphql",
52 | "xml",
53 | "dockerfile",
54 | "gitignore",
55 | }
56 |
57 | # Directories that should always be excluded
58 | EXCLUDED_DIRS = {
59 | # Version Control
60 | ".git",
61 | ".svn",
62 | ".hg",
63 | # Dependencies
64 | "node_modules",
65 | "venv",
66 | ".venv",
67 | "env",
68 | "__pycache__",
69 | ".pytest_cache",
70 | ".ruff_cache",
71 | "target",
72 | "build",
73 | "dist",
74 | # IDE
75 | ".idea",
76 | ".vscode",
77 | # Other
78 | ".next",
79 | ".nuxt",
80 | ".output",
81 | "coverage",
82 | }
83 |
84 | # Files or patterns that should always be excluded
85 | EXCLUDED_PATTERNS = {
86 | # Build artifacts
87 | "*.pyc",
88 | "*.pyo",
89 | "*.pyd",
90 | "*.so",
91 | "*.dll",
92 | "*.dylib",
93 | "*.class",
94 | "*.jar",
95 | "*.war",
96 | "*.min.js",
97 | "*.min.css",
98 | # Logs and databases
99 | "*.log",
100 | "*.sqlite",
101 | "*.db",
102 | # OS files
103 | ".DS_Store",
104 | "Thumbs.db",
105 | "desktop.ini",
106 | # Package files
107 | "package-lock.json",
108 | "yarn.lock",
109 | "poetry.lock",
110 | # Environment and secrets
111 | ".env",
112 | ".env.*",
113 | "*.env",
114 | # Other
115 | "*.bak",
116 | "*.swp",
117 | "*.swo",
118 | "*~",
119 | }
120 |
--------------------------------------------------------------------------------
/src/copychat/mcp_server.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 | from fastmcp import FastMCP
3 | from pydantic import Field
4 | import pyperclip
5 |
6 | from typer.testing import CliRunner
7 |
8 | mcp = FastMCP(
9 | "Copychat",
10 | instructions="An MCP server for copying source code and GitHub items to the clipboard. Use this whenever the user wants you to copy something.",
11 | )
12 |
13 |
14 | @mcp.tool
15 | def copy_text_to_clipboard(text: str) -> str:
16 | """Copy any text to the clipboard. This is useful for copying ad-hoc text.
17 | For files, use `copy_files_to_clipboard` instead."""
18 | pyperclip.copy(text)
19 | return f"Copied {len(text)} characters to the clipboard."
20 |
21 |
22 | @mcp.tool
23 | def read_clipboard() -> str:
24 | """Read the clipboard."""
25 | return pyperclip.paste()
26 |
27 |
28 | @mcp.tool
29 | def copy_files_to_clipboard(
30 | paths: list[str],
31 | include: Annotated[
32 | str | None,
33 | Field(
34 | description="Comma-separated list of file extensions to include, e.g. 'py,js,ts'. If None (default), all files are included."
35 | ),
36 | ] = None,
37 | exclude: Annotated[
38 | str | None,
39 | Field(
40 | description="Comma-separated list of glob patterns to exclude, e.g. '*.pyc,*.pyo,*.pyd'. If None (default), no files are excluded."
41 | ),
42 | ] = None,
43 | append_to_clipboard: Annotated[
44 | bool,
45 | Field(
46 | description="If True, appends to the existing clipboard. If False (default), overwrites the clipboard."
47 | ),
48 | ] = False,
49 | ) -> str:
50 | """Copy local files to the clipboard without loading them into context."""
51 | from copychat.cli import app
52 |
53 | if not paths:
54 | raise ValueError("No paths provided")
55 |
56 | runner = CliRunner()
57 |
58 | args = [*paths]
59 |
60 | if include:
61 | args.append("--include")
62 | args.append(include)
63 |
64 | if exclude:
65 | args.append("--exclude")
66 | args.append(exclude)
67 |
68 | if append_to_clipboard:
69 | args.append("--append")
70 |
71 | result = runner.invoke(app, args + ["-v"])
72 |
73 | if result.exception:
74 | raise result.exception
75 |
76 | return result.output
77 |
78 |
79 | if __name__ == "__main__":
80 | mcp.run()
81 |
--------------------------------------------------------------------------------
/src/copychat/cli_utilities.py:
--------------------------------------------------------------------------------
1 | import typer
2 |
3 | from collections.abc import Callable, Sequence
4 | from typing import Any
5 |
6 | import click
7 | from typer.core import DEFAULT_MARKUP_MODE, MarkupMode
8 | from typer.models import CommandFunctionType
9 |
10 |
11 | class TyperDefaultCommand(typer.core.TyperCommand):
12 | """Type that indicates if a command is the default command."""
13 |
14 |
15 | class TyperGroupWithDefault(typer.core.TyperGroup):
16 | """Use a default command if specified."""
17 |
18 | def __init__(
19 | self,
20 | *,
21 | name: str | None = None,
22 | commands: dict[str, click.Command] | Sequence[click.Command] | None = None,
23 | rich_markup_mode: MarkupMode = DEFAULT_MARKUP_MODE,
24 | rich_help_panel: str | None = None,
25 | **attrs: Any,
26 | ) -> None:
27 | super().__init__(
28 | name=name,
29 | commands=commands,
30 | rich_markup_mode=rich_markup_mode,
31 | rich_help_panel=rich_help_panel,
32 | **attrs,
33 | )
34 | # find the default command if any
35 | self.default_command = None
36 | if len(self.commands) > 1:
37 | for name, command in reversed(self.commands.items()):
38 | if isinstance(command, TyperDefaultCommand):
39 | self.default_command = name
40 | break
41 |
42 | def make_context(
43 | self,
44 | info_name: str | None,
45 | args: list[str],
46 | parent: click.Context | None = None,
47 | **extra: Any,
48 | ) -> click.Context:
49 | # if --help is specified, show the group help
50 | # else if default command was specified in the group and no args or no subcommand is specified, use the default command
51 | if (
52 | self.default_command
53 | and (not args or args[0] not in self.commands)
54 | and "--help" not in args
55 | ):
56 | args = [self.default_command] + args
57 | return super().make_context(info_name, args, parent, **extra)
58 |
59 |
60 | class TyperWithDefaultCommand(typer.Typer):
61 | """A Typer class with default command support.
62 | https://github.com/fastapi/typer/issues/18
63 |
64 | @app.command(default=True)
65 | def default_command():
66 | '''This is the default command.'''
67 | pass
68 |
69 | @app.command()
70 | def some_command():
71 | pass
72 |
73 | """
74 |
75 | def __init__(self, **kwargs):
76 | super().__init__(cls=TyperGroupWithDefault, **kwargs)
77 |
78 | def command(
79 | self, default: bool = False, **kwargs
80 | ) -> Callable[[CommandFunctionType], CommandFunctionType]:
81 | return super().command(cls=TyperDefaultCommand if default else None, **kwargs)
82 |
--------------------------------------------------------------------------------
/tests/test_format.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import pytest
3 | from copychat.format import (
4 | guess_language,
5 | format_file,
6 | create_header,
7 | estimate_tokens,
8 | format_files,
9 | )
10 |
11 |
12 | @pytest.fixture
13 | def temp_files(tmp_path):
14 | """Create temporary test files."""
15 | # Create a python file
16 | py_file = tmp_path / "test.py"
17 | py_file.write_text("def hello():\n print('world')")
18 |
19 | # Create a javascript file
20 | js_file = tmp_path / "test.js"
21 | js_file.write_text("function hello() {\n console.log('world');\n}")
22 |
23 | return tmp_path, [py_file, js_file]
24 |
25 |
26 | def test_guess_language():
27 | """Test language detection from file extensions."""
28 | assert guess_language(Path("test.py")) == "python"
29 | assert guess_language(Path("test.js")) == "javascript"
30 | assert guess_language(Path("test.tsx")) == "tsx"
31 | assert guess_language(Path("test.unknown")) is None
32 |
33 |
34 | def test_format_file(temp_files):
35 | """Test single file formatting."""
36 | root_path, (py_file, _) = temp_files
37 |
38 | formatted_file = format_file(py_file, root_path)
39 | result = formatted_file.formatted_content
40 |
41 | assert " 0
70 | assert isinstance(tokens, int)
71 |
72 |
73 | def test_format_files(temp_files):
74 | """Test formatting multiple files."""
75 | root_path, files = temp_files
76 |
77 | # Pass a list of tuples (Path, str) to format_files
78 | file_contents = [(f, f.read_text()) for f in files]
79 | format_result = format_files(file_contents)
80 | result = str(format_result)
81 |
82 | # Check header
83 | assert "Generated by copychat" in result
84 |
85 | # Check both files are included
86 | assert 'path="test.py"' in result
87 | assert 'path="test.js"' in result
88 |
89 | # Check content
90 | assert "def hello():" in result
91 | assert "console.log('world');" in result
92 |
93 |
94 | def test_format_files_empty():
95 | """Test formatting with no files."""
96 | format_result = format_files([])
97 | result = str(format_result)
98 | assert "No files found" in result
99 |
100 |
101 | def test_format_file_error(tmp_path):
102 | """Test handling of file read errors."""
103 | non_existent = tmp_path / "does_not_exist.py"
104 | formatted_file = format_file(non_existent, tmp_path)
105 | result = formatted_file.formatted_content
106 | assert "Error processing" in result
107 |
--------------------------------------------------------------------------------
/tests/test_github_item.py:
--------------------------------------------------------------------------------
1 | from copychat.sources import GitHubItem
2 |
3 |
4 | class DummyResponse:
5 | def __init__(self, data, status=200, is_text=False):
6 | self._data = data
7 | self.status_code = status
8 | self.ok = status == 200
9 | self._is_text = is_text
10 |
11 | def raise_for_status(self):
12 | if not self.ok:
13 | raise Exception("status")
14 |
15 | def json(self):
16 | return self._data
17 |
18 | @property
19 | def text(self):
20 | return self._data if self._is_text else ""
21 |
22 |
23 | def test_github_item_fetch(monkeypatch):
24 | """GitHubItem should format issue and comments."""
25 |
26 | issue_data = {
27 | "title": "Test issue",
28 | "body": "Body text",
29 | "comments_url": "http://example.com/comments",
30 | "pull_request": {},
31 | "html_url": "https://github.com/owner/repo/pull/1",
32 | "user": {"login": "testuser"},
33 | "created_at": "2024-01-01",
34 | "updated_at": "2024-01-02",
35 | "state": "open",
36 | }
37 | comments = [{"user": {"login": "alice"}, "created_at": "2024-01-01", "body": "hi"}]
38 | reviews = [
39 | {
40 | "user": {"login": "bob"},
41 | "created_at": "2024-01-02",
42 | "path": "file.py",
43 | "body": "looks good",
44 | }
45 | ]
46 |
47 | calls = []
48 |
49 | def fake_get(url, headers=None, timeout=0):
50 | calls.append(url)
51 | if "comments" in url and "pulls" in url:
52 | return DummyResponse(reviews)
53 | if "comments" in url:
54 | return DummyResponse(comments)
55 | return DummyResponse(issue_data)
56 |
57 | monkeypatch.setattr("requests.get", fake_get)
58 |
59 | item = GitHubItem("owner/repo", 1)
60 | path, content = item.fetch()
61 |
62 | assert path.name == "owner_repo_pr_1.md"
63 | assert "Test issue" in content
64 | assert "alice" in content
65 | assert "looks good" in content
66 | assert "**Pull Request**" in content
67 | assert "**Status**: OPEN" in content
68 | assert "**Author**: testuser" in content
69 | assert "https://github.com/owner/repo/pull/1" in content
70 | assert any("pulls" in c for c in calls)
71 |
72 |
73 | def test_github_item_fetch_with_diff(monkeypatch):
74 | """GitHubItem should include PR diff when available."""
75 |
76 | issue_data = {
77 | "title": "Test PR",
78 | "body": "PR description",
79 | "comments_url": "http://example.com/comments",
80 | "pull_request": {},
81 | "html_url": "https://github.com/owner/repo/pull/2",
82 | "user": {"login": "testuser"},
83 | "created_at": "2024-01-01",
84 | "updated_at": "2024-01-02",
85 | "state": "open",
86 | }
87 | comments = []
88 | reviews = []
89 | diff_content = """diff --git a/file.txt b/file.txt
90 | index abc123..def456 100644
91 | --- a/file.txt
92 | +++ b/file.txt
93 | @@ -1,3 +1,3 @@
94 | Line 1
95 | -Line 2
96 | +Line 2 modified
97 | Line 3"""
98 |
99 | calls = []
100 | headers_received = {}
101 |
102 | def fake_get(url, headers=None, timeout=0):
103 | calls.append(url)
104 | if headers:
105 | headers_received[url] = headers
106 |
107 | if "diff" in headers.get("Accept", "") and "pulls" in url:
108 | return DummyResponse(diff_content, is_text=True)
109 | if "comments" in url and "pulls" in url:
110 | return DummyResponse(reviews)
111 | if "comments" in url:
112 | return DummyResponse(comments)
113 | return DummyResponse(issue_data)
114 |
115 | monkeypatch.setattr("requests.get", fake_get)
116 |
117 | item = GitHubItem("owner/repo", 2)
118 | path, content = item.fetch()
119 |
120 | assert path.name == "owner_repo_pr_2.md"
121 | assert "Test PR" in content
122 | assert "PR description" in content
123 | assert "**Pull Request**" in content
124 | assert "## PR Diff" in content
125 | assert "```diff" in content
126 | assert "+Line 2 modified" in content
127 | assert "application/vnd.github.diff" in headers_received.get(
128 | "https://api.github.com/repos/owner/repo/pulls/2", {}
129 | ).get("Accept", "")
130 |
--------------------------------------------------------------------------------
/AGENTS.md:
--------------------------------------------------------------------------------
1 | # AGENTS
2 |
3 | Copychat converts project code into LLM-friendly context. This is a guide to help LLMs quickly understand and navigate the codebase. The repo is maintained by [@jlowin](https://github.com/jlowin) on [GitHub](https://github.com/jlowin/copychat).
4 |
5 | ## Project Overview
6 |
7 | Copychat is a CLI tool that prepares source code for LLM context windows by:
8 | 1. Scanning directories/files based on inclusion/exclusion patterns
9 | 2. Respecting `.gitignore` and `.ccignore` patterns
10 | 3. Formatting code with proper language tags
11 | 4. Including git diff information when requested
12 | 5. Estimating token counts for context planning
13 |
14 | ## Repository Layout
15 |
16 | * `README.md` – overview and documentation
17 | * `src/copychat/` – CLI and library implementation
18 | * `tests/` – pytest suite
19 | * `pyproject.toml` – PEP-621 metadata; build is managed by **uv**
20 | * `.github/workflows/` – CI that lints, runs tests, and publishes to PyPI
21 | * `.ccignore` – custom ignore patterns for copychat itself
22 |
23 | ## Core Components
24 |
25 | * `core.py` - Main scanning functionality, git integration, and file handling
26 | - `scan_directory()` - Primary function for finding and processing files
27 | - `DiffMode` - Enum defining different git diff display modes
28 | - Handles `.gitignore` and `.ccignore` patterns
29 |
30 | * `format.py` - Formats code for LLM consumption
31 | - `format_files()` - Formats file content with metadata
32 | - `estimate_tokens()` - Calculates approximate token usage
33 |
34 | * `cli.py` - Command-line interface
35 | - Main entry point for user interaction
36 | - Parses arguments and handles output (clipboard/file)
37 |
38 | * `sources.py` - Handles different source types (filesystem, GitHub)
39 | - `GitHubSource` - Fetches code from GitHub repositories
40 |
41 | * `patterns.py` - Defines file patterns and exclusions
42 |
43 | ## Key Workflows
44 |
45 | 1. **Basic Usage**: `copychat` scans the current directory and copies formatted code to clipboard
46 | 2. **Filtered Scanning**: `copychat --include py,js` only processes specified file types
47 | 3. **Git Integration**: `copychat --diff-mode full-with-diff` shows changes with context
48 | 4. **GitHub**: `copychat --source github:user/repo` fetches remote code
49 |
50 | ## Common CLI Flags
51 |
52 | * `--include py,js` - restrict scanned extensions
53 | * `--exclude "**/*.test.js"` - exclude specific patterns
54 | * `--diff-mode full-with-diff` - embed git diff chunks
55 | * `--diff-branch main` - compare against specific branch
56 | * `--source github:owner/repo` - pull remote code via GitHub
57 | * `--out file.md` - write to file instead of clipboard
58 | * `--depth 2` - limit directory recursion depth
59 |
60 | ## Data Flow
61 |
62 | 1. CLI parses arguments → determines source type
63 | 2. `scan_directory()` finds matching files → applies filters
64 | 3. Git diff information is added if requested
65 | 4. `format_files()` processes content → calculates tokens
66 | 5. Formatted output is sent to clipboard/file/stdout
67 |
68 | ## Common Patterns
69 |
70 | - Path handling uses `pathlib.Path` throughout
71 | - File content is processed as `(path, content)` tuples
72 | - Git operations use subprocess to call git commands
73 | - `.ccignore` supports hierarchical pattern inheritance
74 |
75 | ## Testing
76 |
77 | - `tests/test_integration.py` has examples of all major functionality
78 | - `sample_project` fixture provides test files of various types
79 |
80 | ## Development
81 |
82 | ```bash
83 | uv sync # install runtime + dev deps
84 | uv run pre-commit run --all-files
85 | uv run pytest
86 | ```
87 |
88 | ## Release Process
89 |
90 | Pushing a semantic-version tag to `main` triggers the **Publish Copychat to PyPI** workflow, building and uploading the wheel.
91 |
92 | ## Development Guidelines
93 |
94 | 1. Fix all Ruff/Pyright warnings (`pre-commit` enforces)
95 | 2. Respect `.ccignore` and `.gitignore` patterns
96 | 3. Keep CLI flags backward-compatible
97 | 4. Document changes in `CHANGELOG.md`
98 |
99 | ## Extending the Tool
100 |
101 | - Add new file types in `patterns.py`
102 | - Add new source types in `sources.py`
103 | - Add new formatting options in `format.py`
104 | - `GitHubItem` in `sources.py` fetches issues and PRs. Pass an issue/PR URL
105 | (e.g. `owner/repo#123` or `https://github.com/owner/repo/issues/123`) directly
106 | to the main `copychat` command.
107 |
108 | **Note to LLMs**: When working on this repository, keep this AGENTS.md file up to date with new insights that would help future LLMs quickly understand the codebase structure and functionality. This file should serve as a quick reference that reduces the need for extensive code exploration.
109 |
--------------------------------------------------------------------------------
/tests/test_core.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from copychat.core import (
3 | find_gitignore,
4 | DiffMode,
5 | is_glob_pattern,
6 | resolve_paths,
7 | scan_directory,
8 | scan_files,
9 | )
10 | from pathlib import Path
11 |
12 |
def test_diff_mode_enum():
    """Test DiffMode enum values."""
    expected = {
        DiffMode.FULL: "full",
        DiffMode.FULL_WITH_DIFF: "full-with-diff",
        DiffMode.CHANGED_WITH_DIFF: "changed-with-diff",
        DiffMode.DIFF_ONLY: "diff-only",
    }
    for mode, value in expected.items():
        assert mode.value == value
19 |
20 |
def test_is_glob_pattern():
    """Test glob pattern detection."""
    for pattern in ("*.py", "src/**/*.js", "test/*"):
        assert is_glob_pattern(pattern)
    for plain_path in ("src/main.py", "path/to/file"):
        assert not is_glob_pattern(plain_path)
28 |
29 |
def test_resolve_paths(tmp_path):
    """Test path resolution with glob patterns."""
    # Lay out a small tree: two top-level .py files plus a src directory.
    for name in ("test1.py", "test2.py"):
        (tmp_path / name).touch()
    src = tmp_path / "src"
    src.mkdir()
    (src / "main.py").touch()
    (src / "util.js").touch()

    # Glob patterns expand relative to base_path.
    resolved = resolve_paths(["*.py", "src/**/*.py"], base_path=tmp_path)
    assert len(resolved) == 3
    for expected in (tmp_path / "test1.py", tmp_path / "test2.py", src / "main.py"):
        assert expected in resolved

    # Plain (non-glob) paths pass through alongside glob matches.
    resolved = resolve_paths(["src", "*.py"], base_path=tmp_path)
    assert len(resolved) == 3
    assert src in resolved
50 |
51 |
@pytest.fixture
def git_repo(tmp_path):
    """Create a temporary git repository with a .gitignore file."""
    (tmp_path / ".gitignore").write_text("*.pyc\n__pycache__/\n")
    return tmp_path
58 |
59 |
def test_scan_with_glob_patterns(tmp_path):
    """Test scanning files with glob patterns.

    Uses pytest's ``tmp_path`` fixture instead of the repo-relative
    ``tests/data`` directory the test used to create: writing to a
    CWD-relative path left stray files/directories in the repository
    (e.g. a duplicated ``tests/tests/data`` tree) whenever the test was
    run from a different working directory.
    """
    (tmp_path / "test1.txt").write_text("This is a test file")
    (tmp_path / "test2.md").write_text("This is another test file")

    files = scan_files(["*.txt", "*.md"], tmp_path)
    assert len(files) == 2
72 |
73 |
def test_find_gitignore_exists(git_repo):
    """Test finding .gitignore in current directory."""
    assert find_gitignore(git_repo) == git_repo / ".gitignore"
78 |
79 |
def test_find_gitignore_parent(git_repo):
    """Test finding .gitignore in parent directory."""
    subdir = git_repo / "subdir"
    subdir.mkdir()
    # Lookup from the child should walk up and find the parent's file.
    assert find_gitignore(subdir) == git_repo / ".gitignore"
86 |
87 |
def test_find_gitignore_not_found(tmp_path):
    """Test behavior when no .gitignore is found."""
    assert find_gitignore(tmp_path) is None
92 |
93 |
def test_scan_with_recursive_glob(tmp_path):
    """Test scanning with recursive glob patterns."""
    # One .py file at the root and one deep in a nested directory,
    # plus a .js file that the include filter should drop.
    (tmp_path / "test1.py").write_text("print('test1')")
    nested = tmp_path / "very" / "deep" / "nested"
    nested.mkdir(parents=True)
    (nested / "test2.py").write_text("print('test2')")
    (nested / "test.js").write_text("console.log('test')")

    # Scanning from the root picks up both .py files.
    found = scan_directory(tmp_path, include=["py"])
    names = [str(p) for p in found]
    assert len(found) == 2
    assert any("test1.py" in n for n in names)
    assert any("test2.py" in n for n in names)

    # Scanning from a subdirectory only sees the nested file.
    nested_found = scan_directory(tmp_path / "very", include=["py"])
    assert len(nested_found) == 1
    assert any("test2.py" in str(p) for p in nested_found)
117 |
118 |
def test_scan_single_file(tmp_path):
    """Test scanning a single file."""
    target = tmp_path / "test.py"
    target.write_text("print('hello world')")

    # Neighboring files must never leak into a single-file scan.
    (tmp_path / "other.py").write_text("print('other')")
    (tmp_path / "test.js").write_text("console.log('test')")

    # Scanning the file itself yields exactly that file and its content.
    found = scan_directory(target, include=["py"])
    assert len(found) == 1
    assert target in found
    assert found[target] == "print('hello world')"

    # An extension filter that does not match yields nothing.
    assert len(scan_directory(target, include=["js"])) == 0

    # A non-existent path also yields nothing.
    assert len(scan_directory(tmp_path / "nonexistent.py", include=["py"])) == 0
144 |
--------------------------------------------------------------------------------
/tests/test_ccignore.py:
--------------------------------------------------------------------------------
1 | """Tests for .ccignore functionality."""
2 |
3 | import pytest
4 | from copychat.core import (
5 | find_ccignore_files,
6 | get_ccignore_spec,
7 | scan_directory,
8 | )
9 |
10 |
@pytest.fixture
def ccignore_test_dir(tmp_path):
    """Create a test directory structure with .ccignore files."""
    subdir = tmp_path / "subdir"
    nested = subdir / "nested"
    nested.mkdir(parents=True)

    # Each level contributes its own ignore pattern.
    (tmp_path / ".ccignore").write_text("*.log\n")
    (subdir / ".ccignore").write_text("*.json\n")
    (nested / ".ccignore").write_text("*.md\n")

    # Populate every level with one file of each extension.
    for directory, stem in ((tmp_path, "root"), (subdir, "subdir"), (nested, "nested")):
        for ext, kind in (("txt", "text"), ("log", "log"), ("json", "json"), ("md", "md")):
            (directory / f"{stem}.{ext}").write_text(f"{stem} {kind} file")

    return tmp_path
47 |
48 |
def test_find_ccignore_files(ccignore_test_dir):
    """Test finding all .ccignore files that apply to a path."""
    nested_dir = ccignore_test_dir / "subdir" / "nested"

    # Should find 3 .ccignore files, ordered most specific first.
    result = find_ccignore_files(nested_dir)
    expected_order = [
        nested_dir / ".ccignore",
        ccignore_test_dir / "subdir" / ".ccignore",
        ccignore_test_dir / ".ccignore",
    ]
    assert len(result) == 3
    assert [entry[0] for entry in result] == expected_order

    # A directory with no .ccignore of its own only inherits the root one.
    empty_dir = ccignore_test_dir / "empty_dir"
    empty_dir.mkdir()
    result = find_ccignore_files(empty_dir)
    assert len(result) == 1
    assert result[0][0] == ccignore_test_dir / ".ccignore"
68 |
69 |
def test_get_ccignore_spec(ccignore_test_dir):
    """Test generating PathSpec from .ccignore files."""
    # (directory, names that must match, names that must not match);
    # patterns accumulate from root to the most nested directory.
    cases = [
        (ccignore_test_dir, {"test.log"}, {"test.json", "test.md"}),
        (ccignore_test_dir / "subdir", {"test.log", "test.json"}, {"test.md"}),
        (
            ccignore_test_dir / "subdir" / "nested",
            {"test.log", "test.json", "test.md"},
            set(),
        ),
    ]
    for directory, excluded, included in cases:
        spec = get_ccignore_spec(directory)
        for name in excluded:
            assert spec.match_file(name)
        for name in included:
            assert not spec.match_file(name)
89 |
90 |
def test_scan_directory_with_ccignore(ccignore_test_dir):
    """Test that scan_directory respects .ccignore patterns."""
    files = scan_directory(ccignore_test_dir, include=["txt", "json", "md", "log"])
    paths = {str(f) for f in files}

    def present(name):
        # True if any scanned path ends with the given file name.
        return any(p.endswith(name) for p in paths)

    # Root dir - .log excluded, everything else included.
    assert not present("root.log")
    assert present("root.txt")
    assert present("root.json")
    assert present("root.md")

    # Subdir - .log and .json excluded, others included.
    assert not present("subdir.log")
    assert not present("subdir.json")
    assert present("subdir.txt")
    assert present("subdir.md")

    # Nested subdir - only .txt survives the accumulated patterns.
    assert not present("nested.log")
    assert not present("nested.json")
    assert not present("nested.md")
    assert present("nested.txt")
114 |
115 |
def test_ccignore_with_extra_patterns(ccignore_test_dir):
    """Test that extra exclude patterns work with .ccignore."""
    spec = get_ccignore_spec(ccignore_test_dir, extra_patterns=["*.txt"])

    # .log comes from .ccignore, .txt from the extra patterns; .json
    # matches neither source and stays included.
    assert spec.match_file("test.log")
    assert spec.match_file("test.txt")
    assert not spec.match_file("test.json")
125 |
--------------------------------------------------------------------------------
/tests/test_integration.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from copychat.core import scan_directory, DiffMode
3 | from copychat.format import format_files
4 |
5 |
def test_basic_scan(sample_project):
    """Test basic file scanning functionality."""
    files = scan_directory(sample_project, include=["py", "js", "css"])

    # Exactly the requested extensions should be present.
    found_exts = {path.suffix.lstrip(".") for path in files}
    assert found_exts == {"py", "js", "css"}

    # Nested directories should be traversed too.
    as_strings = [str(path) for path in files]
    assert any("utils" in s for s in as_strings)
    assert any("styles" in s for s in as_strings)
20 |
21 |
def test_gitignore_handling(sample_project):
    """Test that .gitignore patterns are respected."""
    files = scan_directory(sample_project, include=["py", "env"])
    paths = {str(f) for f in files}

    # Everything matched by .gitignore must be absent from the scan.
    assert all(not p.endswith(".pyc") for p in paths)
    assert all("__pycache__" not in p for p in paths)
    assert all(not p.endswith(".env") for p in paths)
31 |
32 |
def test_formatting_output(sample_project):
    """Test that output is formatted correctly."""
    files = scan_directory(sample_project, include=["py", "js"])
    rendered = str(format_files([(path, path.read_text()) for path in files]))

    # File content should appear verbatim, with no line-number prefixes.
    for snippet in (
        "def main():",
        'print("Hello from main!")',
        "function App()",
        "def calculate_total",
    ):
        assert snippet in rendered
45 |
46 |
def test_different_file_types(sample_project):
    """Test handling of different file types."""
    files = scan_directory(sample_project, include=["yml", "sql", "ts", "md"])
    rendered = str(format_files([(f, f.read_text()) for f in files]))

    # Each file type should carry the proper language tag.
    for tag in ("yaml", "sql", "typescript", "markdown"):
        assert f'language="{tag}"' in rendered

    # A recognizable snippet from each file type should survive formatting.
    for snippet in (
        "CREATE TABLE users",
        "interface User",
        "TestApp",
        "# Test Project",
    ):
        assert snippet in rendered
69 |
70 |
def test_exclusion_patterns(sample_project):
    """Test explicit exclusion patterns."""
    files = scan_directory(
        sample_project,
        include=["py", "js"],
        exclude_patterns=["**/utils/*"],  # Exclude utils directory
    )

    as_strings = {str(f) for f in files}
    assert all("utils" not in p for p in as_strings)
    assert any("main.py" in p for p in as_strings)
82 |
83 |
def test_empty_directory(tmp_path):
    """Test handling of empty directories."""
    files = scan_directory(tmp_path)
    rendered = str(format_files([(f, f.read_text()) for f in files]))
    # An empty scan should still produce a well-formed "no files" message.
    assert "No files found" in rendered
90 |
91 |
def test_header_metadata(sample_project):
    """Test header metadata in formatted output."""
    files = scan_directory(sample_project, include=["py"])
    rendered = str(format_files([(f, f.read_text()) for f in files]))

    # The header must carry the important metadata lines.
    for expected in ("Generated by copychat on", "Root path:", "Summary:"):
        assert expected in rendered

    # File info is rendered as a table with these column headers;
    # paths appear in table rows rather than a list.
    for column in ("Path", "Tokens", "Lines"):
        assert column in rendered
110 |
111 |
@pytest.mark.parametrize(
    "diff_mode",
    [
        DiffMode.FULL,
        DiffMode.FULL_WITH_DIFF,
        # CHANGED_WITH_DIFF and DIFF_ONLY require a real git setup,
        # so they are not exercised here.
    ],
)
def test_diff_modes(sample_project, diff_mode):
    """Test different diff modes."""
    found = scan_directory(sample_project, include=["py"], diff_mode=diff_mode)
    assert len(found) > 0
130 |
131 |
def test_token_estimation(sample_project):
    """Test token estimation functionality."""
    files = scan_directory(sample_project, include=["py", "js"])
    rendered = str(format_files([(f, f.read_text()) for f in files]))

    # The header should mention token info (case-insensitive check).
    assert "tokens" in rendered.lower()

    # Sanity check: formatting produced non-empty output.
    assert len(rendered) > 0
145 |
146 |
def test_error_handling(sample_project, tmp_path):
    """Test error handling for problematic files."""
    bad_file = tmp_path / "bad.py"
    try:
        # Create a file we are not allowed to read.
        bad_file.write_text("def bad():\n pass\n")
        bad_file.chmod(0o000)  # Remove read permissions

        # Combine the sample project's files with the unreadable one.
        scanned = list(scan_directory(sample_project, include=["py"]))
        scanned.append(bad_file)

        # Unreadable files are kept in the listing with empty content.
        pairs = []
        for path in scanned:
            try:
                pairs.append((path, path.read_text()))
            except (PermissionError, OSError):
                pairs.append((path, ""))

        rendered = str(format_files(pairs))

        # The unreadable file is still mentioned in the output...
        assert "bad.py" in rendered

        # ...and readable files are processed normally.
        assert "main.py" in rendered
        assert "def main():" in rendered
    finally:
        # Best-effort cleanup: restore permissions so the temp dir can
        # be removed, ignoring failures (e.g. if creation itself failed).
        try:
            bad_file.chmod(0o666)
            bad_file.unlink()
        except Exception:
            pass
186 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Copychat 📋🐈⬛
2 |
3 | **Simple code-to-context.**
4 |
5 | Copychat is a lightweight CLI tool that prepares your code for conversations with LLMs. It intelligently formats your source files into chat-ready context, handling everything from file selection to git diffs.
6 |
7 | ## Features
8 |
9 | - 🎯 **Smart file selection**: Automatically identifies relevant source files while respecting `.gitignore`
10 | - 🔍 **Git-aware**: Can include diffs and focus on changed files
11 | - 📦 **GitHub integration**: Pull directly from repositories
12 | - 🎨 **Clean output**: Formats code with proper language tags and metadata
13 | - 📋 **Clipboard ready**: Results go straight to your clipboard
14 | - 🔢 **Token smart**: Estimates token count for context planning
15 |
16 | ## Running Copychat
17 |
18 | You can use [uv](https://docs.astral.sh/uv/) to run copychat directly from the command line, without needing to install it first:
19 |
20 | ```bash
21 | uvx copychat
22 | ```
23 |
24 | Frequent users may want to add the following alias to their `.zshrc` or `.bashrc`:
25 |
26 | ```bash
27 | alias cc="uvx copychat"
28 | ```
29 |
30 | This permits you to quickly copy context by running e.g. `cc docs/getting-started/ src/core/` from any directory, in any environment.
31 |
32 | If you want to save a few milliseconds, you can install copychat globally with `uv tool install copychat` or add it to your environment with `uv add copychat`. And of course, `pip install copychat` works too.
33 |
34 | ## Quick Start
35 |
36 | Collect, format, and copy all source code in the current directory (and subdirectories) to the clipboard:
37 |
38 | ```bash
39 | copychat
40 | ```
41 |
42 | Copy only Python files to clipboard:
43 |
44 | ```bash
45 | copychat -i py
46 | ```
47 |
48 | Copy specific files, including any git diffs:
49 |
50 | ```bash
51 | copychat src/ tests/test_api.py --diff-mode full-with-diff
52 | ```
53 |
54 | Use GitHub as a source instead of the local filesystem:
55 |
56 | ```bash
57 | copychat src/ -s github:prefecthq/controlflow
58 | ```
59 |
60 | ## MCP Server
61 |
62 | Copychat can run as an MCP (Model Context Protocol) server, allowing AI assistants and other MCP clients to access its functionality directly, including simple ad-hoc copy/paste functionality. This enables seamless integration with tools like Cursor, Claude Desktop, Claude Code, and other MCP-compatible applications.
63 |
64 | Start the MCP server locally:
65 |
66 | ```bash
67 | copychat mcp
68 | ```
69 |
70 | For MCP clients that support local STDIO servers, use the following command to run the MCP server without needing to install copychat first:
71 |
72 | ```bash
73 | uvx copychat mcp
74 | ```
75 |
76 | ## Usage Guide
77 |
78 | Copychat is designed to be intuitive while offering powerful options for more complex needs. Let's walk through common use cases:
79 |
80 | ### Basic Directory Scanning
81 |
82 | At its simplest, run `copychat` in any directory to scan and format all recognized source files:
83 |
84 | ```bash
85 | copychat
86 | ```
87 |
88 | This will scan the current directory, format all supported files, and copy the result to your clipboard. The output includes metadata like character and token counts to help you stay within LLM context limits.
89 |
90 | ### Targeting Specific Files
91 |
92 | You can specify exactly what you want to include:
93 |
94 | ```bash
95 | # Single file
96 | copychat src/main.py
97 |
98 | # Multiple specific files and directories
99 | copychat src/api.py tests/test_api.py docs/
100 |
101 | # Glob patterns
102 | copychat src/*.py tests/**/*.md
103 | ```
104 |
105 | ### Filtering by Language
106 |
107 | When you only want specific file types, use the `--include` flag with comma-separated extensions:
108 |
109 | ```bash
110 | # Just Python files
111 | copychat --include py
112 |
113 | # Python and JavaScript
114 | copychat --include py,js,jsx
115 | ```
116 |
117 | ### Working with Git
118 |
119 | Copychat shines when working with git repositories. Use different diff modes to focus on what matters:
120 |
121 | ```bash
122 | # Show only files that have changed, with their diffs
123 | copychat --diff-mode changed-with-diff
124 |
125 | # Show all files, but include diffs for changed ones
126 | copychat --diff-mode full-with-diff
127 |
128 | # Show only the git diff chunks themselves
129 | copychat --diff-mode diff-only
130 |
131 | # See what changed since branching from develop
132 | copychat --diff-mode diff-only --diff-branch develop
133 | ```
134 |
135 | The `--diff-mode` and `--diff-branch` options are particularly useful when you want to:
136 |
137 | - Review any changes you've made, either in isolation or in context
138 | - Compare changes against a specific branch
139 |
140 | ### Excluding Files
141 |
142 | You can exclude files that match certain patterns:
143 |
144 | ```bash
145 | # Skip test files
146 | copychat --exclude "**/*.test.js,**/*.spec.py"
147 |
148 | # Skip specific directories
149 | copychat --exclude "build/*,dist/*"
150 | ```
151 |
152 | Copychat automatically respects your `.gitignore` file and common ignore patterns (like `node_modules`).
153 |
154 | ### GitHub Integration
155 |
156 | #### Reading GitHub Repositories
157 |
158 | Pull directly from GitHub repositories:
159 |
160 | ```bash
161 | # Using the github: prefix
162 | copychat --source github:username/repo
163 |
164 | # Or just paste a GitHub URL
165 | copychat --source https://github.com/username/repo
166 |
167 | # Process specific paths within the repository
168 | copychat --source github:username/repo src/main.py tests/
169 | ```
170 |
171 | The `--source` flag specifies where to look (GitHub, filesystem, etc.), and then any additional arguments specify which paths within that source to process. This means you can target specific files or directories within a GitHub repository just like you would with local files.
172 |
173 | #### Reading GitHub Issues, PRs & Discussions
174 |
175 | Copy the full text and comment history of a GitHub issue, pull request, or discussion by
176 | passing the identifier directly to the main command:
177 |
178 | ```bash
179 | # Issues and PRs
180 | copychat owner/repo#123
181 | copychat https://github.com/owner/repo/issues/123
182 | copychat https://github.com/owner/repo/pull/456
183 |
184 | # Discussions
185 | copychat https://github.com/owner/repo/discussions/789
186 | ```
187 |
188 | For pull requests, the diff is included by default, giving you complete context of the proposed changes.
189 |
190 | Set `GITHUB_TOKEN` or use `--token` if you need to access private content or want higher rate limits.
191 |
192 | #### Reading Individual GitHub Files
193 |
194 | You can fetch individual files directly from GitHub without cloning the entire repository by using blob URLs:
195 |
196 | ```bash
197 | # Fetch a specific file from a commit/branch/tag
198 | copychat https://github.com/owner/repo/blob/main/src/api.py
199 | copychat https://github.com/owner/repo/blob/v1.2.3/config/settings.yaml
200 | copychat https://github.com/owner/repo/blob/abc123def/docs/README.md
201 | ```
202 |
203 | This is perfect for quickly grabbing specific files for context without the overhead of repository cloning.
204 |
205 | The output is formatted like other files, with XML-style tags and proper language detection.
206 |
207 | ### Output Options
208 |
209 | By default, Copychat copies to your clipboard, but you have other options:
210 |
211 | ```bash
212 | # Append to clipboard
213 | copychat --append
214 |
215 | # Write to a file
216 | copychat --out context.md
217 |
218 | # Append to existing file
219 | copychat --out context.md --append
220 |
221 | # Print to screen
222 | copychat --print
223 |
224 | # Both copy to clipboard and save to file
225 | copychat --out context.md
226 | ```
227 |
228 | ### Verbose Output
229 |
230 | Use the `--verbose` flag (or `-v`) to include detailed file information in the output, including token counts:
231 |
232 | ```bash
233 | copychat -v
234 | ```
235 |
236 | ### Limiting Directory Depth
237 |
238 | Control how deep copychat scans subdirectories:
239 |
240 | ```bash
241 | # Only files in current directory
242 | copychat --depth 0
243 |
244 | # Current directory and immediate subdirectories only
245 | copychat --depth 1
246 |
247 | # Scan up to 3 levels deep
248 | copychat --depth 3
249 | ```
250 |
251 | ## Options
252 |
253 | ```bash
254 | copychat [OPTIONS] [PATHS]...
255 |
256 | Options:
257 | -s, --source TEXT Source to scan (filesystem path, github:owner/repo, or URL)
258 | -o, --out PATH Write output to file
259 | -a, --append Append output instead of overwriting
260 | -p, --print Print output to screen
261 | -v, --verbose Show detailed file information in output
262 | -i, --include TEXT Extensions to include (comma-separated, e.g. 'py,js,ts')
263 | -x, --exclude TEXT Glob patterns to exclude
264 | -d, --depth INTEGER Maximum directory depth to scan (0 = current dir only)
265 | --diff-mode TEXT How to handle git diffs
266 | --diff-branch TEXT Compare changes against specified branch
267 | --debug Debug mode for development
268 | --help Show this message and exit
269 | ```
270 |
271 | ## Supported File Types
272 |
273 | Copychat automatically recognizes and properly formats many common file types, including:
274 |
275 | - Python (`.py`, `.pyi`)
276 | - JavaScript/TypeScript (`.js`, `.ts`, `.jsx`, `.tsx`)
277 | - Web (`.html`, `.css`, `.scss`)
278 | - Systems (`.c`, `.cpp`, `.rs`, `.go`)
279 | - Config (`.yaml`, `.toml`, `.json`)
280 | - Documentation (`.md`, `.rst`, `.txt`)
281 | - And [many more](https://github.com/jlowin/copychat/blob/main/src/copychat/patterns.py)
282 |
283 | ## Output Format
284 |
285 | Copychat generates clean, structured output with:
286 |
287 | - File paths and language tags
288 | - Token count estimates
289 | - Git diff information (when requested)
290 | - Proper syntax highlighting markers
291 |
292 | ## Using `.ccignore` Files
293 |
294 | CopyChat supports hierarchical ignore patterns through `.ccignore` files. These files work similarly to `.gitignore` files but with an important difference: they apply to all directories and subdirectories where they're located.
295 |
296 | ### Key Features
297 |
298 | - `.ccignore` files use the same syntax as `.gitignore` files
299 | - Each `.ccignore` file applies to its directory and all subdirectories
300 | - Patterns from multiple `.ccignore` files are inherited, with more specific directories taking precedence
301 |
302 | ### Example
303 |
304 | ```
305 | project/
306 | ├── .ccignore # Contains "*.log" - excludes log files in all directories
307 | ├── src/
308 | │ ├── .ccignore # Contains "*.tmp" - excludes tmp files in src/ and below
309 | │ └── ...
310 | └── tests/
311 | ├── .ccignore # Contains "*.fixture" - excludes fixture files in tests/ and below
312 | └── ...
313 | ```
314 |
315 | In this example:
316 |
317 | - `*.log` files are excluded everywhere
318 | - `*.tmp` files are only excluded in `src/` and its subdirectories
319 | - `*.fixture` files are only excluded in `tests/` and its subdirectories
320 |
321 | ### Creating a `.ccignore` File
322 |
323 | Create a `.ccignore` file in your project root or any subdirectory:
324 |
325 | ```
326 | # Comment lines start with #
327 | # Blank lines are ignored
328 |
329 | # Ignore all files with .log extension
330 | *.log
331 |
332 | # Ignore specific files
333 | secrets.json
334 | credentials.yaml
335 |
336 | # Ignore directories
337 | node_modules/
338 | __pycache__/
339 | ```
340 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 | from copychat.cli import app
3 | import pyperclip
4 | import re
5 | from pathlib import Path
6 |
7 | runner = CliRunner()
8 |
9 |
10 | def strip_ansi(text: str) -> str:
11 | """Remove ANSI escape codes from text."""
12 | ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
13 | return ansi_escape.sub("", text)
14 |
15 |
16 | def test_cli_default_behavior(tmp_path, monkeypatch):
17 | """Test that default behavior copies to clipboard."""
18 | # Create a test file
19 | test_file = tmp_path / "test.py"
20 | test_file.write_text("print('hello')")
21 |
22 | # Mock pyperclip.copy
23 | copied_content = []
24 |
25 | def mock_copy(text):
26 | copied_content.append(text)
27 |
28 | monkeypatch.setattr(pyperclip, "copy", mock_copy)
29 |
30 | # Run CLI
31 | result = runner.invoke(app, [str(tmp_path)])
32 |
33 | assert result.exit_code == 0
34 | assert len(copied_content) == 1
35 | assert 'language="python"' in copied_content[0]
36 | assert "print('hello')" in copied_content[0]
37 |
38 |
39 | def test_cli_output_file(tmp_path, monkeypatch):
40 | """Test writing output to file."""
41 | # Create a test file
42 | test_file = tmp_path / "test.py"
43 | test_file.write_text("print('hello')")
44 |
45 | # Create output file path
46 | out_file = tmp_path / "output.md"
47 |
48 | # Mock pyperclip.copy
49 | monkeypatch.setattr(pyperclip, "copy", lambda x: None)
50 |
51 | # Run CLI
52 | result = runner.invoke(app, [str(tmp_path), "--out", str(out_file)])
53 |
54 | assert result.exit_code == 0
55 | assert out_file.exists()
56 | content = out_file.read_text()
57 | assert 'language="python"' in content
58 | assert "print('hello')" in content
59 |
60 |
61 | def test_cli_print_output(tmp_path, monkeypatch):
62 | """Test printing output to screen."""
63 | # Create a test file
64 | test_file = tmp_path / "test.py"
65 | test_file.write_text("print('hello')")
66 |
67 | # Mock pyperclip.copy
68 | monkeypatch.setattr(pyperclip, "copy", lambda x: None)
69 |
70 | # Run CLI
71 | result = runner.invoke(app, [str(tmp_path), "--print"])
72 |
73 | assert result.exit_code == 0
74 | assert 'language="python"' in result.stdout
75 | assert "print('hello')" in result.stdout
76 |
77 |
78 | def test_cli_no_files_found(tmp_path):
79 | """Test behavior when no matching files are found."""
80 | # Create a non-matching file
81 | test_file = tmp_path / "test.txt"
82 | test_file.write_text("hello")
83 |
84 | # Run CLI with filter for .py files only
85 | result = runner.invoke(app, [str(tmp_path), "--include", "py"])
86 |
87 | # Since this is expected behavior, CLI should exit with code 0 rather than 1
88 | assert result.exit_code == 0
89 | assert "Found 0 matching files" in strip_ansi(result.stderr)
90 |
91 |
92 | def test_cli_multiple_outputs(tmp_path, monkeypatch):
93 | """Test combining output options."""
94 | # Create a test file
95 | test_file = tmp_path / "test.py"
96 | test_file.write_text("print('hello')")
97 |
98 | # Create output file path
99 | out_file = tmp_path / "output.md"
100 |
101 | # Mock pyperclip.copy and paste
102 | copied_content = []
103 |
104 | def mock_copy(text):
105 | copied_content.append(text)
106 |
107 | # Since we're using output file, clipboard copy won't happen
108 | # Instead just check the file output and stdout
109 | monkeypatch.setattr(pyperclip, "copy", mock_copy)
110 |
111 | # Run CLI with both file output and print
112 | result = runner.invoke(app, [str(tmp_path), "--out", str(out_file), "--print"])
113 |
114 | assert result.exit_code == 0
115 |
116 | # Check file
117 | assert out_file.exists()
118 | file_content = out_file.read_text()
119 | assert 'language="python"' in file_content
120 |
121 | # Check stdout
122 | assert 'language="python"' in result.stdout
123 |
124 |
125 | def test_cli_append_file(tmp_path, monkeypatch):
126 | """Test appending output to an existing file."""
127 | # Create a test file to scan
128 | test_file = tmp_path / "test.py"
129 | test_file.write_text("print('hello')")
130 |
131 | # Create existing output file with content
132 | out_file = tmp_path / "output.md"
133 | out_file.write_text("existing content\n")
134 |
135 | # Mock pyperclip.copy
136 | monkeypatch.setattr(pyperclip, "copy", lambda x: None)
137 |
138 | # Run CLI with append flag
139 | result = runner.invoke(app, [str(tmp_path), "--out", str(out_file), "--append"])
140 |
141 | assert result.exit_code == 0
142 | content = out_file.read_text()
143 | assert "existing content" in content
144 | assert 'language="python"' in content
145 | assert "print('hello')" in content
146 |
147 |
148 | def test_cli_append_clipboard(tmp_path, monkeypatch):
149 | """Test appending output to clipboard content."""
150 | # Create a test file
151 | test_file = tmp_path / "test.py"
152 | test_file.write_text("print('new content')")
153 |
154 | # Mock clipboard content and operations
155 | clipboard_content = ["existing clipboard content"]
156 |
157 | def mock_copy(text):
158 | clipboard_content[0] = text
159 |
160 | def mock_paste():
161 | return clipboard_content[0]
162 |
163 | monkeypatch.setattr(pyperclip, "copy", mock_copy)
164 | monkeypatch.setattr(pyperclip, "paste", mock_paste)
165 |
166 | # Run CLI with append flag
167 | result = runner.invoke(app, [str(tmp_path), "--append"])
168 |
169 | assert result.exit_code == 0
170 | assert "existing clipboard content" in clipboard_content[0]
171 | assert 'language="python"' in clipboard_content[0]
172 | assert "print('new content')" in clipboard_content[0]
173 |
174 |
175 | def test_cli_exclude_pattern(tmp_path, monkeypatch):
176 | """Test excluding files with patterns."""
177 | # Create test files
178 | py_file = tmp_path / "code.py"
179 | py_file.write_text("print('include me')")
180 |
181 | js_file = tmp_path / "script.js"
182 | js_file.write_text("console.log('exclude me')")
183 |
184 | # Mock pyperclip.copy
185 | copied_content = []
186 |
187 | def mock_copy(text):
188 | copied_content.append(text)
189 |
190 | monkeypatch.setattr(pyperclip, "copy", mock_copy)
191 |
192 | # Run CLI with exclude pattern for JS files
193 | result = runner.invoke(app, [str(tmp_path), "--exclude", "*.js"])
194 |
195 | assert result.exit_code == 0
196 | assert len(copied_content) == 1
197 | assert "print('include me')" in copied_content[0]
198 | assert "console.log('exclude me')" not in copied_content[0]
199 |
200 |
201 | def test_cli_directory_depth(tmp_path, monkeypatch):
202 | """Test limiting directory scan depth."""
203 | # Create nested directory structure
204 | level1 = tmp_path / "level1"
205 | level1.mkdir()
206 | level1_file = level1 / "level1.py"
207 | level1_file.write_text("print('level1')")
208 |
209 | level2 = level1 / "level2"
210 | level2.mkdir()
211 | level2_file = level2 / "level2.py"
212 | level2_file.write_text("print('level2')")
213 |
214 | # Mock pyperclip.copy
215 | copied_content = []
216 |
217 | def mock_copy(text):
218 | copied_content.append(text)
219 |
220 | monkeypatch.setattr(pyperclip, "copy", mock_copy)
221 |
222 | # Run CLI with depth=1 (should only include level1 directory)
223 | result = runner.invoke(app, [str(tmp_path), "--depth", "1"])
224 |
225 | assert result.exit_code == 0
226 | assert len(copied_content) == 1
227 | assert "print('level1')" in copied_content[0]
228 | assert "print('level2')" not in copied_content[0]
229 |
230 |
231 | def test_cli_verbose_output(tmp_path, monkeypatch):
232 | """Test verbose output includes file metadata."""
233 | # Create a test file
234 | test_file = tmp_path / "test.py"
235 | test_file.write_text("print('hello')")
236 |
237 | # Mock pyperclip.copy
238 | copied_content = []
239 |
240 | def mock_copy(text):
241 | copied_content.append(text)
242 |
243 | monkeypatch.setattr(pyperclip, "copy", mock_copy)
244 |
245 | # Run CLI with verbose flag
246 | result = runner.invoke(app, [str(tmp_path), "--verbose"])
247 |
248 | assert result.exit_code == 0
249 | assert len(copied_content) == 1
250 |
251 | # Verbose output should include file metadata header with summary
252 | # header_content = copied_content[0].split("```")[0]
253 | assert "File summary" in strip_ansi(result.stderr)
254 | assert (
255 | "Files: 1" in strip_ansi(result.stderr)
256 | or "1 file" in strip_ansi(result.stderr).lower()
257 | )
258 |
259 |
260 | def test_cli_github_item_basic(monkeypatch):
261 | """Basic test for GitHub item handling that doesn't rely on internal implementation."""
262 | runner = CliRunner()
263 |
264 | # Instead of mocking complex internals, just provide a simple mock for the scan_directory function
265 | # so it returns a known result when the CLI processes a GitHub item
266 | def mock_scan_empty(directory, **kwargs):
267 | """Return empty dict to ensure our mock item is the only one processed."""
268 | return {}
269 |
270 | # Mock clipboard operations
271 | copied = []
272 | monkeypatch.setattr(pyperclip, "copy", lambda x: copied.append(x))
273 |
274 | # Replace scan_directory with our mock to avoid file system dependencies
275 | monkeypatch.setattr("copychat.cli.scan_directory", mock_scan_empty)
276 |
277 | # Run the CLI with a mocked item
278 | # The exact format doesn't matter as we're not testing the GitHub API integration
279 | result = runner.invoke(app, ["owner/repo#123"], catch_exceptions=False)
280 |
281 | # We expect either:
282 | # 1. Success (exit_code=0) if the mock returns results, or
283 | # 2. "Found 0 matching files" message (exit_code=0) if mocking couldn't succeed
284 | # Either way, we've tested that the CLI can handle the GitHub item format
285 | assert result.exit_code == 0 or "No module named 'requests'" in result.stderr
286 |
287 | # If we failed to fetch anything due to missing requests library
288 | # at least make sure we attempted to parse the GitHub item format
289 | if result.exit_code != 0:
290 | assert "owner/repo#123" in result.stderr or "GitHub" in result.stderr
291 |
292 |
293 | def test_table_alignment_with_dot_path(tmp_path, monkeypatch):
294 | """Test table alignment when path resolves to '.'"""
295 | # Create a test file
296 | test_file = tmp_path / "test.md"
297 | test_file.write_text("# Test content")
298 |
299 | # Mock relative_to so it returns "." path
300 | original_relative_to = Path.relative_to
301 |
302 | def mock_relative_to(self, other):
303 | # Always return a path that is just "."
304 | if str(self) == str(test_file):
305 | return Path(".")
306 | return original_relative_to(self, other)
307 |
308 | monkeypatch.setattr(Path, "relative_to", mock_relative_to)
309 |
310 | # Mock pyperclip.copy
311 | copied_content = []
312 |
313 | def mock_copy(text):
314 | copied_content.append(text)
315 |
316 | monkeypatch.setattr(pyperclip, "copy", mock_copy)
317 |
318 | # Run CLI with verbose flag
319 | result = runner.invoke(app, [str(test_file), "--verbose"])
320 |
321 | assert result.exit_code == 0
322 |
323 | # Ensure table is properly aligned in the output
324 | table_output = strip_ansi(result.stderr)
325 |
326 | # The "Path" header and first column content should be aligned
327 | path_header_idx = table_output.find("│ Path")
328 | assert path_header_idx > 0, "Path header not found in table"
329 |
330 | # Extract the table rows by looking for lines with │ characters
331 | table_lines = [line for line in table_output.split("\n") if "│" in line]
332 |
333 | # Verify there are at least a header row and a data row
334 | assert len(table_lines) >= 2, "Table should have header and data rows"
335 |
336 | # Check that columns align vertically - the first │ should be at the same position in each row
337 | positions = [line.find("│") for line in table_lines]
338 | assert len(set(positions)) == 1, "Misaligned table columns (first pipe)"
339 |
340 | # Check that second │ (after Path column) aligns in all rows
341 | positions = [line.find("│", positions[0] + 1) for line in table_lines]
342 | assert len(set(positions)) == 1, "Misaligned table columns (second pipe)"
343 |
344 | # Confirm test.md appears in the table with proper alignment
345 | assert "test.md" in table_output, "Filename should appear in table output"
346 |
--------------------------------------------------------------------------------
/src/copychat/format.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Optional
3 | from os.path import commonpath
4 | from datetime import datetime, timezone
5 | import tiktoken
6 | from dataclasses import dataclass
7 |
8 |
9 | @dataclass
10 | class FileStats:
11 | """Statistics for a single file."""
12 |
13 | chars: int
14 | tokens: int
15 |
16 |
17 | @dataclass
18 | class FormattedFile:
19 | """A formatted file with its stats."""
20 |
21 | path: Path
22 | content: str
23 | stats: FileStats
24 | formatted_content: str
25 |
26 |
27 | @dataclass
28 | class FormatResult:
29 | """Result of formatting one or more files."""
30 |
31 | files: list[FormattedFile]
32 | root_path: Path
33 | timestamp: datetime
34 | formatted_content: str
35 | total_chars: int = 0
36 | total_tokens: int = 0
37 | has_header: bool = True
38 |
39 | def __str__(self) -> str:
40 | """Return the formatted content."""
41 | return self.formatted_content
42 |
43 |
44 | def format_file(
45 | file_path: Path, root_path: Path, content: Optional[str] = None
46 | ) -> FormattedFile:
47 | """Format a single file as XML-style markdown and return structured result."""
48 | try:
49 | # Use provided content or read from file
50 | if content is None:
51 | content = file_path.read_text()
52 |
53 | # Calculate stats
54 | stats = FileStats(chars=len(content), tokens=estimate_tokens(content))
55 |
56 | # Use string paths for comparison to handle symlinks and different path formats
57 | file_str = str(file_path.resolve())
58 | root_str = str(root_path.resolve())
59 |
60 | # Remove the root path and any leading slashes
61 | if file_str.startswith(root_str):
62 | rel_path = file_str[len(root_str) :].lstrip("/\\")
63 | else:
64 | rel_path = file_str # Fallback to full path if not a subpath
65 |
66 | language = guess_language(file_path)
67 |
68 | # Build the XML tag with attributes
69 | tag_attrs = [f'path="{rel_path}"']
70 | if language:
71 | tag_attrs.append(f'language="{language}"')
72 |
73 | attrs_str = " ".join(tag_attrs)
74 |
75 | formatted_content = f"""
76 | {content}
77 | """
78 |
79 | return FormattedFile(
80 | path=file_path,
81 | content=content,
82 | stats=stats,
83 | formatted_content=formatted_content,
84 | )
85 |
86 | except Exception as e:
87 | # Return empty stats for failed files
88 | return FormattedFile(
89 | path=file_path,
90 | content=f"",
91 | stats=FileStats(chars=0, tokens=0),
92 | formatted_content=f"",
93 | )
94 |
95 |
96 | def create_header(result: FormatResult) -> str:
97 | """Create a header with metadata about the export."""
98 | timestamp = result.timestamp.strftime("%Y-%m-%d %H:%M:%S UTC")
99 |
100 | # Create a table-like format for files
101 | rel_paths = []
102 | for f in result.files:
103 | try:
104 | rel_path = str(f.path.relative_to(result.root_path))
105 | # Make sure path is not empty or just "."
106 | if not rel_path or rel_path == ".":
107 | # For GitHub items, use a more descriptive name
108 | if (
109 | isinstance(f.path, Path)
110 | and f.path.name
111 | and (
112 | "_pr_" in f.path.name
113 | or "_issue_" in f.path.name
114 | or "_discussion_" in f.path.name
115 | )
116 | ) or (
117 | # Also check for GitHub blob files (they have repo_ref_filepath pattern)
118 | isinstance(f.path, Path)
119 | and f.path.name
120 | and "_" in f.path.name
121 | and len(f.path.name.split("_"))
122 | >= 3 # repo_ref_filepath has at least 3 parts
123 | ):
124 | # This appears to be a GitHub item, use a more descriptive name
125 | rel_path = f.path.name
126 | else:
127 | rel_path = f.path.name or str(f.path)
128 | except ValueError:
129 | rel_path = str(f.path) # Fallback to full path if not a subpath
130 | rel_paths.append(rel_path)
131 |
132 | # Use the minimum of the longest path or 50 chars
133 | max_path_len = (
134 | max(len(path) for path in rel_paths) if rel_paths else 4
135 | ) # Min "Path" header width
136 | max_path_len = max(max_path_len, 4) # Ensure min width for "Path" header
137 | max_path_len = min(max_path_len, 50) # Cap path length for readability
138 |
139 | # Calculate line counts
140 | file_lines = {f.path: f.content.count("\n") + 1 for f in result.files}
141 | total_lines = sum(file_lines.values())
142 |
143 | header = [
144 | "",
199 | "",
200 | ]
201 | )
202 |
203 | return "\n".join(header)
204 |
205 |
206 | def create_display_header(result: FormatResult) -> str:
207 | """Create a display-friendly header without XML comments."""
208 | timestamp = result.timestamp.strftime("%Y-%m-%d %H:%M:%S UTC")
209 |
210 | # Create a table-like format for files
211 | rel_paths = []
212 | for f in result.files:
213 | try:
214 | rel_path = str(f.path.relative_to(result.root_path))
215 | # Make sure path is not empty or just "."
216 | if not rel_path or rel_path == ".":
217 | # For GitHub items, use a more descriptive name
218 | if (
219 | isinstance(f.path, Path)
220 | and f.path.name
221 | and (
222 | "_pr_" in f.path.name
223 | or "_issue_" in f.path.name
224 | or "_discussion_" in f.path.name
225 | )
226 | ) or (
227 | # Also check for GitHub blob files (they have repo_ref_filepath pattern)
228 | isinstance(f.path, Path)
229 | and f.path.name
230 | and "_" in f.path.name
231 | and len(f.path.name.split("_"))
232 | >= 3 # repo_ref_filepath has at least 3 parts
233 | ):
234 | # This appears to be a GitHub item, use a more descriptive name
235 | rel_path = f.path.name
236 | else:
237 | rel_path = f.path.name or str(f.path)
238 | except ValueError:
239 | rel_path = str(f.path) # Fallback to full path if not a subpath
240 | rel_paths.append(rel_path)
241 |
242 | # Use the minimum of the longest path or 50 chars
243 | max_path_len = (
244 | max(len(path) for path in rel_paths) if rel_paths else 4
245 | ) # Min "Path" header width
246 | max_path_len = max(max_path_len, 4) # Ensure min width for "Path" header
247 | max_path_len = min(max_path_len, 50) # Cap path length for readability
248 |
249 | # Calculate line counts
250 | file_lines = {f.path: f.content.count("\n") + 1 for f in result.files}
251 | total_lines = sum(file_lines.values())
252 |
253 | header = [
254 | f"Generated by copychat on {timestamp}",
255 | f"Root path: {result.root_path}",
256 | f"Summary: {len(result.files)} files, ~{result.total_tokens:,} tokens, {total_lines:,} lines",
257 | "",
258 | "┌" + "─" * (max_path_len + 2) + "┬" + "─" * 12 + "┬" + "─" * 10 + "┐",
259 | f"│ {'Path':<{max_path_len}} │ {'Tokens':>10} │ {'Lines':>8} │",
260 | "├" + "─" * (max_path_len + 2) + "┼" + "─" * 12 + "┼" + "─" * 10 + "┤",
261 | ]
262 |
263 | # Format each file as a table row
264 | for f in sorted(result.files, key=lambda x: str(x.path)):
265 | # Calculate relative path for this specific file
266 | try:
267 | rel_path = str(f.path.relative_to(result.root_path))
268 | # Make sure path is not empty or just "."
269 | if not rel_path or rel_path == ".":
270 | # For GitHub items, use a more descriptive name
271 | if (
272 | isinstance(f.path, Path)
273 | and f.path.name
274 | and (
275 | "_pr_" in f.path.name
276 | or "_issue_" in f.path.name
277 | or "_discussion_" in f.path.name
278 | )
279 | ) or (
280 | # Also check for GitHub blob files (they have repo_ref_filepath pattern)
281 | isinstance(f.path, Path)
282 | and f.path.name
283 | and "_" in f.path.name
284 | and len(f.path.name.split("_"))
285 | >= 3 # repo_ref_filepath has at least 3 parts
286 | ):
287 | # This appears to be a GitHub item, use a more descriptive name
288 | rel_path = f.path.name
289 | else:
290 | rel_path = f.path.name or str(f.path)
291 | except ValueError:
292 | rel_path = str(f.path) # Fallback to full path if not a subpath
293 |
294 | if len(rel_path) > max_path_len:
295 | trunc_len = max_path_len - 3
296 | rel_path = "..." + rel_path[-trunc_len:]
297 |
298 | lines = file_lines[f.path]
299 | header.append(
300 | f"│ {rel_path:<{max_path_len}} │ {f.stats.tokens:>10,} │ {lines:>8,} │"
301 | )
302 |
303 | header.append(
304 | "└" + "─" * (max_path_len + 2) + "┴" + "─" * 12 + "┴" + "─" * 10 + "┘"
305 | )
306 |
307 | return "\n".join(header)
308 |
309 |
310 | def format_files(files: list[tuple[Path, str]]) -> FormatResult:
311 | """Format files into markdown with XML-style tags.
312 |
313 | Args:
314 | files: List of (path, content) tuples to format
315 |
316 | Returns:
317 | FormatResult containing all formatting information
318 | """
319 | if not files:
320 | return FormatResult(
321 | files=[],
322 | root_path=Path("."),
323 | timestamp=datetime.now(timezone.utc),
324 | formatted_content="\n",
325 | has_header=False,
326 | )
327 |
328 | # Find common root path using os.path.commonpath
329 | paths = [f[0] for f in files]
330 | str_paths = [str(f.absolute()) for f in paths]
331 | root_path = Path(commonpath(str_paths))
332 |
333 | # Format each file
334 | formatted_files = []
335 | total_chars = 0
336 | total_tokens = 0
337 |
338 | for file_path, content in files:
339 | formatted = format_file(file_path, root_path, content)
340 | formatted_files.append(formatted)
341 | total_chars += formatted.stats.chars
342 | total_tokens += formatted.stats.tokens
343 |
344 | result = FormatResult(
345 | files=formatted_files,
346 | root_path=root_path,
347 | timestamp=datetime.now(timezone.utc),
348 | total_chars=total_chars,
349 | total_tokens=total_tokens,
350 | formatted_content="", # Will be set after header
351 | )
352 |
353 | # Create header and combine all parts
354 | header = create_header(result)
355 | formatted_content = "\n".join(
356 | [header] + [f.formatted_content for f in formatted_files]
357 | )
358 |
359 | # Update the formatted content
360 | result.formatted_content = formatted_content
361 |
362 | return result
363 |
364 |
365 | # Keep existing helper functions unchanged
366 | def estimate_tokens(text: str) -> int:
367 | """Estimate the number of tokens in the text using GPT tokenizer."""
368 | try:
369 | # Using cl100k_base (used by GPT-4, Claude)
370 | encoding = tiktoken.get_encoding("cl100k_base")
371 | return len(encoding.encode(text))
372 | except Exception:
373 | # Fallback to rough estimate if tiktoken fails
374 | return len(text) // 4 # Rough estimate: ~4 chars per token
375 |
376 |
377 | def guess_language(file_path: Path) -> Optional[str]:
378 | """Guess the programming language based on file extension."""
379 | ext = file_path.suffix.lower()
380 |
381 | # Common language mappings
382 | language_map = {
383 | ".py": "python",
384 | ".js": "javascript",
385 | ".ts": "typescript",
386 | ".jsx": "jsx",
387 | ".tsx": "tsx",
388 | ".html": "html",
389 | ".css": "css",
390 | ".scss": "scss",
391 | ".rs": "rust",
392 | ".go": "go",
393 | ".java": "java",
394 | ".cpp": "cpp",
395 | ".c": "c",
396 | ".h": "c",
397 | ".hpp": "cpp",
398 | ".rb": "ruby",
399 | ".php": "php",
400 | ".sh": "bash",
401 | ".yaml": "yaml",
402 | ".yml": "yaml",
403 | ".json": "json",
404 | ".md": "markdown",
405 | ".sql": "sql",
406 | ".r": "r",
407 | ".swift": "swift",
408 | ".kt": "kotlin",
409 | ".kts": "kotlin",
410 | ".scala": "scala",
411 | ".pl": "perl",
412 | ".pm": "perl",
413 | }
414 |
415 | return language_map.get(ext)
416 |
--------------------------------------------------------------------------------
/src/copychat/sources.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import shutil
3 | from typing import Optional
4 | import git
5 | from rich.console import Console
6 | import tempfile
7 |
8 | error_console = Console(stderr=True)
9 |
10 | # Shared temporary directory for GitHub items
11 | _github_temp_dir = None
12 |
13 |
14 | def get_github_temp_dir() -> Path:
15 | """Get a temporary directory for GitHub items that persists for the process."""
16 | global _github_temp_dir
17 | if _github_temp_dir is None:
18 | _github_temp_dir = Path(tempfile.mkdtemp(prefix="copychat_github_"))
19 | return _github_temp_dir
20 |
21 |
22 | class GitHubSource:
23 | """Handle GitHub repositories as sources."""
24 |
25 | def __init__(self, repo_path: str, cache_dir: Optional[Path] = None):
26 | """Initialize GitHub source."""
27 | self.repo_path = repo_path.strip("/")
28 | self.cache_dir = cache_dir or Path.home() / ".cache" / "copychat" / "github"
29 | self.cache_dir.mkdir(parents=True, exist_ok=True)
30 |
31 | @property
32 | def clone_url(self) -> str:
33 | """Get HTTPS clone URL for repository."""
34 | return f"https://github.com/{self.repo_path}.git"
35 |
36 | @property
37 | def repo_dir(self) -> Path:
38 | """Get path to cached repository."""
39 | return self.cache_dir / self.repo_path.replace("/", "_")
40 |
41 | def fetch(self) -> Path:
42 | """Fetch repository and return path to files."""
43 | try:
44 | if self.repo_dir.exists():
45 | # Update existing repo
46 | repo = git.Repo(self.repo_dir)
47 | repo.remotes.origin.fetch()
48 | repo.remotes.origin.pull()
49 | else:
50 | # Clone new repo
51 | git.Repo.clone_from(self.clone_url, self.repo_dir, depth=1)
52 |
53 | return self.repo_dir
54 |
55 | except git.GitCommandError as e:
56 | error_console.print(f"[red]Error accessing repository:[/] {str(e)}")
57 | raise
58 |
59 | def cleanup(self) -> None:
60 | """Remove cached repository."""
61 | if self.repo_dir.exists():
62 | shutil.rmtree(self.repo_dir)
63 |
64 |
65 | class GitHubItem:
66 | """Fetch a GitHub issue, pull request, or discussion with comments."""
67 |
68 | def __init__(
69 | self,
70 | repo_path: str,
71 | number: int,
72 | token: Optional[str] = None,
73 | item_type: str = "issue",
74 | ):
75 | self.repo_path = repo_path.strip("/")
76 | self.number = number
77 | self.token = token
78 | self.item_type = item_type # 'issue', 'pull', or 'discussion'
79 | self.api_base = "https://api.github.com"
80 |
81 | def _headers(self) -> dict[str, str]:
82 | headers = {"Accept": "application/vnd.github+json"}
83 | if self.token:
84 | headers["Authorization"] = f"Bearer {self.token}"
85 | return headers
86 |
87 | def _graphql_headers(self) -> dict[str, str]:
88 | headers = {"Content-Type": "application/json"}
89 | if self.token:
90 | headers["Authorization"] = f"Bearer {self.token}"
91 | return headers
92 |
93 | def _fetch_discussion(self) -> tuple[dict, list]:
94 | """Fetch discussion data using GraphQL API."""
95 | import requests
96 |
97 | if not self.token:
98 | error_console.print(
99 | "[yellow]Warning: GitHub token recommended for discussions. Some rate limits may apply.[/]"
100 | )
101 |
102 | # GraphQL query to fetch discussion
103 | query = """
104 | query($owner: String!, $name: String!, $number: Int!) {
105 | repository(owner: $owner, name: $name) {
106 | discussion(number: $number) {
107 | title
108 | body
109 | url
110 | createdAt
111 | updatedAt
112 | author {
113 | login
114 | }
115 | category {
116 | name
117 | }
118 | comments(first: 100) {
119 | nodes {
120 | body
121 | createdAt
122 | author {
123 | login
124 | }
125 | replies(first: 50) {
126 | nodes {
127 | body
128 | createdAt
129 | author {
130 | login
131 | }
132 | }
133 | }
134 | }
135 | }
136 | }
137 | }
138 | }
139 | """
140 |
141 | owner, repo = self.repo_path.split("/")
142 | variables = {"owner": owner, "name": repo, "number": self.number}
143 |
144 | try:
145 | resp = requests.post(
146 | "https://api.github.com/graphql",
147 | headers=self._graphql_headers(),
148 | json={"query": query, "variables": variables},
149 | timeout=30,
150 | )
151 | resp.raise_for_status()
152 | data = resp.json()
153 |
154 | if "errors" in data:
155 | error_console.print(f"[red]GraphQL errors:[/] {data['errors']}")
156 | raise Exception(f"GraphQL errors: {data['errors']}")
157 |
158 | discussion = data["data"]["repository"]["discussion"]
159 | if not discussion:
160 | raise Exception(f"Discussion #{self.number} not found")
161 |
162 | # Flatten comments and replies
163 | comments = []
164 | for comment in discussion["comments"]["nodes"]:
165 | comments.append(comment)
166 | # Add replies as nested comments
167 | for reply in comment["replies"]["nodes"]:
168 | comments.append(reply)
169 |
170 | return discussion, comments
171 |
172 | except Exception as e:
173 | error_console.print(
174 | f"[yellow]Warning: Failed to fetch discussion: {str(e)}[/]"
175 | )
176 | raise
177 |
178 | def _fetch_pr_diff(self) -> Optional[str]:
179 | """Fetch the PR diff from GitHub."""
180 | import requests
181 |
182 | if not self.token:
183 | error_console.print(
184 | "[yellow]Warning: GitHub token not provided. Some rate limits may apply.[/]"
185 | )
186 |
187 | # Get the diff using the GitHub API
188 | diff_url = f"{self.api_base}/repos/{self.repo_path}/pulls/{self.number}"
189 | headers = self._headers()
190 | headers["Accept"] = "application/vnd.github.diff"
191 | try:
192 | diff_resp = requests.get(diff_url, headers=headers, timeout=30)
193 | diff_resp.raise_for_status()
194 | return diff_resp.text
195 | except Exception as e:
196 | error_console.print(
197 | f"[yellow]Warning: Failed to fetch PR diff: {str(e)}[/]"
198 | )
199 | return None
200 |
201 | def fetch(self) -> tuple[Path, str]:
202 | """Return (path, content) for the issue, PR, or discussion."""
203 | if self.item_type == "discussion":
204 | return self._fetch_discussion_content()
205 | else:
206 | return self._fetch_issue_or_pr_content()
207 |
208 | def _fetch_discussion_content(self) -> tuple[Path, str]:
209 | """Fetch and format discussion content."""
210 | discussion, comments = self._fetch_discussion()
211 |
212 | lines = [f"# {discussion.get('title', '')} (#{self.number})", ""]
213 |
214 | # Add metadata section
215 | html_url = discussion.get(
216 | "url", f"https://github.com/{self.repo_path}/discussions/{self.number}"
217 | )
218 | user = discussion.get("author", {}).get("login", "unknown")
219 | created_at = discussion.get("createdAt", "")
220 | updated_at = discussion.get("updatedAt", "")
221 | category = discussion.get("category", {}).get("name", "")
222 |
223 | lines.extend(
224 | [
225 | f"> **Discussion**: [{self.repo_path}#{self.number}]({html_url})",
226 | f"> **Category**: {category}",
227 | f"> **Author**: {user}",
228 | f"> **Created**: {created_at}",
229 | f"> **Updated**: {updated_at}",
230 | "",
231 | ]
232 | )
233 |
234 | body = discussion.get("body") or ""
235 | if body:
236 | lines.append(body)
237 | lines.append("")
238 |
239 | # Add comments
240 | for comment in comments:
241 | user = comment.get("author", {}).get("login", "unknown")
242 | created = comment.get("createdAt", "")
243 | lines.append(f"## {user} - {created}")
244 | if comment.get("body"):
245 | lines.append(comment["body"])
246 | lines.append("")
247 |
248 | content = "\n".join(lines).strip() + "\n"
249 |
250 | # Use temporary directory
251 | filename = f"{self.repo_path.replace('/', '_')}_discussion_{self.number}.md"
252 | temp_dir = get_github_temp_dir()
253 | path = temp_dir / filename
254 |
255 | return path, content
256 |
    def _fetch_issue_or_pr_content(self) -> tuple[Path, str]:
        """Fetch and format issue or PR content.

        Retrieves the item via the REST issues endpoint (which serves both
        issues and PRs), then its conversation comments; for PRs it also
        fetches review comments (best-effort) and the diff.  Everything is
        rendered into one markdown document.

        Returns:
            Tuple of (path, content): *path* is the target file inside the
            shared GitHub temp directory (nothing is written here) and
            *content* is the rendered markdown.

        Raises:
            requests.HTTPError: If the issue or comments request fails.
        """
        import requests

        issue_url = f"{self.api_base}/repos/{self.repo_path}/issues/{self.number}"
        resp = requests.get(issue_url, headers=self._headers(), timeout=30)
        resp.raise_for_status()
        data = resp.json()

        # comments_url comes from the API response and covers both issues and PRs.
        comments_resp = requests.get(
            data.get("comments_url"), headers=self._headers(), timeout=30
        )
        comments_resp.raise_for_status()
        comments = comments_resp.json()

        review_comments = []
        # The issues endpoint returns PRs too; this key marks actual PRs.
        is_pr = "pull_request" in data
        diff_content = None

        if is_pr:
            # Fetch review comments (best-effort: non-OK responses are skipped)
            review_url = (
                f"{self.api_base}/repos/{self.repo_path}/pulls/{self.number}/comments"
            )
            rc = requests.get(review_url, headers=self._headers(), timeout=30)
            if rc.ok:
                review_comments = rc.json()

            # Get the PR diff
            diff_content = self._fetch_pr_diff()

        lines = [f"# {data.get('title', '')} (#{self.number})", ""]
        body = data.get("body") or ""

        # Add metadata section
        item_type = "Pull Request" if is_pr else "Issue"
        html_url = data.get(
            "html_url", f"https://github.com/{self.repo_path}/issues/{self.number}"
        )
        user = data.get("user", {}).get("login", "unknown")
        created_at = data.get("created_at", "")
        updated_at = data.get("updated_at", "")
        state = data.get("state", "").upper()

        # Create a metadata header
        lines.extend(
            [
                f"> **{item_type}**: [{self.repo_path}#{self.number}]({html_url})",
                f"> **Status**: {state}",
                f"> **Author**: {user}",
                f"> **Created**: {created_at}",
                f"> **Updated**: {updated_at}",
                "",
            ]
        )

        if body:
            lines.append(body)
            lines.append("")

        # Add PR diff if available
        if is_pr and diff_content:
            lines.extend(
                [
                    "## PR Diff",
                    "",
                    "```diff",
                    diff_content,
                    "```",
                    "",
                ]
            )

        # One section per conversation comment.
        for c in comments:
            user = c.get("user", {}).get("login", "unknown")
            created = c.get("created_at", "")
            lines.append(f"## {user} - {created}")
            if c.get("body"):
                lines.append(c["body"])
            lines.append("")

        # Review comments are rendered after the conversation comments.
        for c in review_comments:
            user = c.get("user", {}).get("login", "unknown")
            created = c.get("created_at", "")
            path = c.get("path", "")
            lines.append(f"## Review by {user} on {path} - {created}")
            if c.get("body"):
                lines.append(c["body"])
            lines.append("")

        content = "\n".join(lines).strip() + "\n"
        item_type_filename = "pr" if is_pr else "issue"

        # Use temporary directory
        filename = (
            f"{self.repo_path.replace('/', '_')}_{item_type_filename}_{self.number}.md"
        )
        temp_dir = get_github_temp_dir()
        path = temp_dir / filename

        return path, content
358 |
359 |
class GitHubFile:
    """Fetch a single file from GitHub via blob URL."""

    def __init__(self, blob_url: str, token: Optional[str] = None):
        """Parse a github.com blob URL into repo, ref, and file path.

        Args:
            blob_url: URL like ``https://github.com/owner/repo/blob/<ref>/<path>``.
                Any ``#L10``-style fragment or query string is ignored so
                line anchors don't leak into the parsed file path.
            token: Optional GitHub token for the API fallback request.

        Raises:
            ValueError: If the URL is not a recognizable blob URL.
        """
        self.blob_url = blob_url
        self.token = token

        # Parse the blob URL to extract repo, ref, and file path
        import re

        # Drop fragment (#L10) and query string before matching; otherwise
        # they would be captured into file_path by the trailing (.*).
        cleaned = blob_url.split("#", 1)[0].split("?", 1)[0]

        match = re.search(r"github\.com/([^/]+/[^/]+)/blob/([^/]+)/(.*)", cleaned)
        if not match:
            raise ValueError(f"Invalid GitHub blob URL: {blob_url}")

        self.repo_path = match.group(1)
        self.ref = match.group(2)
        self.file_path = match.group(3)

    def _headers(self) -> dict[str, str]:
        """Standard GitHub API headers, with auth when a token is set."""
        headers = {"Accept": "application/vnd.github+json"}
        if self.token:
            headers["Authorization"] = f"Bearer {self.token}"
        return headers

    def fetch(self) -> tuple[Path, str]:
        """Fetch the file content and return (path, content).

        Tries raw.githubusercontent.com first (no auth), then falls back to
        the GitHub contents API (with auth headers) on any failure.  The
        returned path is a name inside the shared temp directory; the file
        itself is not written here.

        Raises:
            Exception: If both the raw URL and the API fallback fail, or
                the URL points at a non-file (e.g. a directory).
        """
        import requests

        # Use the raw.githubusercontent.com URL for direct file access
        raw_url = f"https://raw.githubusercontent.com/{self.repo_path}/{self.ref}/{self.file_path}"

        try:
            resp = requests.get(raw_url, timeout=30)
            resp.raise_for_status()
            content = resp.text
        except Exception as e:
            error_console.print(
                f"[yellow]Warning: Failed to fetch from raw URL, trying API:[/] {str(e)}"
            )

            # Fallback to GitHub API
            api_url = f"https://api.github.com/repos/{self.repo_path}/contents/{self.file_path}"
            params = {"ref": self.ref}

            try:
                resp = requests.get(
                    api_url, headers=self._headers(), params=params, timeout=30
                )
                resp.raise_for_status()
                data = resp.json()

                if data.get("type") != "file":
                    raise Exception(
                        f"URL points to a {data.get('type', 'unknown')}, not a file"
                    )

                # Contents API returns base64-encoded file data.
                import base64

                content = base64.b64decode(data["content"]).decode("utf-8")
            except Exception as api_error:
                error_console.print(f"[red]Failed to fetch file:[/] {str(api_error)}")
                raise

        # Create a meaningful filename in temp directory
        filename = f"{self.repo_path.replace('/', '_')}_{self.ref}_{self.file_path.replace('/', '_')}"
        temp_dir = get_github_temp_dir()
        path = temp_dir / filename

        return path, content
430 |
--------------------------------------------------------------------------------
/src/copychat/core.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Optional
3 | import pathspec
4 | import subprocess
5 | from enum import Enum
6 | import os
7 |
8 | from .patterns import DEFAULT_EXTENSIONS, EXCLUDED_DIRS, EXCLUDED_PATTERNS
9 |
10 |
class DiffMode(Enum):
    """How git diff information is folded into scanned file content."""

    FULL = "full"  # All files as-is
    FULL_WITH_DIFF = "full-with-diff"  # All files with diff markers
    CHANGED_WITH_DIFF = "changed-with-diff"  # Only changed files with diff markers
    DIFF_ONLY = "diff-only"  # Only the diff chunks
16 |
17 |
def is_glob_pattern(path: str) -> bool:
    """Return True if *path* contains glob metacharacters.

    Recognizes ``*`` (any run of characters), ``?`` (any single character),
    and ``[`` (character class) — the wildcards understood by
    ``pathlib.Path.glob``.  The original check only looked for ``*``, so
    patterns like ``file?.txt`` were treated as literal paths.
    """
    return any(ch in path for ch in "*?[")
21 |
22 |
def resolve_paths(paths: list[str], base_path: Path = Path(".")) -> list[Path]:
    """Expand glob patterns and normalize plain paths against *base_path*.

    Glob matches are filtered through the .gitignore and .ccignore specs
    for *base_path*; non-glob entries are returned untouched when absolute
    or joined onto *base_path* when relative.
    """
    base_path = base_path.resolve()

    # Build both ignore specs a single time for the whole batch.
    git_spec = get_gitignore_spec(base_path)
    cc_spec = get_ccignore_spec(base_path)

    resolved: list[Path] = []
    for raw in paths:
        if not is_glob_pattern(raw):
            # Plain path: absolute stays as-is, relative is anchored.
            candidate = Path(raw)
            resolved.append(candidate if candidate.is_absolute() else base_path / raw)
            continue

        for match in base_path.glob(raw):
            try:
                rel = str(match.relative_to(base_path))
            except ValueError:
                # Not under base_path: keep it without ignore filtering.
                resolved.append(match)
                continue
            # Drop anything excluded by either ignore spec.
            if not (git_spec.match_file(rel) or cc_spec.match_file(rel)):
                resolved.append(match)
    return resolved
58 |
59 |
def find_gitignore(start_path: Path) -> Optional[Path]:
    """Walk upward from *start_path* and return the first .gitignore found.

    The filesystem root itself is never inspected (matching the original
    walk-up loop); returns None when no ancestor has a .gitignore file.
    """
    start = start_path.absolute()
    # start plus its ancestors, excluding the filesystem root.
    for directory in [start, *start.parents][:-1]:
        candidate = directory / ".gitignore"
        if candidate.is_file():
            return candidate
    return None
69 |
70 |
def find_ccignore_files(start_path: Path) -> list[tuple[Path, Path]]:
    """
    Collect every .ccignore file governing the given path.

    Returns (ccignore_file, containing_directory) tuples ordered from most
    specific (closest to *start_path*) to most general.  The filesystem
    root itself is not inspected.
    """
    start = start_path.absolute()
    # start plus its ancestors, excluding the filesystem root.
    return [
        (directory / ".ccignore", directory)
        for directory in [start, *start.parents][:-1]
        if (directory / ".ccignore").is_file()
    ]
92 |
93 |
def get_gitignore_spec(
    path: Path, extra_patterns: Optional[list[str]] = None
) -> pathspec.PathSpec:
    """Build a PathSpec from defaults, extras, and the nearest .gitignore.

    Combines the package-wide excluded patterns and directory exclusions
    with any *extra_patterns*, then appends patterns read from the closest
    .gitignore above *path* (blank lines and comment lines skipped).
    """
    patterns: list[str] = list(EXCLUDED_PATTERNS)
    # Directory exclusions use trailing-slash gitwildmatch syntax.
    patterns += [f"{d}/" for d in EXCLUDED_DIRS]
    patterns += extra_patterns or []

    gitignore_file = find_gitignore(path)
    if gitignore_file is not None:
        with open(gitignore_file) as fh:
            for raw_line in fh:
                stripped = raw_line.strip()
                # Skip blanks and lines starting with "#" (checked on the
                # raw line, matching the original behavior).
                if stripped and not raw_line.startswith("#"):
                    patterns.append(stripped)

    return pathspec.PathSpec.from_lines("gitwildmatch", patterns)
118 |
119 |
def get_ccignore_spec(
    path: Path, extra_patterns: Optional[list[str]] = None
) -> pathspec.PathSpec:
    """
    Build a PathSpec from every .ccignore that applies to *path*.

    All .ccignore files from the filesystem root down to *path* are read,
    most general first, so patterns from files closer to *path* are added
    last and take precedence.  Any *extra_patterns* are placed first.
    """
    patterns: list[str] = list(extra_patterns or [])

    # The finder returns most-specific first; reverse to apply general
    # patterns before specific ones.
    for ccignore_path, _directory in reversed(find_ccignore_files(path)):
        with open(ccignore_path) as fh:
            for raw_line in fh:
                stripped = raw_line.strip()
                # Skip blanks and lines starting with "#" (checked on the
                # raw line, matching the original behavior).
                if stripped and not raw_line.startswith("#"):
                    patterns.append(stripped)

    return pathspec.PathSpec.from_lines("gitwildmatch", patterns)
149 |
150 |
def get_git_diff(path: Path, compare_branch: Optional[str] = None) -> str:
    """Return the git diff for *path*, or "" when no diff applies.

    If *compare_branch* is given, diffs against the merge-base of HEAD and
    that branch; otherwise diffs the working tree against the index.
    Untracked files, git errors, and a missing git executable all yield "".
    """
    try:
        # First check if file is tracked by git
        result = subprocess.run(
            ["git", "ls-files", "--error-unmatch", str(path)],
            capture_output=True,
            text=True,
            check=False,  # Don't raise error for untracked files
        )
        if result.returncode != 0:
            return ""  # File is not tracked by git

        # Get the diff, either against the index (default) or specified branch
        if compare_branch:
            # Diff against the merge base so only changes unique to this
            # branch (not upstream drift) are reported.
            merge_base = subprocess.run(
                ["git", "merge-base", "HEAD", compare_branch],
                capture_output=True,
                text=True,
                check=True,
            ).stdout.strip()

            result = subprocess.run(
                ["git", "diff", merge_base, "--", str(path)],
                capture_output=True,
                text=True,
                check=False,
            )
        else:
            result = subprocess.run(
                ["git", "diff", "--", str(path)],
                capture_output=True,
                text=True,
                check=False,
            )
        return result.stdout  # Return output regardless of return code

    except (subprocess.CalledProcessError, FileNotFoundError):
        # CalledProcessError: merge-base failed (e.g. unknown branch).
        # FileNotFoundError: git executable not installed/on PATH —
        # previously uncaught, which crashed the scan.
        return ""
192 |
193 |
def get_changed_files(compare_branch: Optional[str] = None) -> set[Path]:
    """Return absolute paths of files git reports as changed.

    Without *compare_branch*: staged and unstaged changes from
    ``git status --porcelain``.  With it: files differing between the
    branch and HEAD (triple-dot range) plus any uncommitted changes.
    Returns an empty set outside a git repo, on git errors, or when the
    git executable is missing.
    """
    try:
        # First get the git root directory
        git_root = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True,
            text=True,
            check=True,
        ).stdout.strip()
        git_root_path = Path(git_root)

        if compare_branch:
            # Get all changes between current branch and compare branch
            result = subprocess.run(
                [
                    "git",
                    "diff",
                    "--name-status",
                    f"{compare_branch}...HEAD",  # Use triple dot to compare branches
                ],
                capture_output=True,
                text=True,
                check=True,
            )
            # Also get any unstaged/uncommitted changes
            unstaged_result = subprocess.run(
                ["git", "status", "--porcelain"],
                capture_output=True,
                text=True,
                check=True,
            )
            # Combine both results
            combined_output = result.stdout + unstaged_result.stdout
        else:
            # Get both staged and unstaged changes (current behavior)
            combined_output = subprocess.run(
                ["git", "status", "--porcelain"],
                capture_output=True,
                text=True,
                check=True,
            ).stdout

        changed = set()
        for line in combined_output.splitlines():
            if not line.strip():
                continue

            # Split on tab or space to handle both name-status and porcelain formats
            parts = line.split(None, 1)  # Split on whitespace, max 1 split
            if len(parts) < 2:
                continue

            status, filepath = parts

            # Handle renamed files (they have arrow notation)
            if " -> " in filepath:
                filepath = filepath.split(" -> ")[-1]

            # Convert relative path to absolute using git root
            abs_path = (git_root_path / filepath).resolve()
            changed.add(abs_path)

        return changed
    except (subprocess.CalledProcessError, FileNotFoundError):
        # CalledProcessError: not a repo / unknown branch.
        # FileNotFoundError: git executable not installed/on PATH —
        # previously uncaught, which crashed the scan.
        return set()
260 |
261 |
def get_file_content(
    path: Path,
    diff_mode: DiffMode,
    changed_files: Optional[set[Path]] = None,
    compare_branch: Optional[str] = None,
) -> Optional[str]:
    """Return the content for *path*, shaped by *diff_mode*.

    Returns None for non-files, and for diff-centric modes when the file
    has no changes.  A precomputed *changed_files* set, when given, avoids
    running git for files known to be unchanged.
    """
    if not path.is_file():
        return None

    content = path.read_text()

    # FULL mode never needs git information.
    if diff_mode == DiffMode.FULL:
        return content

    if changed_files is None:
        # No precomputed change set: derive "changed" from the diff itself.
        diff = get_git_diff(path, compare_branch)
        has_changes = bool(diff)
    else:
        # Precomputed change set: only ask git for files known to differ.
        has_changes = path in changed_files
        diff = get_git_diff(path, compare_branch) if has_changes else ""

    annotated = f"{content}\n\n# Git Diff:\n{diff}"

    if diff_mode == DiffMode.DIFF_ONLY:
        return diff if has_changes else None
    if diff_mode == DiffMode.CHANGED_WITH_DIFF:
        return annotated if has_changes else None
    if diff_mode == DiffMode.FULL_WITH_DIFF:
        return annotated if has_changes else content

    return None
302 |
303 |
def scan_directory(
    path: Path,
    include: Optional[list[str]] = None,
    exclude_patterns: Optional[list[str]] = None,
    diff_mode: DiffMode = DiffMode.FULL,
    max_depth: Optional[int] = None,
    compare_branch: Optional[str] = None,
) -> dict[Path, str]:
    """Scan directory for files to process.

    Args:
        path: Directory, single file, or (as a string) a glob pattern.
        include: Extensions to include, without dots (defaults to
            DEFAULT_EXTENSIONS).
        exclude_patterns: Extra gitwildmatch patterns added to both the
            gitignore and ccignore specs.
        diff_mode: How git diff information is folded into file content.
        max_depth: Maximum directory depth relative to the start path
            (None = unlimited; 0 = the start directory only).
        compare_branch: Branch to diff against instead of the index.

    Returns:
        Mapping of file path to content, shaped by *diff_mode*; files
        filtered out (ignored, wrong extension, no changes in diff-only
        modes) are absent.
    """
    # Get changed files upfront if we're using a diff mode
    changed_files = (
        get_changed_files(compare_branch) if diff_mode != DiffMode.FULL else None
    )

    # Convert string paths to Path objects and handle globs
    if isinstance(path, str):
        if is_glob_pattern(path):
            paths = resolve_paths([path])
        else:
            paths = [Path(path)]
    else:
        paths = [path]

    result = {}

    # Pre-compute extension set (normalized to ".ext" form for suffix checks)
    include_set = {f".{ext.lstrip('.')}" for ext in (include or DEFAULT_EXTENSIONS)}

    for current_path in paths:
        if current_path.is_file():
            # For single files, just check if it matches filters
            # (note: compares the bare suffix against the raw include list)
            if include and current_path.suffix.lstrip(".") not in include:
                continue
            content = get_file_content(
                current_path, diff_mode, changed_files, compare_branch
            )
            if content is not None:
                result[current_path] = content
            continue

        # Convert to absolute path once
        abs_path = current_path.resolve()
        if not abs_path.exists():
            continue

        # Get gitignore spec once for the starting directory
        git_spec = get_gitignore_spec(abs_path, exclude_patterns)

        # Use os.walk for better performance than rglob
        for root, _, files in os.walk(abs_path):
            root_path = Path(root)

            # Check depth if max_depth is specified
            if max_depth is not None:
                try:
                    # Calculate current depth relative to the starting path
                    rel_path = root_path.relative_to(abs_path)
                    current_depth = len(rel_path.parts)
                    if current_depth > max_depth:
                        continue
                except ValueError:
                    continue

            # Get relative path once per directory
            try:
                rel_root = str(root_path.relative_to(abs_path))
                if rel_root == ".":
                    rel_root = ""
            except ValueError:
                continue

            # Get ccignore spec for the current directory (to handle hierarchical patterns)
            cc_spec = get_ccignore_spec(root_path, exclude_patterns)

            # Check if directory should be skipped (via gitignore or ccignore)
            if rel_root:
                dir_path = rel_root + "/"
                if git_spec.match_file(dir_path) or cc_spec.match_file(dir_path):
                    continue

            for filename in files:
                # Quick extension check before more expensive operations
                ext = Path(filename).suffix.lower()
                if ext not in include_set:
                    continue

                # Build relative path string directly
                rel_path_str = (
                    os.path.join(rel_root, filename) if rel_root else filename
                )

                # Check both gitignore and ccignore patterns
                if git_spec.match_file(rel_path_str) or cc_spec.match_file(
                    rel_path_str
                ):
                    continue

                # Only create Path object if file passes all filters
                file_path = root_path / filename

                # Get content based on diff mode
                content = get_file_content(
                    file_path, diff_mode, changed_files, compare_branch
                )
                if content is not None:
                    result[file_path] = content

    return result
412 |
413 |
def scan_files(patterns: list[str], root: Path) -> set[Path]:
    """Return the set of paths under *root* matching any glob in *patterns*."""
    matched: set[Path] = set()
    for glob_pattern in patterns:
        matched |= set(root.glob(glob_pattern))
    return matched
420 |
--------------------------------------------------------------------------------
/src/copychat/cli.py:
--------------------------------------------------------------------------------
1 | import typer
2 | from pathlib import Path
3 | from typing import Annotated
4 | from rich.console import Console
5 | import pyperclip
6 | from enum import Enum
7 | from importlib.metadata import version as get_version
8 | import atexit
9 | import shutil
10 |
11 | from .core import (
12 | scan_directory,
13 | DiffMode,
14 | get_file_content,
15 | )
16 | from .format import (
17 | format_files as format_files_xml,
18 | create_display_header,
19 | )
20 | from .sources import GitHubSource, GitHubItem, GitHubFile
21 | from copychat.cli_utilities import TyperWithDefaultCommand
22 |
23 |
24 | # Register cleanup of temporary GitHub directory
def _cleanup_github_temp():
    """Best-effort removal of the temporary GitHub download directory."""
    from .sources import _github_temp_dir

    # Nothing to do when no GitHub content was ever downloaded.
    if _github_temp_dir is None or not _github_temp_dir.exists():
        return
    try:
        shutil.rmtree(_github_temp_dir)
    except Exception:
        # Runs at interpreter exit; cleanup failures are non-fatal.
        pass
33 |
34 |
35 | atexit.register(_cleanup_github_temp)
36 |
37 |
class SourceType(Enum):
    """Type of source to scan."""

    FILESYSTEM = "filesystem"  # Default: local paths
    GITHUB = "github"  # Remote repository fetched on demand
    WEB = "web"  # For future use
44 |
45 |
def parse_source(source: str) -> tuple[SourceType, str]:
    """Classify *source* and return its (type, location) pair.

    GitHub issue/PR/discussion and blob URLs are deliberately returned as
    FILESYSTEM so the main loop handles them per-path; bare github.com
    repo URLs become GITHUB; other http(s) URLs become WEB; everything
    else is a filesystem path.
    """
    import re

    if source.startswith(("github:", "gh:")):
        return SourceType.GITHUB, source.split(":", 1)[1]

    is_http = bool(source) and source.startswith(("http://", "https://"))

    if is_http and "github.com" in source:
        # Issue/PR/discussion URLs stay FILESYSTEM for direct handling.
        if re.search(
            r"github\.com/([^/]+/[^/]+)/(?:issues|pull|discussions)/([0-9]+)", source
        ):
            return SourceType.FILESYSTEM, source

        # Blob URLs (individual files) likewise stay FILESYSTEM.
        if re.search(r"github\.com/([^/]+/[^/]+)/blob/([^/]+)/(.*)", source):
            return SourceType.FILESYSTEM, source

    # Regular GitHub repo URL (or anything containing github.com/...).
    if source and "github.com" in source:
        _, sep, repo = source.partition("github.com/")
        if sep:
            return SourceType.GITHUB, repo

    if is_http:
        return SourceType.WEB, source

    return SourceType.FILESYSTEM, source
78 |
79 |
def parse_github_item(item: str) -> tuple[str, int, str]:
    """Parse an issue/PR/discussion reference into (repo, number, kind).

    Accepts full github.com URLs or the shorthand ``owner/repo#number``;
    the shorthand defaults to kind "issue" for backward compatibility.

    Raises:
        typer.BadParameter: When the reference matches neither form.
    """
    import re

    if item.startswith(("http://", "https://")):
        url_match = re.search(
            r"github\.com/([^/]+/[^/]+)/(issues|pull|discussions)/([0-9]+)", item
        )
        if url_match is None:
            raise typer.BadParameter("Invalid GitHub URL")
        repo, kind, number = url_match.groups()
        # Normalize plural path segments: "issues" -> "issue", etc.
        return repo, int(number), kind.rstrip("s")

    if "#" in item:
        repo, _, number = item.partition("#")
        # Shorthand carries no kind; default to issue for backward compatibility.
        return repo.strip(), int(number), "issue"

    raise typer.BadParameter("Item must be in owner/repo#number format or URL")
105 |
106 |
def parse_github_blob(item: str) -> tuple[str, str, str]:
    """Parse a GitHub blob URL into (repo, ref, file_path).

    Raises:
        typer.BadParameter: When *item* is not a recognizable blob URL.
    """
    import re

    blob_match = re.search(r"github\.com/([^/]+/[^/]+)/blob/([^/]+)/(.*)", item)
    if blob_match is None:
        raise typer.BadParameter("Invalid GitHub blob URL")

    repo, ref, file_path = blob_match.groups()
    return repo, ref, file_path
116 |
117 |
def diff_mode_callback(value: str) -> DiffMode:
    """Typer callback coercing a CLI string into a DiffMode member.

    Typer may invoke callbacks with an already-converted value, so pass
    DiffMode instances straight through.
    """
    if isinstance(value, DiffMode):
        return value
    try:
        return DiffMode(value)
    except ValueError:
        valid_values = [mode.value for mode in DiffMode]
        raise typer.BadParameter(f"Must be one of: {', '.join(valid_values)}")
127 |
128 |
# CLI application; the default command runs when no subcommand is given.
app = TyperWithDefaultCommand(
    add_completion=False,  # Disable shell completion for simplicity
)
console = Console()  # stdout: formatted output
error_console = Console(stderr=True)  # stderr: status and diagnostics
134 |
135 |
@app.command(default=True)
def main(
    paths: Annotated[
        list[str],
        typer.Argument(
            help="Paths to process within the source (defaults to current directory)",
        ),
    ],
    version: Annotated[
        bool,
        typer.Option(
            "--version",
            help="Show version and exit.",
            is_eager=True,
        ),
    ] = None,
    source: Annotated[
        str | None,
        typer.Option(
            "--source",
            "-s",
            help="Source to scan (filesystem path, github:owner/repo, or URL)",
        ),
    ] = None,
    outfile: Annotated[
        Path | None,
        typer.Option(
            "--out",
            "-o",
            help="Write output to file. If provided, output will not be copied to clipboard.",
        ),
    ] = None,
    append: Annotated[
        bool,
        typer.Option(
            "--append",
            "-a",
            help="Append output instead of overwriting",
        ),
    ] = False,
    print_output: Annotated[
        bool,
        typer.Option(
            "--print",
            "-p",
            help="Print output to screen",
        ),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option(
            "--verbose",
            "-v",
            help="Show detailed file information in output",
        ),
    ] = False,
    include: Annotated[
        str | None,
        typer.Option(
            "--include",
            "-i",
            help="Extensions to include (comma-separated, e.g. 'py,js,ts')",
        ),
    ] = None,
    exclude: Annotated[
        list[str] | None,
        typer.Option(
            "--exclude",
            "-x",
            help="Glob patterns to exclude",
        ),
    ] = None,
    diff_mode: Annotated[
        str,
        typer.Option(
            "--diff-mode",
            help="How to handle git diffs",
            callback=diff_mode_callback,
        ),
    ] = "full",
    depth: Annotated[
        int | None,
        typer.Option(
            "--depth",
            "-d",
            help="Maximum directory depth to scan (0 = current dir only)",
        ),
    ] = None,
    debug: Annotated[
        bool,
        typer.Option(
            "--debug",
            help="Debug mode for development",
        ),
    ] = False,
    compare_branch: Annotated[
        str | None,
        typer.Option(
            "--diff-branch",
            help="Compare changes against specified branch instead of working directory",
        ),
    ] = None,
    token: Annotated[
        str | None,
        typer.Option(
            "--token",
            envvar="GITHUB_TOKEN",
            help="GitHub token for issue and PR access",
        ),
    ] = None,
) -> None:
    """Convert source code files to markdown format for LLM context."""
    # --version short-circuits everything else.
    if version:
        console.print(f"copychat version {get_version('copychat')}")
        raise typer.Exit()

    try:
        # Parse source type and location
        source_type, source_loc = (
            parse_source(source) if source else (SourceType.FILESYSTEM, ".")
        )

        if debug:
            error_console.print(
                f"[magenta]Source type:[/] {source_type}, location: {source_loc}"
            )
            error_console.print(f"[magenta]Paths to process:[/] {paths}")

        # Handle different source types
        if source_type == SourceType.GITHUB:
            # Fetch (clone/download) the repo into a local directory first.
            try:
                github_source = GitHubSource(source_loc)
                source_dir = github_source.fetch()
            except Exception as e:
                if debug:
                    raise
                error_console.print(
                    f"[red]Error fetching GitHub repository:[/] {str(e)}"
                )
                raise typer.Exit(1)
        elif source_type == SourceType.WEB:
            error_console.print("[red]Web sources not yet implemented[/]")
            raise typer.Exit(1)
        else:
            source_dir = Path(source_loc)

        # Handle file vs directory source
        if source_dir.is_file():
            # Single-file source: paths arguments are ignored in this branch.
            content = get_file_content(
                source_dir, diff_mode, compare_branch=compare_branch
            )
            all_files = {source_dir: content} if content is not None else {}
        else:
            # For directories, scan all paths
            if not paths:
                paths = ["."]

            # Handle paths
            all_files = {}
            for path in paths:
                if debug:
                    error_console.print(f"[cyan]Processing path:[/] {path}")

                # Allow GitHub issues/PRs/discussions as direct arguments
                # (EAFP: any parse/fetch failure falls through to the next
                # interpretation of the path).
                try:
                    repo, num, item_type = parse_github_item(path)
                    if debug:
                        error_console.print(
                            f"[blue]Processing GitHub {item_type}:[/] {repo}#{num}"
                        )
                    gh_item = GitHubItem(repo, num, token, item_type)
                    p, content = gh_item.fetch()
                    all_files[p] = content
                    if debug:
                        error_console.print(
                            f"[green]Successfully fetched GitHub {item_type}[/]"
                        )
                    continue
                except Exception as e:
                    if debug:
                        error_console.print(
                            f"[yellow]Failed to process as GitHub item:[/] {str(e)}"
                        )
                    pass

                # Allow GitHub blob URLs (individual files)
                try:
                    repo, ref, file_path = parse_github_blob(path)
                    if debug:
                        error_console.print(
                            f"[blue]Processing GitHub file:[/] {repo}/{file_path}@{ref}"
                        )
                    gh_file = GitHubFile(path, token)
                    p, content = gh_file.fetch()
                    all_files[p] = content
                    if debug:
                        error_console.print(
                            "[green]Successfully fetched GitHub file[/]"
                        )
                    continue
                except Exception as e:
                    if debug:
                        error_console.print(
                            f"[yellow]Failed to process as GitHub blob:[/] {str(e)}"
                        )
                    pass

                target = Path(path)
                if target.is_absolute():
                    # Use absolute paths as-is
                    if target.is_file():
                        content = get_file_content(
                            target, diff_mode, compare_branch=compare_branch
                        )
                        if content is not None:
                            all_files[target] = content
                    else:
                        files = scan_directory(
                            target,
                            include=include.split(",") if include else None,
                            exclude_patterns=exclude,
                            diff_mode=diff_mode,
                            max_depth=depth,
                            compare_branch=compare_branch,
                        )
                        all_files.update(files)
                else:
                    # For relative paths, try source dir first, then current dir
                    targets = []
                    if source_type == SourceType.GITHUB:
                        # For GitHub sources, only look in the source directory
                        targets = [source_dir / path]
                    else:
                        # For filesystem sources, try both but prefer source dir
                        if source_dir != Path("."):
                            targets.append(source_dir / path)
                        targets.append(Path.cwd() / path)

                    # First existing candidate wins (break after processing it).
                    for target in targets:
                        if target.exists():
                            if target.is_file():
                                content = get_file_content(
                                    target, diff_mode, compare_branch=compare_branch
                                )
                                if content is not None:
                                    all_files[target] = content
                                break
                            else:
                                files = scan_directory(
                                    target,
                                    include=include.split(",") if include else None,
                                    exclude_patterns=exclude,
                                    diff_mode=diff_mode,
                                    max_depth=depth,
                                    compare_branch=compare_branch,
                                )
                                all_files.update(files)
                                break
        if not all_files:
            error_console.print("Found [red]0[/] matching files")
            return

        # Separate GitHub issues/PRs from regular files for better reporting.
        # NOTE(review): this is a heuristic — fetched GitHub content uses
        # temp-dir paths that don't exist locally yet; the name-based checks
        # below look redundant ("_" appears twice) but are kept as-is.
        github_items = []
        filesystem_files = []

        for path, content in all_files.items():
            if (
                str(path).endswith((".md", ".issue.md", ".pr.md", ".discussion.md"))
                and isinstance(path, Path)
                and not path.exists()
            ) or (
                # Also detect GitHub files by checking if the filename contains repo info and doesn't exist locally
                isinstance(path, Path)
                and not path.exists()
                and "_"
                in str(
                    path.name
                )  # GitHub files have underscores from repo/ref/path formatting
                and any(part in str(path.name) for part in ["github", "blob", "_"])
            ):
                github_items.append((path, content))
            else:
                filesystem_files.append((path, content))

        # Format files - pass both paths and content
        format_result = format_files_xml(
            [(path, content) for path, content in all_files.items()]
        )

        # Get the formatted content, conditionally including header
        if verbose:
            result = str(format_result)
            # Print the display header to stderr for visibility
            error_console.print(
                "\nFile summary:",
                style="bold blue",
            )
            # Use the display-friendly header
            error_console.print(create_display_header(format_result))
            error_console.print()  # Add blank line after header
        else:
            # Skip the header by taking only the formatted files
            result = "\n".join(f.formatted_content for f in format_result.files)

        # Custom message based on content types
        if github_items and filesystem_files:
            error_console.print(
                f"Downloaded [green]{len(github_items)}[/] GitHub items and found [green]{len(filesystem_files)}[/] matching files"
            )
        elif github_items:
            error_console.print(
                f"Downloaded [green]{len(github_items)}[/] GitHub {'item' if len(github_items) == 1 else 'items'}"
            )
        else:
            error_console.print(
                f"Found [green]{len(format_result.files)}[/] matching files"
            )

        # Handle outputs
        if outfile:
            if append and outfile.exists():
                existing_content = outfile.read_text()
                result = existing_content + "\n\n" + result
            outfile.write_text(result)
            error_console.print(
                f"Output {'appended' if append else 'written'} to [green]{outfile}[/]"
            )
        # Only use clipboard if not writing to file AND not just printing to stdout
        elif not print_output or append:
            if append:
                try:
                    existing_clipboard = pyperclip.paste()
                    result = existing_clipboard + "\n\n" + result
                except Exception:
                    error_console.print(
                        "[yellow]Warning: Could not read clipboard for append[/]"
                    )

            try:
                pyperclip.copy(result)
                # Calculate total lines outside the f-string
                total_lines = sum(
                    f.content.count("\n") + 1 for f in format_result.files
                )
                error_console.print(
                    f"{'Appended' if append else 'Copied'} to clipboard "
                    f"(~{format_result.total_tokens:,} tokens, {total_lines:,} lines)"
                )
            except Exception as e:
                error_console.print(
                    f"[yellow]Warning: Could not copy to clipboard: {str(e)}[/]"
                )
                if not print_output:
                    # If clipboard failed and we're not printing, show the content
                    error_console.print("[cyan]Content would have been:[/]")
                    print(result)

        # Print to stdout only if explicitly requested
        if print_output:
            print(result)

    except Exception as e:
        if debug:
            raise
        error_console.print(f"[red]Error:[/] {str(e)}")
        raise typer.Exit(1)
503 |
504 |
@app.command()
def mcp():
    # Lazy import so MCP dependencies are only required for this subcommand.
    from .mcp_server import mcp

    mcp.run()
510 |
--------------------------------------------------------------------------------