├── .DS_Store ├── .flake8 ├── .github └── workflows │ ├── tag-and-release.yml │ └── test-and-lint.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bump_version.sh ├── code_collator ├── __init__.py └── collate.py ├── requirements.txt ├── setup.py └── tests └── test_collate.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tawandakembo/code-collator/f5699ae8b87cc23918aa36358873434ed0fa9b68/.DS_Store -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | exclude = .git,__pycache__,docs/source/conf.py,old,build,dist 4 | ignore = E402 -------------------------------------------------------------------------------- /.github/workflows/tag-and-release.yml: -------------------------------------------------------------------------------- 1 | name: Tag, Release, and Publish 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | tag-and-release: 10 | runs-on: ubuntu-latest 11 | outputs: 12 | new_version: ${{ steps.bump_version.outputs.NEW_VERSION }} 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v3 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Set up Git 20 | run: | 21 | git config --global user.name "github-actions" 22 | git config --global user.email "github-actions@github.com" 23 | 24 | - name: Bump version and push tag 25 | id: bump_version 26 | env: 27 | GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }} 28 | run: | 29 | ./bump_version.sh 30 | echo "NEW_VERSION=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT 31 | 32 | - name: Generate Release Notes 33 | id: generate_notes 34 | run: | 35 | PREVIOUS_TAG=$(git describe --tags --abbrev=0 --always HEAD^) 36 | NOTES=$(git log ${PREVIOUS_TAG}..HEAD --pretty=format:"* %s" --reverse) 37 | echo "RELEASE_NOTES<> $GITHUB_OUTPUT 38 | echo 
"$NOTES" >> $GITHUB_OUTPUT 39 | echo "EOF" >> $GITHUB_OUTPUT 40 | 41 | - name: Update CHANGELOG.md 42 | run: | 43 | echo "# Changelog" > CHANGELOG.md 44 | echo "" >> CHANGELOG.md 45 | echo "## ${{ steps.bump_version.outputs.NEW_VERSION }}" >> CHANGELOG.md 46 | echo "" >> CHANGELOG.md 47 | echo "${{ steps.generate_notes.outputs.RELEASE_NOTES }}" >> CHANGELOG.md 48 | echo "" >> CHANGELOG.md 49 | git add CHANGELOG.md 50 | git commit -m "Update CHANGELOG.md for ${{ steps.bump_version.outputs.NEW_VERSION }}" 51 | git push 52 | 53 | - name: Create Release 54 | uses: ncipollo/release-action@v1 55 | with: 56 | tag: ${{ steps.bump_version.outputs.NEW_VERSION }} 57 | name: Release ${{ steps.bump_version.outputs.NEW_VERSION }} 58 | body: ${{ steps.generate_notes.outputs.RELEASE_NOTES }} 59 | draft: false 60 | prerelease: false 61 | token: ${{ secrets.GITHUB_TOKEN }} 62 | 63 | publish-to-pypi: 64 | needs: tag-and-release 65 | runs-on: ubuntu-latest 66 | permissions: 67 | id-token: write 68 | steps: 69 | - uses: actions/checkout@v3 70 | with: 71 | fetch-depth: 0 72 | - name: Set up Python 73 | uses: actions/setup-python@v4 74 | with: 75 | python-version: '3.x' 76 | - name: Install dependencies 77 | run: | 78 | python -m pip install --upgrade pip 79 | pip install build 80 | - name: Build package 81 | env: 82 | PACKAGE_VERSION: ${{ needs.tag-and-release.outputs.new_version }} 83 | run: python -m build 84 | - name: Publish package 85 | uses: pypa/gh-action-pypi-publish@v1.9.0 -------------------------------------------------------------------------------- /.github/workflows/test-and-lint.yml: -------------------------------------------------------------------------------- 1 | name: Test and Lint 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test-and-lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Python 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: 
'3.x' 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install -r requirements.txt 22 | - name: Run tests with pytest 23 | run: pytest tests/ --cov=code_collator --cov-report=xml 24 | - name: Upload coverage to Codecov 25 | uses: codecov/codecov-action@v3 26 | - name: Lint with flake8 27 | run: flake8 code_collator/ tests/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | collated-code.md 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | Pipfile.lock 88 | 89 | # poetry 90 | poetry.lock 91 | 92 | # Celery stuff 93 | celerybeat-schedule 94 | celerybeat.pid 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | .env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | .dmypy.json 121 | dmypy.json 122 | 123 | # Pyre type checker 124 | .pyre/ 125 | 126 | # pytype static type analyzer 127 | .pytype/ 128 | 129 | # Cython debug symbols 130 | cython_debug/ 131 | 132 | # Pytest cache 133 | .pytest_cache/ 134 | 135 | # Python cache 136 | __pycache__/ 137 | *.pyc 138 | 139 | # Distribution / packaging 140 | *.egg-info/ 141 | build/ 142 | dist/ 143 | .aider* 144 | .aider.* 145 | .benchmarks/ 146 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v0.20.0 4 | 5 | * Update 
CHANGELOG.md for v0.19.0 6 | * Update README.md 7 | 8 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Code Collator 2 | 3 | Thank you for your interest in contributing to Code Collator! Here are some guidelines to help you get started: 4 | 5 | ## Getting Started 6 | 7 | 1. Fork the repository and clone your fork. 8 | 2. Create a new branch from the `main` branch for your feature or fix. 9 | 3. Make your changes in the new branch. 10 | 4. Commit your changes with a meaningful commit message. 11 | 5. Push your changes to your fork. 12 | 6. Create a pull request (PR) from your forked repository to the main repository. 13 | 14 | ## Commit Messages 15 | 16 | Please follow the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification for your commit messages. This helps in automating the versioning and changelog generation. 17 | 18 | ### Commit Message Format 19 | 20 | Each commit message consists of a header, a body, and a footer. The header has a specific format that includes a type, an optional scope, and a subject. 
21 | 22 | #### Type 23 | 24 | Must be one of the following: 25 | 26 | - **feat**: A new feature 27 | - **fix**: A bug fix 28 | - **docs**: Documentation only changes 29 | - **style**: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc) 30 | - **refactor**: A code change that neither fixes a bug nor adds a feature 31 | - **perf**: A code change that improves performance 32 | - **test**: Adding missing or correcting existing tests 33 | - **build**: Changes that affect the build system or external dependencies (example scopes: gulp, broccoli, npm) 34 | - **ci**: Changes to our CI configuration files and scripts (example scopes: Travis, Circle, BrowserStack, SauceLabs) 35 | - **chore**: Other changes that don't modify src or test files 36 | - **revert**: Reverts a previous commit 37 | 38 | #### Subject 39 | 40 | The subject contains a succinct description of the change: 41 | 42 | - Use the imperative, present tense: "change" not "changed" nor "changes" 43 | - Do not capitalize the first letter 44 | - Do not add a period (.) at the end 45 | 46 | ## Pull Request Process 47 | 48 | 1. Ensure that your code adheres to the project's coding standards. 49 | 2. Ensure that your code passes all tests. 50 | 3. Update the documentation as necessary. 51 | 4. Create a pull request from your branch to the `main` branch. 52 | 5. The pull request will be reviewed by one of the maintainers. 53 | 6. Once the pull request is approved, it will be merged into the `main` branch. 54 | 7. The version will be automatically updated if your commit messages adhere to the Conventional Commits specification and include a `feat`, `fix`, or other relevant types. 55 | 56 | ## License 57 | 58 | By contributing to Code Collator, you agree that your contributions will be licensed under the MIT License. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code Collator 2 | 3 | Code Collator is a powerful CLI tool designed to streamline your code review and documentation process by collating your entire codebase into a single, organised Markdown file. This is particularly useful for sharing with AI tools like ChatGPT or Claude for analysis, troubleshooting, or documentation. 4 | 5 | ## Use Case 6 | 7 | Have you ever needed to provide a comprehensive overview of your codebase for a code review, AI analysis, or detailed documentation? Code Collator simplifies this task by aggregating all your code files into a single Markdown file. This makes it easy to: 8 | 9 | - Share your code with AI tools like ChatGPT or Claude for intelligent analysis. 10 | - Generate a unified document for code reviews or team collaboration. 11 | - Create comprehensive documentation for your projects with minimal effort. 12 | 13 | ## Features 14 | 15 | - **Full Codebase Collation**: Collates all files in the specified directory and subdirectories into one Markdown file. 16 | - **.gitignore Support**: Automatically ignores files specified in the `.gitignore` file if one exists. 17 | - **Customizable Output**: Outputs a single Markdown file named `collated-code.md` by default, with options to specify the path to the codebase directory and output file name. 18 | - **Binary File Inclusion**: Includes binary files such as images in the output with a note about their file type. 19 | - **Comment Exclusion Option**: Allows users to exclude comments and docstrings from the collated code. 20 | - **Help Command**: Provides a help command to display usage instructions. 
21 | 22 | ## Demo Video 23 | 24 | [![Watch the demo video](https://i.ytimg.com/vi/e8Ep_NOi_xU/maxresdefault.jpg)](https://youtu.be/e8Ep_NOi_xU) 25 | 26 | *Click the image above to watch a hands-on demo of how Code Collator works.* 27 | 28 | ## Installation 29 | 30 | You can easily install Code Collator using pip: 31 | 32 | ```sh 33 | pip install code-collator 34 | ``` -------------------------------------------------------------------------------- /bump_version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Get the current version 5 | git fetch --tags 6 | CURRENT_VERSION=$(git describe --tags `git rev-list --tags --max-count=1` 2>/dev/null || echo "0.0.0") 7 | 8 | # Default bump type 9 | DEFAULT_BUMP=${DEFAULT_BUMP:-minor} 10 | 11 | # Get the current version 12 | CURRENT_VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "0.0.0") 13 | 14 | # Remove the 'v' prefix if it exists 15 | CURRENT_VERSION=${CURRENT_VERSION#v} 16 | 17 | # Split the version into parts 18 | IFS='.' 
read -r -a VERSION_PARTS <<< "$CURRENT_VERSION" 19 | MAJOR="${VERSION_PARTS[0]}" 20 | MINOR="${VERSION_PARTS[1]}" 21 | PATCH="${VERSION_PARTS[2]}" 22 | 23 | # Bump the version 24 | case $DEFAULT_BUMP in 25 | major) 26 | MAJOR=$((MAJOR + 1)) 27 | MINOR=0 28 | PATCH=0 29 | ;; 30 | minor) 31 | MINOR=$((MINOR + 1)) 32 | PATCH=0 33 | ;; 34 | patch) 35 | PATCH=$((PATCH + 1)) 36 | ;; 37 | *) 38 | echo "Unknown bump type: $DEFAULT_BUMP" 39 | exit 1 40 | ;; 41 | esac 42 | 43 | NEW_VERSION="$MAJOR.$MINOR.$PATCH" 44 | 45 | # Update version in setup.py 46 | sed -i "s/version=\".*\"/version=\"$NEW_VERSION\"/" setup.py 47 | 48 | # Configure git to use the GITHUB_TOKEN 49 | git config --global user.name "github-actions" 50 | git config --global user.email "github-actions@github.com" 51 | 52 | # Set the remote URL with the GITHUB_TOKEN 53 | git remote set-url origin "https://${GITHUB_TOKEN}@github.com/tawanda-kembo/code-collator.git" 54 | 55 | # Check if the tag already exists 56 | if git rev-parse "v$NEW_VERSION" >/dev/null 2>&1; then 57 | echo "Tag v$NEW_VERSION already exists. Skipping tag creation." 
58 | else 59 | # Create a new tag 60 | git tag "v$NEW_VERSION" 61 | 62 | # Push the tag using the GITHUB_TOKEN 63 | git push origin "v$NEW_VERSION" 64 | fi 65 | 66 | # Set the output variable for the new version 67 | echo "::set-output name=NEW_VERSION::v$NEW_VERSION" -------------------------------------------------------------------------------- /code_collator/__init__.py: -------------------------------------------------------------------------------- 1 | from .collate import collate_codebase, main # noqa: F401 2 | -------------------------------------------------------------------------------- /code_collator/collate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | from pathlib import Path 5 | import logging 6 | from fnmatch import fnmatch 7 | from pygments import lexers, token 8 | from pygments.util import ClassNotFound 9 | 10 | 11 | def setup_logging(): 12 | """Set up logging configuration.""" 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.INFO) 15 | handler = logging.StreamHandler(sys.stdout) 16 | handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) 17 | logger.addHandler(handler) 18 | 19 | 20 | def is_binary_file(filepath): 21 | """Check if a file is binary.""" 22 | try: 23 | with open(filepath, 'rb') as f: 24 | chunk = f.read(1024) 25 | return b'\x00' in chunk 26 | except Exception as e: 27 | logging.error("Error reading file %s: %s", filepath, e) 28 | return False 29 | 30 | 31 | def read_gitignore(path): 32 | """Read the .gitignore file and return patterns to ignore.""" 33 | gitignore_path = os.path.join(path, '.gitignore') 34 | if not os.path.exists(gitignore_path): 35 | return [] 36 | 37 | try: 38 | with open(gitignore_path, 'r') as f: 39 | patterns = f.read().splitlines() 40 | logging.info(f"Loaded .gitignore patterns from {gitignore_path}") 41 | return patterns 42 | except Exception as e: 43 | logging.error(f"Error 
reading .gitignore file {gitignore_path}: {e}") 44 | return [] 45 | 46 | 47 | def should_ignore(file_path, ignore_patterns): 48 | """Check if a file should be ignored based on .gitignore patterns and if it's in the .git directory.""" 49 | if '.git' in Path(file_path).parts: 50 | return True 51 | 52 | relative_path = os.path.relpath(file_path) 53 | path_parts = relative_path.split(os.sep) 54 | 55 | for pattern in ignore_patterns: 56 | if pattern.endswith('/'): 57 | # Directory pattern 58 | if any(fnmatch(part, pattern[:-1]) for part in path_parts): 59 | return True 60 | elif '/' in pattern: 61 | # Path pattern 62 | if fnmatch(relative_path, pattern): 63 | return True 64 | else: 65 | # File pattern 66 | if fnmatch(os.path.basename(file_path), pattern): 67 | return True 68 | 69 | return False 70 | 71 | 72 | def process_file_content(content, file_path, include_comments): 73 | """Process file content, optionally removing comments and docstrings.""" 74 | if include_comments: 75 | return content 76 | 77 | try: 78 | lexer = lexers.get_lexer_for_filename(file_path) 79 | except ClassNotFound: 80 | logging.warning(f"No lexer found for {file_path}. 
Returning original content.") 81 | return content 82 | 83 | tokens = list(lexer.get_tokens(content)) 84 | processed_tokens = [] 85 | in_multiline_comment = False 86 | 87 | for token_type, value in tokens: 88 | if token_type in token.Comment or token_type in token.String.Doc: 89 | if token_type == token.Comment.Multiline: 90 | in_multiline_comment = not in_multiline_comment 91 | continue 92 | if not in_multiline_comment: 93 | processed_tokens.append((token_type, value)) 94 | 95 | processed_content = ''.join(value for _, value in processed_tokens).strip() 96 | 97 | # Remove any remaining single-line comments 98 | processed_content = '\n'.join(line for line in processed_content.split('\n') if not line.strip().startswith('#')) 99 | 100 | return processed_content 101 | 102 | 103 | def collate_codebase(path, output_file, include_comments=True): 104 | """Aggregate the codebase into a single Markdown file.""" 105 | ignore_patterns = read_gitignore(path) 106 | try: 107 | with open(output_file, 'w', encoding='utf-8') as output: 108 | output.write("# Collated Codebase\n\n") 109 | for root, _, files in os.walk(path): 110 | for file in files: 111 | file_path = os.path.join(root, file) 112 | if should_ignore(file_path, ignore_patterns): 113 | logging.info("Ignored file %s", file_path) 114 | continue 115 | 116 | output.write(f"## {file_path}\n\n") 117 | is_binary = is_binary_file(file_path) 118 | logging.info("File %s is binary: %s", file_path, is_binary) 119 | if is_binary: 120 | output.write("**Note**: This is a binary file.\n\n") 121 | elif file.endswith('.svg'): 122 | output.write("**Note**: This is an SVG file.\n\n") 123 | else: 124 | try: 125 | with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: 126 | content = f.read() 127 | processed_content = process_file_content(content, file_path, include_comments) 128 | output.write(f"```\n{processed_content}\n```\n\n") 129 | except Exception as e: 130 | logging.error("Error reading file %s: %s", file_path, e) 131 | 
output.write("**Note**: Error reading this file.\n\n") 132 | logging.info("Collated codebase written to %s", output_file) 133 | except Exception as e: 134 | logging.error("Error writing to output file %s: %s", output_file, e) 135 | 136 | 137 | def main(): 138 | """Parse arguments and initiate codebase collation.""" 139 | parser = argparse.ArgumentParser(description="Aggregate codebase into a single Markdown file.") 140 | parser.add_argument( 141 | '-p', 142 | '--path', 143 | type=str, 144 | default='.', 145 | help="Specify the path to the codebase directory (default: current directory)") 146 | parser.add_argument('-o', '--output', type=str, default='collated-code.md', 147 | help="Specify output file (default: collated-code.md)") 148 | parser.add_argument('-c', '--comments', type=str, choices=['on', 'off'], default='on', 149 | help="Include comments and docstrings (default: on)") 150 | 151 | args = parser.parse_args() 152 | 153 | setup_logging() 154 | logging.info("Starting code collation for directory: %s", args.path) 155 | collate_codebase(args.path, args.output, include_comments=(args.comments == 'on')) 156 | logging.info("Code collation completed.") 157 | 158 | 159 | if __name__ == "__main__": 160 | main() 161 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | code-collator 2 | Pygments==2.18.0 3 | certifi==2024.7.4 4 | charset-normalizer==3.3.2 5 | coverage==7.6.0 6 | docutils==0.21.2 7 | flake8==7.1.0 8 | idna==3.7 9 | importlib_metadata==8.2.0 10 | iniconfig==2.0.0 11 | jaraco.classes==3.4.0 12 | jaraco.context==5.3.0 13 | jaraco.functools==4.0.1 14 | keyring==25.2.1 15 | markdown-it-py==3.0.0 16 | mccabe==0.7.0 17 | mdurl==0.1.2 18 | more-itertools==10.3.0 19 | nh3==0.2.18 20 | packaging==24.1 21 | pkginfo==1.10.0 22 | pluggy==1.5.0 23 | pycodestyle==2.12.0 24 | pyflakes==3.2.0 25 | pytest==8.3.2 26 | pytest-cov==5.0.0 27 | 
readme_renderer==44.0 28 | requests==2.32.3 29 | requests-toolbelt==1.0.0 30 | rfc3986==2.0.0 31 | rich==13.7.1 32 | setuptools==71.1.0 33 | twine==5.1.1 34 | urllib3==2.2.2 35 | wheel==0.43.0 36 | zipp==3.19.2 37 | autopep8==2.3.1 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import pathlib 3 | import os 4 | 5 | here = pathlib.Path(__file__).parent.resolve() 6 | 7 | 8 | def get_version(): 9 | version = os.environ.get('PACKAGE_VERSION', '0.0.0') 10 | return version 11 | 12 | 13 | setup( 14 | name="code-collator", 15 | version=get_version(), 16 | description="A CLI tool to aggregate codebase into a single Markdown file", 17 | long_description=(here / 'README.md').read_text(encoding='utf-8'), 18 | long_description_content_type='text/markdown', 19 | url="https://github.com/tawanda-kembo/code-collator", 20 | author="Tawanda Kembo", 21 | author_email="tawanda@mrkembo.com", 22 | classifiers=[ 23 | "Development Status :: 3 - Alpha", 24 | "Intended Audience :: Developers", 25 | "Topic :: Software Development :: Build Tools", 26 | "License :: OSI Approved :: MIT License", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.8", 29 | "Programming Language :: Python :: 3.9", 30 | "Programming Language :: Python :: 3.10", 31 | "Programming Language :: Python :: 3.11", 32 | ], 33 | keywords="cli, development, documentation", 34 | packages=find_packages(include=['code_collator', 'code_collator.*']), 35 | python_requires=">=3.6, <4", 36 | install_requires=[ 37 | 'pygments', 38 | ], 39 | entry_points={ 40 | "console_scripts": [ 41 | "code-collator=code_collator.collate:main", 42 | ], 43 | }, 44 | project_urls={ 45 | "Bug Reports": "https://github.com/tawanda-kembo/code-collator/issues", 46 | "Source": "https://github.com/tawanda-kembo/code-collator", 47 | }, 48 | ) 49 | 
# /tests/test_collate.py
"""Tests for the ``code_collator.collate`` module."""
import sys
import os
import pytest
import logging
from unittest.mock import mock_open, patch

# Make the repository root importable so the tests also run from a source
# checkout where the package has not been installed.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from code_collator import collate


def test_is_binary_file():
    """A NUL byte in the data read from the file marks it as binary."""
    with patch('builtins.open', mock_open(read_data=b'\x00binary\xff')):
        assert collate.is_binary_file('test.bin') is True

    with patch('builtins.open', mock_open(read_data=b'hello world')):
        assert collate.is_binary_file('test.txt') is False


def test_read_gitignore():
    """Patterns come back as one list entry per line of the .gitignore file."""
    # read_gitignore() returns [] early when os.path.exists() is false, so the
    # existence check is patched as well; otherwise the outcome depends on
    # whether the current working directory happens to contain a .gitignore.
    with patch('os.path.exists', return_value=True), \
            patch('builtins.open', mock_open(read_data='*.pyc\n__pycache__\n')):
        patterns = collate.read_gitignore('.')
    assert patterns == ['*.pyc', '__pycache__']


def test_should_ignore():
    """File patterns match on the basename; anything under .git is ignored."""
    patterns = ['*.pyc', '__pycache__']
    assert collate.should_ignore('test.pyc', patterns)
    assert collate.should_ignore('test.py', patterns) is False
    assert collate.should_ignore('.git/config', patterns)


def test_process_file_content():
    """Comments and docstrings are kept or stripped according to the flag."""
    content = '''
def hello():
    """This is a docstring."""
    # This is a comment
    print("Hello, World!")
'''
    file_path = "test.py"

    # Test with comments included
    processed = collate.process_file_content(content, file_path, include_comments=True)
    assert '"""This is a docstring."""' in processed
    assert '# This is a comment' in processed

    # Test with comments excluded
    processed = collate.process_file_content(content, file_path, include_comments=False)
    assert '"""This is a docstring."""' not in processed
    assert '# This is a comment' not in processed
    assert 'print("Hello, World!")' in processed


@pytest.fixture
def mock_file_system(tmp_path):
    """Create a directory holding one text file and one binary file."""
    d = tmp_path / "test_dir"
    d.mkdir()
    (d / "test.py").write_text("# This is a comment\nprint('hello')")
    (d / "test.pyc").write_bytes(b'\x00\x01\x02')
    return d


def test_collate_codebase(mock_file_system, caplog):
    """The collated report contains the source, with or without comments."""
    caplog.set_level(logging.INFO)
    # Write the report next to, not inside, the collated directory so that one
    # run's output is never picked up as input by the following run.
    output_file = mock_file_system.parent / "output.md"

    # Test with comments included
    collate.collate_codebase(str(mock_file_system), str(output_file), include_comments=True)
    with open(output_file, 'r', encoding='utf-8') as f:
        content = f.read()
    assert "# Collated Codebase" in content
    assert "test.py" in content
    assert "print('hello')" in content
    assert "# This is a comment" in content

    # Test with comments excluded
    collate.collate_codebase(str(mock_file_system), str(output_file), include_comments=False)
    with open(output_file, 'r', encoding='utf-8') as f:
        content = f.read()
    assert "# Collated Codebase" in content
    assert "test.py" in content
    assert "print('hello')" in content
    assert "# This is a comment" not in content


def test_main(mock_file_system, caplog, capsys):
    """The CLI entry point honours -p, -o and -c and logs its progress."""
    caplog.set_level(logging.INFO)
    # Reports go beside the collated directory; see test_collate_codebase.
    output_dir = mock_file_system.parent

    # Test with comments included
    output_with_comments = output_dir / 'output_with_comments.md'
    with patch('sys.argv', ['collate', '-p', str(mock_file_system), '-o', str(output_with_comments), '-c', 'on']):
        collate.main()

    with open(output_with_comments, 'r', encoding='utf-8') as f:
        content = f.read()
    assert "# This is a comment" in content

    # Test with comments excluded
    output_without_comments = output_dir / 'output_without_comments.md'
    with patch('sys.argv', ['collate', '-p', str(mock_file_system), '-o', str(output_without_comments), '-c', 'off']):
        collate.main()

    with open(output_without_comments, 'r', encoding='utf-8') as f:
        content = f.read()
    assert "# This is a comment" not in content

    # Assert log messages
    assert "Starting code collation for directory:" in caplog.text
    assert "Code collation completed." in caplog.text

    # Check if specific files were processed. The expected paths are built with
    # os.path.join, as collate_codebase does, instead of a hard-coded "/".
    text_file = os.path.join(str(mock_file_system), 'test.py')
    binary_file = os.path.join(str(mock_file_system), 'test.pyc')
    assert f"File {text_file} is binary: False" in caplog.text
    assert f"File {binary_file} is binary: True" in caplog.text