├── .DS_Store
├── .flake8
├── .github
    └── workflows
    │   ├── tag-and-release.yml
    │   └── test-and-lint.yml
├── .gitignore
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── bump_version.sh
├── code_collator
    ├── __init__.py
    └── collate.py
├── requirements.txt
├── setup.py
└── tests
    └── test_collate.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tawandakembo/code-collator/f5699ae8b87cc23918aa36358873434ed0fa9b68/.DS_Store


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 120
3 | exclude = .git,__pycache__,docs/source/conf.py,old,build,dist
4 | ignore = E402


--------------------------------------------------------------------------------
/.github/workflows/tag-and-release.yml:
--------------------------------------------------------------------------------
 1 | name: Tag, Release, and Publish
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | jobs:
 9 |   tag-and-release:
10 |     runs-on: ubuntu-latest
11 |     outputs:
12 |       new_version: ${{ steps.bump_version.outputs.NEW_VERSION }}
13 |     steps:
14 |       - name: Checkout
15 |         uses: actions/checkout@v3
16 |         with:
17 |           fetch-depth: 0
18 | 
19 |       - name: Set up Git
20 |         run: |
21 |           git config --global user.name "github-actions"
22 |           git config --global user.email "github-actions@github.com"
23 | 
24 |       - name: Bump version and push tag
25 |         id: bump_version
26 |         env:
27 |           GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
28 |         run: |
29 |           ./bump_version.sh
30 |           echo "NEW_VERSION=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT
31 | 
32 |       - name: Generate Release Notes
33 |         id: generate_notes
34 |         run: |
35 |           PREVIOUS_TAG=$(git describe --tags --abbrev=0 --always HEAD^)
36 |           NOTES=$(git log ${PREVIOUS_TAG}..HEAD --pretty=format:"* %s" --reverse)
37 |           echo "RELEASE_NOTES<<EOF" >> $GITHUB_OUTPUT
38 |           echo "$NOTES" >> $GITHUB_OUTPUT
39 |           echo "EOF" >> $GITHUB_OUTPUT
40 | 
41 |       - name: Update CHANGELOG.md
42 |         run: |
43 |           echo "# Changelog" > CHANGELOG.md
44 |           echo "" >> CHANGELOG.md
45 |           echo "## ${{ steps.bump_version.outputs.NEW_VERSION }}" >> CHANGELOG.md
46 |           echo "" >> CHANGELOG.md
47 |           echo "${{ steps.generate_notes.outputs.RELEASE_NOTES }}" >> CHANGELOG.md
48 |           echo "" >> CHANGELOG.md
49 |           git add CHANGELOG.md
50 |           git commit -m "Update CHANGELOG.md for ${{ steps.bump_version.outputs.NEW_VERSION }}"
51 |           git push
52 | 
53 |       - name: Create Release
54 |         uses: ncipollo/release-action@v1
55 |         with:
56 |           tag: ${{ steps.bump_version.outputs.NEW_VERSION }}
57 |           name: Release ${{ steps.bump_version.outputs.NEW_VERSION }}
58 |           body: ${{ steps.generate_notes.outputs.RELEASE_NOTES }}
59 |           draft: false
60 |           prerelease: false
61 |           token: ${{ secrets.GITHUB_TOKEN }}
62 | 
63 |   publish-to-pypi:
64 |     needs: tag-and-release
65 |     runs-on: ubuntu-latest
66 |     permissions:
67 |       id-token: write
68 |     steps:
69 |       - uses: actions/checkout@v3
70 |         with:
71 |           fetch-depth: 0
72 |       - name: Set up Python
73 |         uses: actions/setup-python@v4
74 |         with:
75 |           python-version: '3.x'
76 |       - name: Install dependencies
77 |         run: |
78 |           python -m pip install --upgrade pip
79 |           pip install build
80 |       - name: Build package
81 |         env:
82 |           PACKAGE_VERSION: ${{ needs.tag-and-release.outputs.new_version }}
83 |         run: python -m build
84 |       - name: Publish package
85 |         uses: pypa/gh-action-pypi-publish@v1.9.0


--------------------------------------------------------------------------------
/.github/workflows/test-and-lint.yml:
--------------------------------------------------------------------------------
 1 | name: Test and Lint
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | jobs:
10 |   test-and-lint:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |     - uses: actions/checkout@v3
14 |     - name: Set up Python
15 |       uses: actions/setup-python@v4
16 |       with:
17 |         python-version: '3.x'
18 |     - name: Install dependencies
19 |       run: |
20 |         python -m pip install --upgrade pip
21 |         pip install -r requirements.txt
22 |     - name: Run tests with pytest
23 |       run: pytest tests/ --cov=code_collator --cov-report=xml
24 |     - name: Upload coverage to Codecov
25 |       uses: codecov/codecov-action@v3
26 |     - name: Lint with flake8
27 |       run: flake8 code_collator/ tests/


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | *.pyc
  2 | collated-code.md
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | cover/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | db.sqlite3-journal
 64 | 
 65 | # Flask stuff:
 66 | instance/
 67 | .webassets-cache
 68 | 
 69 | # Scrapy stuff:
 70 | .scrapy
 71 | 
 72 | # Sphinx documentation
 73 | docs/_build/
 74 | target/
 75 | 
 76 | # Jupyter Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # IPython
 80 | profile_default/
 81 | ipython_config.py
 82 | 
 83 | # pyenv
 84 | .python-version
 85 | 
 86 | # pipenv
 87 | Pipfile.lock
 88 | 
 89 | # poetry
 90 | poetry.lock
 91 | 
 92 | # Celery stuff
 93 | celerybeat-schedule
 94 | celerybeat.pid
 95 | 
 96 | # SageMath parsed files
 97 | *.sage.py
 98 | 
 99 | # Environments
100 | .env
101 | .venv
102 | env/
103 | venv/
104 | ENV/
105 | env.bak/
106 | venv.bak/
107 | 
108 | # Spyder project settings
109 | .spyderproject
110 | .spyproject
111 | 
112 | # Rope project settings
113 | .ropeproject
114 | 
115 | # mkdocs documentation
116 | /site
117 | 
118 | # mypy
119 | .mypy_cache/
120 | .dmypy.json
121 | dmypy.json
122 | 
123 | # Pyre type checker
124 | .pyre/
125 | 
126 | # pytype static type analyzer
127 | .pytype/
128 | 
129 | # Cython debug symbols
130 | cython_debug/
131 | 
132 | # Pytest cache
133 | .pytest_cache/
134 | 
135 | # Python cache
136 | __pycache__/
137 | *.pyc
138 | 
139 | # Distribution / packaging
140 | *.egg-info/
141 | build/
142 | dist/
143 | .aider*
144 | .aider.*
145 | .benchmarks/
146 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | ## v0.20.0
4 | 
5 | * Update CHANGELOG.md for v0.19.0
6 | * Update README.md
7 | 
8 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Code Collator
 2 | 
 3 | Thank you for your interest in contributing to Code Collator! Here are some guidelines to help you get started:
 4 | 
 5 | ## Getting Started
 6 | 
 7 | 1. Fork the repository and clone your fork.
 8 | 2. Create a new branch from the `main` branch for your feature or fix.
 9 | 3. Make your changes in the new branch.
10 | 4. Commit your changes with a meaningful commit message.
11 | 5. Push your changes to your fork.
12 | 6. Create a pull request (PR) from your forked repository to the main repository.
13 | 
14 | ## Commit Messages
15 | 
16 | Please follow the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification for your commit messages. This helps in automating the versioning and changelog generation.
17 | 
18 | ### Commit Message Format
19 | 
20 | Each commit message consists of a header, a body, and a footer. The header has a specific format that includes a type, an optional scope, and a subject.
21 | 
22 | #### Type
23 | 
24 | Must be one of the following:
25 | 
26 | - **feat**: A new feature
27 | - **fix**: A bug fix
28 | - **docs**: Documentation only changes
29 | - **style**: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc)
30 | - **refactor**: A code change that neither fixes a bug nor adds a feature
31 | - **perf**: A code change that improves performance
32 | - **test**: Adding missing or correcting existing tests
33 | - **build**: Changes that affect the build system or external dependencies (example scopes: gulp, broccoli, npm)
34 | - **ci**: Changes to our CI configuration files and scripts (example scopes: Travis, Circle, BrowserStack, SauceLabs)
35 | - **chore**: Other changes that don't modify src or test files
36 | - **revert**: Reverts a previous commit
37 | 
38 | #### Subject
39 | 
40 | The subject contains a succinct description of the change:
41 | 
42 | - Use the imperative, present tense: "change" not "changed" nor "changes"
43 | - Do not capitalize the first letter
44 | - Do not add a period (.) at the end
45 | 
46 | ## Pull Request Process
47 | 
48 | 1. Ensure that your code adheres to the project's coding standards.
49 | 2. Ensure that your code passes all tests.
50 | 3. Update the documentation as necessary.
51 | 4. Create a pull request from your branch to the `main` branch.
52 | 5. The pull request will be reviewed by one of the maintainers.
53 | 6. Once the pull request is approved, it will be merged into the `main` branch.
54 | 7. The version will be automatically updated if your commit messages adhere to the Conventional Commits specification and use `feat`, `fix`, or another relevant type.
55 | 
56 | ## License
57 | 
58 | By contributing to Code Collator, you agree that your contributions will be licensed under the MIT License.


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Code Collator
 2 | 
 3 | Code Collator is a powerful CLI tool designed to streamline your code review and documentation process by collating your entire codebase into a single, organised Markdown file. This is particularly useful for sharing with AI tools like ChatGPT or Claude for analysis, troubleshooting, or documentation.
 4 | 
 5 | ## Use Case
 6 | 
 7 | Have you ever needed to provide a comprehensive overview of your codebase for a code review, AI analysis, or detailed documentation? Code Collator simplifies this task by aggregating all your code files into a single Markdown file. This makes it easy to:
 8 | 
 9 | - Share your code with AI tools like ChatGPT or Claude for intelligent analysis.
10 | - Generate a unified document for code reviews or team collaboration.
11 | - Create comprehensive documentation for your projects with minimal effort.
12 | 
13 | ## Features
14 | 
15 | - **Full Codebase Collation**: Collates all files in the specified directory and subdirectories into one Markdown file.
16 | - **.gitignore Support**: Automatically ignores files specified in the `.gitignore` file if one exists.
17 | - **Customizable Output**: Outputs a single Markdown file named `collated-code.md` by default, with options to specify the path to the codebase directory and output file name.
18 | - **Binary File Inclusion**: Includes binary files such as images in the output with a note about their file type.
19 | - **Comment Exclusion Option**: Allows users to exclude comments and docstrings from the collated code.
20 | - **Help Command**: Provides a help command to display usage instructions.
21 | 
22 | ## Demo Video
23 | 
24 | [![Watch the demo video](https://i.ytimg.com/vi/e8Ep_NOi_xU/maxresdefault.jpg)](https://youtu.be/e8Ep_NOi_xU)
25 | 
26 | *Click the image above to watch a hands-on demo of how Code Collator works.*
27 | 
28 | ## Installation
29 | 
30 | You can easily install Code Collator using pip:
31 | 
32 | ```sh
33 | pip install code-collator
34 | 


--------------------------------------------------------------------------------
/bump_version.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | # Get the current version
 5 | git fetch --tags
 6 | CURRENT_VERSION=$(git describe --tags `git rev-list --tags --max-count=1` 2>/dev/null || echo "0.0.0")
 7 | 
 8 | # Default bump type
 9 | DEFAULT_BUMP=${DEFAULT_BUMP:-minor}
10 | 
11 | # Get the current version
12 | CURRENT_VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "0.0.0")
13 | 
14 | # Remove the 'v' prefix if it exists
15 | CURRENT_VERSION=${CURRENT_VERSION#v}
16 | 
17 | # Split the version into parts
18 | IFS='.' read -r -a VERSION_PARTS <<< "$CURRENT_VERSION"
19 | MAJOR="${VERSION_PARTS[0]}"
20 | MINOR="${VERSION_PARTS[1]}"
21 | PATCH="${VERSION_PARTS[2]}"
22 | 
23 | # Bump the version
24 | case $DEFAULT_BUMP in
25 |   major)
26 |     MAJOR=$((MAJOR + 1))
27 |     MINOR=0
28 |     PATCH=0
29 |     ;;
30 |   minor)
31 |     MINOR=$((MINOR + 1))
32 |     PATCH=0
33 |     ;;
34 |   patch)
35 |     PATCH=$((PATCH + 1))
36 |     ;;
37 |   *)
38 |     echo "Unknown bump type: $DEFAULT_BUMP"
39 |     exit 1
40 |     ;;
41 | esac
42 | 
43 | NEW_VERSION="$MAJOR.$MINOR.$PATCH"
44 | 
45 | # Update version in setup.py
46 | sed -i "s/version=\".*\"/version=\"$NEW_VERSION\"/" setup.py
47 | 
48 | # Configure git to use the GITHUB_TOKEN
49 | git config --global user.name "github-actions"
50 | git config --global user.email "github-actions@github.com"
51 | 
52 | # Set the remote URL with the GITHUB_TOKEN
53 | git remote set-url origin "https://${GITHUB_TOKEN}@github.com/tawanda-kembo/code-collator.git"
54 | 
55 | # Check if the tag already exists
56 | if git rev-parse "v$NEW_VERSION" >/dev/null 2>&1; then
57 |     echo "Tag v$NEW_VERSION already exists. Skipping tag creation."
58 | else
59 |     # Create a new tag
60 |     git tag "v$NEW_VERSION"
61 | 
62 |     # Push the tag using the GITHUB_TOKEN
63 |     git push origin "v$NEW_VERSION"
64 | fi
65 | 
66 | # Set the output variable for the new version
67 | echo "::set-output name=NEW_VERSION::v$NEW_VERSION"


--------------------------------------------------------------------------------
/code_collator/__init__.py:
--------------------------------------------------------------------------------
1 | from .collate import collate_codebase, main  # noqa: F401
2 | 


--------------------------------------------------------------------------------
/code_collator/collate.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import argparse
  4 | from pathlib import Path
  5 | import logging
  6 | from fnmatch import fnmatch
  7 | from pygments import lexers, token
  8 | from pygments.util import ClassNotFound
  9 | 
 10 | 
 11 | def setup_logging():
 12 |     """Set up logging configuration."""
 13 |     logger = logging.getLogger()
 14 |     logger.setLevel(logging.INFO)
 15 |     handler = logging.StreamHandler(sys.stdout)
 16 |     handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
 17 |     logger.addHandler(handler)
 18 | 
 19 | 
 20 | def is_binary_file(filepath):
 21 |     """Check if a file is binary."""
 22 |     try:
 23 |         with open(filepath, 'rb') as f:
 24 |             chunk = f.read(1024)
 25 |             return b'\x00' in chunk
 26 |     except Exception as e:
 27 |         logging.error("Error reading file %s: %s", filepath, e)
 28 |         return False
 29 | 
 30 | 
 31 | def read_gitignore(path):
 32 |     """Read the .gitignore file and return patterns to ignore."""
 33 |     gitignore_path = os.path.join(path, '.gitignore')
 34 |     if not os.path.exists(gitignore_path):
 35 |         return []
 36 | 
 37 |     try:
 38 |         with open(gitignore_path, 'r') as f:
 39 |             patterns = f.read().splitlines()
 40 |         logging.info(f"Loaded .gitignore patterns from {gitignore_path}")
 41 |         return patterns
 42 |     except Exception as e:
 43 |         logging.error(f"Error reading .gitignore file {gitignore_path}: {e}")
 44 |         return []
 45 | 
 46 | 
 47 | def should_ignore(file_path, ignore_patterns):
 48 |     """Check if a file should be ignored based on .gitignore patterns and if it's in the .git directory."""
 49 |     if '.git' in Path(file_path).parts:
 50 |         return True
 51 | 
 52 |     relative_path = os.path.relpath(file_path)
 53 |     path_parts = relative_path.split(os.sep)
 54 | 
 55 |     for pattern in ignore_patterns:
 56 |         if pattern.endswith('/'):
 57 |             # Directory pattern
 58 |             if any(fnmatch(part, pattern[:-1]) for part in path_parts):
 59 |                 return True
 60 |         elif '/' in pattern:
 61 |             # Path pattern
 62 |             if fnmatch(relative_path, pattern):
 63 |                 return True
 64 |         else:
 65 |             # File pattern
 66 |             if fnmatch(os.path.basename(file_path), pattern):
 67 |                 return True
 68 | 
 69 |     return False
 70 | 
 71 | 
 72 | def process_file_content(content, file_path, include_comments):
 73 |     """Process file content, optionally removing comments and docstrings."""
 74 |     if include_comments:
 75 |         return content
 76 | 
 77 |     try:
 78 |         lexer = lexers.get_lexer_for_filename(file_path)
 79 |     except ClassNotFound:
 80 |         logging.warning(f"No lexer found for {file_path}. Returning original content.")
 81 |         return content
 82 | 
 83 |     tokens = list(lexer.get_tokens(content))
 84 |     processed_tokens = []
 85 |     in_multiline_comment = False
 86 | 
 87 |     for token_type, value in tokens:
 88 |         if token_type in token.Comment or token_type in token.String.Doc:
 89 |             if token_type == token.Comment.Multiline:
 90 |                 in_multiline_comment = not in_multiline_comment
 91 |             continue
 92 |         if not in_multiline_comment:
 93 |             processed_tokens.append((token_type, value))
 94 | 
 95 |     processed_content = ''.join(value for _, value in processed_tokens).strip()
 96 | 
 97 |     # Remove any remaining single-line comments
 98 |     processed_content = '\n'.join(line for line in processed_content.split('\n') if not line.strip().startswith('#'))
 99 | 
100 |     return processed_content
101 | 
102 | 
def _write_file_section(output, file_path, include_comments):
    """Write the ``## <path>`` section for one file to the open ``output`` stream."""
    output.write(f"## {file_path}\n\n")
    is_binary = is_binary_file(file_path)
    logging.info("File %s is binary: %s", file_path, is_binary)
    if is_binary:
        output.write("**Note**: This is a binary file.\n\n")
        return
    if file_path.endswith('.svg'):
        output.write("**Note**: This is an SVG file.\n\n")
        return

    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
        processed_content = process_file_content(content, file_path, include_comments)
        output.write(f"```\n{processed_content}\n```\n\n")
    except Exception as e:
        logging.error("Error reading file %s: %s", file_path, e)
        output.write("**Note**: Error reading this file.\n\n")


def collate_codebase(path, output_file, include_comments=True):
    """Aggregate the codebase under ``path`` into a single Markdown file.

    Args:
        path: Root directory to walk.
        output_file: Markdown file to create or overwrite.
        include_comments: When false, comments and docstrings are stripped
            from each file before it is written.

    Every file that is not ignored gets a ``## <path>`` heading followed by
    its content in a fenced block, or a note for binary and SVG files. Ignore
    patterns come from ``<path>/.gitignore`` and are matched against each
    file's path relative to ``path``, so directories above the codebase root
    cannot trigger a pattern. The output file itself is skipped when it lives
    inside ``path``.

    Errors while writing the output file are logged, not raised.
    """
    ignore_patterns = read_gitignore(path)
    output_abspath = os.path.abspath(output_file)
    try:
        with open(output_file, 'w', encoding='utf-8') as output:
            output.write("# Collated Codebase\n\n")
            for root, _, files in os.walk(path):
                for file in files:
                    file_path = os.path.join(root, file)
                    if os.path.abspath(file_path) == output_abspath:
                        # Never fold the half-written output back into itself.
                        logging.info("Skipped output file %s", file_path)
                        continue
                    if should_ignore(os.path.relpath(file_path, path), ignore_patterns):
                        logging.info("Ignored file %s", file_path)
                        continue
                    _write_file_section(output, file_path, include_comments)
        logging.info("Collated codebase written to %s", output_file)
    except Exception as e:
        logging.error("Error writing to output file %s: %s", output_file, e)
135 | 
136 | 
def main():
    """Command-line entry point: read the options, then collate the codebase.

    Options: ``-p/--path`` (directory to collate, default ``.``),
    ``-o/--output`` (Markdown file to write, default ``collated-code.md``) and
    ``-c/--comments`` (``on`` or ``off``, default ``on``).
    """
    parser = argparse.ArgumentParser(description="Aggregate codebase into a single Markdown file.")

    # (flags, keyword arguments) for each option, registered in this order.
    option_table = (
        (('-p', '--path'),
         dict(type=str, default='.',
              help="Specify the path to the codebase directory (default: current directory)")),
        (('-o', '--output'),
         dict(type=str, default='collated-code.md',
              help="Specify output file (default: collated-code.md)")),
        (('-c', '--comments'),
         dict(type=str, choices=['on', 'off'], default='on',
              help="Include comments and docstrings (default: on)")),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    args = parser.parse_args()
    keep_comments = args.comments == 'on'

    setup_logging()
    logging.info("Starting code collation for directory: %s", args.path)
    collate_codebase(args.path, args.output, include_comments=keep_comments)
    logging.info("Code collation completed.")
157 | 
158 | 
159 | if __name__ == "__main__":
160 |     main()
161 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | code-collator
 2 | Pygments==2.18.0
 3 | certifi==2024.7.4
 4 | charset-normalizer==3.3.2
 5 | coverage==7.6.0
 6 | docutils==0.21.2
 7 | flake8==7.1.0
 8 | idna==3.7
 9 | importlib_metadata==8.2.0
10 | iniconfig==2.0.0
11 | jaraco.classes==3.4.0
12 | jaraco.context==5.3.0
13 | jaraco.functools==4.0.1
14 | keyring==25.2.1
15 | markdown-it-py==3.0.0
16 | mccabe==0.7.0
17 | mdurl==0.1.2
18 | more-itertools==10.3.0
19 | nh3==0.2.18
20 | packaging==24.1
21 | pkginfo==1.10.0
22 | pluggy==1.5.0
23 | pycodestyle==2.12.0
24 | pyflakes==3.2.0
25 | pytest==8.3.2
26 | pytest-cov==5.0.0
27 | readme_renderer==44.0
28 | requests==2.32.3
29 | requests-toolbelt==1.0.0
30 | rfc3986==2.0.0
31 | rich==13.7.1
32 | setuptools==71.1.0
33 | twine==5.1.1
34 | urllib3==2.2.2
35 | wheel==0.43.0
36 | zipp==3.19.2
37 | autopep8==2.3.1


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | import pathlib
 3 | import os
 4 | 
 5 | here = pathlib.Path(__file__).parent.resolve()
 6 | 
 7 | 
 8 | def get_version():
 9 |     version = os.environ.get('PACKAGE_VERSION', '0.0.0')
10 |     return version
11 | 
12 | 
# Distribution metadata. The long description is the README that sits next to
# this file, and the console script maps ``code-collator`` to ``collate.main``.
_repository_url = "https://github.com/tawanda-kembo/code-collator"
_readme_text = (here / 'README.md').read_text(encoding='utf-8')

_classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Topic :: Software Development :: Build Tools",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
]

setup(
    name="code-collator",
    version=get_version(),
    description="A CLI tool to aggregate codebase into a single Markdown file",
    long_description=_readme_text,
    long_description_content_type='text/markdown',
    url=_repository_url,
    author="Tawanda Kembo",
    author_email="tawanda@mrkembo.com",
    classifiers=_classifiers,
    keywords="cli, development, documentation",
    packages=find_packages(include=['code_collator', 'code_collator.*']),
    python_requires=">=3.6, <4",
    install_requires=['pygments'],
    entry_points={
        "console_scripts": ["code-collator=code_collator.collate:main"],
    },
    project_urls={
        "Bug Reports": _repository_url + "/issues",
        "Source": _repository_url,
    },
)
49 | 


--------------------------------------------------------------------------------
/tests/test_collate.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import pytest
  4 | import logging
  5 | from unittest.mock import mock_open, patch
  6 | 
  7 | # Add the parent directory to sys.path
  8 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
  9 | 
 10 | from code_collator import collate
 11 | import sys
 12 | import os
 13 | 
 14 | # Add the parent directory to sys.path
 15 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 16 | 
 17 | 
 18 | # Add the parent directory to sys.path
 19 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 20 | 
 21 | 
 22 | def test_is_binary_file():
 23 |     with patch('builtins.open', mock_open(read_data=b'\x00binary\xff')):
 24 |         assert collate.is_binary_file('test.bin') is True
 25 | 
 26 |     with patch('builtins.open', mock_open(read_data=b'hello world')):
 27 |         assert collate.is_binary_file('test.txt') is False
 28 | 
 29 | 
 30 | def test_read_gitignore():
 31 |     with patch('builtins.open', mock_open(read_data='*.pyc\n__pycache__\n')):
 32 |         patterns = collate.read_gitignore('.')
 33 |         assert patterns == ['*.pyc', '__pycache__']
 34 | 
 35 | 
 36 | def test_should_ignore():
 37 |     patterns = ['*.pyc', '__pycache__']
 38 |     assert collate.should_ignore('test.pyc', patterns)
 39 |     assert collate.should_ignore('test.py', patterns) is False
 40 |     assert collate.should_ignore('.git/config', patterns)
 41 | 
 42 | 
 43 | def test_process_file_content():
 44 |     content = '''
 45 | def hello():
 46 |     """This is a docstring."""
 47 |     # This is a comment
 48 |     print("Hello, World!")
 49 | '''
 50 |     file_path = "test.py"
 51 | 
 52 |     # Test with comments included
 53 |     processed = collate.process_file_content(content, file_path, include_comments=True)
 54 |     assert '"""This is a docstring."""' in processed
 55 |     assert '# This is a comment' in processed
 56 | 
 57 |     # Test with comments excluded
 58 |     processed = collate.process_file_content(content, file_path, include_comments=False)
 59 |     assert '"""This is a docstring."""' not in processed
 60 |     assert '# This is a comment' not in processed
 61 |     assert 'print("Hello, World!")' in processed
 62 | 
 63 | 
 64 | @pytest.fixture
 65 | def mock_file_system(tmp_path):
 66 |     d = tmp_path / "test_dir"
 67 |     d.mkdir()
 68 |     (d / "test.py").write_text("# This is a comment\nprint('hello')")
 69 |     (d / "test.pyc").write_bytes(b'\x00\x01\x02')
 70 |     return d
 71 | 
 72 | 
 73 | def test_collate_codebase(mock_file_system, caplog):
 74 |     caplog.set_level(logging.INFO)
 75 |     output_file = mock_file_system / "output.md"
 76 | 
 77 |     # Test with comments included
 78 |     collate.collate_codebase(str(mock_file_system), str(output_file), include_comments=True)
 79 |     with open(output_file, 'r') as f:
 80 |         content = f.read()
 81 |     assert "# Collated Codebase" in content
 82 |     assert "test.py" in content
 83 |     assert "print('hello')" in content
 84 |     assert "# This is a comment" in content
 85 | 
 86 |     # Test with comments excluded
 87 |     collate.collate_codebase(str(mock_file_system), str(output_file), include_comments=False)
 88 |     with open(output_file, 'r') as f:
 89 |         content = f.read()
 90 |     assert "# Collated Codebase" in content
 91 |     assert "test.py" in content
 92 |     assert "print('hello')" in content
 93 |     assert "# This is a comment" not in content
 94 | 
 95 | 
 96 | def test_main(mock_file_system, caplog, capsys):
 97 |     caplog.set_level(logging.INFO)
 98 | 
 99 |     # Test with comments included
100 |     output_with_comments = mock_file_system / 'output_with_comments.md'
101 |     with patch('sys.argv', ['collate', '-p', str(mock_file_system), '-o', str(output_with_comments), '-c', 'on']):
102 |         collate.main()
103 | 
104 |     with open(output_with_comments, 'r') as f:
105 |         content = f.read()
106 |     assert "# This is a comment" in content
107 | 
108 |     # Test with comments excluded
109 |     output_without_comments = mock_file_system / 'output_without_comments.md'
110 |     with patch('sys.argv', ['collate', '-p', str(mock_file_system), '-o', str(output_without_comments), '-c', 'off']):
111 |         collate.main()
112 | 
113 |     with open(output_without_comments, 'r') as f:
114 |         content = f.read()
115 |     assert "# This is a comment" not in content
116 | 
117 |     # Assert log messages
118 |     assert "Starting code collation for directory:" in caplog.text
119 |     assert "Code collation completed." in caplog.text
120 | 
121 |     # Check if specific files were processed
122 |     assert f"File {mock_file_system}/test.py is binary: False" in caplog.text
123 |     assert f"File {mock_file_system}/test.pyc is binary: True" in caplog.text
124 | 


--------------------------------------------------------------------------------