├── .github └── CODEOWNERS ├── .gitignore ├── .gptignore ├── LICENSE ├── README.md ├── gpt_repository_loader.py ├── test_data ├── example_repo │ ├── .gptignore │ ├── file1.txt │ ├── file2.py │ └── folder1 │ │ ├── file3.py │ │ └── file4.txt └── expected_output.txt └── test_gpt_repository_loader.py /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @mpoon 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python artifacts 2 | __pycache__/ 3 | *.pyc 4 | *.pyo 5 | *.pyd 6 | 7 | # Output file 8 | output.txt 9 | 10 | # Unit test artifacts 11 | .coverage 12 | .coverage. 13 | htmlcov/ 14 | 15 | # Virtual environment 16 | venv/ 17 | *.egg-info/ 18 | 19 | # Jupyter Notebook 20 | .ipynb_checkpoints/ 21 | 22 | # Miscellaneous 23 | *.swp 24 | *.swo 25 | *.swn 26 | *~ -------------------------------------------------------------------------------- /.gptignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | *.log 4 | .git/* 5 | .gptignore 6 | LICENSE 7 | .github/* 8 | .tox/* 9 | .mypy_cache/* 10 | *.whl 11 | *.tar 12 | *.tar.gz 13 | .gitignore 14 | *.env* 15 | *.png 16 | *.jpeg 17 | *.jpg 18 | *bin/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Your Name 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished 
to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gpt-repository-loader 2 | 3 | `gpt-repository-loader` is a command-line tool that converts the contents of a Git repository into a text format, preserving the structure of the files and file contents. The generated output can be interpreted by AI language models, allowing them to process the repository's contents for various tasks, such as code review or documentation generation. 4 | 5 | ## Contributing 6 | Some context around building this is [located here](https://github.com/mpoon/gpt-repository-loader/discussions/18). Appreciate any issues and pull requests in the spirit of having mostly GPT build out this tool. Using [ChatGPT Plus](https://chat.openai.com/) is recommended for quick access to GPT-4. 7 | 8 | ## Getting Started 9 | 10 | To get started with `gpt-repository-loader`, follow these steps: 11 | 12 | 1. Ensure you have Python 3 installed on your system. 13 | 2. Clone or download the `gpt-repository-loader` repository. 14 | 3. Navigate to the repository's root directory in your terminal. 15 | 4. 
Run `gpt-repository-loader` with the following command: 16 | 17 | ```bash 18 | python gpt_repository_loader.py /path/to/git/repository [-p /path/to/preamble.txt] [-o /path/to/output_file.txt] 19 | ``` 20 | Replace `/path/to/git/repository` with the path to the Git repository you want to process. Optionally, you can specify a preamble file with -p or an output file with -o. If not specified, the default output file will be named output.txt in the current directory. 21 | 22 | 5. The tool will generate an output.txt file containing the text representation of the repository. You can now use this file as input for AI language models or other text-based processing tasks. 23 | 24 | ## Running Tests 25 | 26 | To run the tests for `gpt-repository-loader`, follow these steps: 27 | 28 | 1. Ensure you have Python 3 installed on your system. 29 | 2. Navigate to the repository's root directory in your terminal. 30 | 3. Run the tests with the following command: 31 | 32 | ```bash 33 | python -m unittest test_gpt_repository_loader.py 34 | ``` 35 | Now, the test harness is added to the `gpt-repository-loader` project. You can run the tests by executing the command `python -m unittest test_gpt_repository_loader.py` in your terminal. 36 | 37 | ## License 38 | This project is licensed under the MIT License - see the LICENSE file for details. 
#!/usr/bin/env python3
"""Flatten the files of a repository into one text file.

Every file that is not excluded by the ``.gptignore`` patterns is written as a
section: a ``----`` line, the file path relative to the repository root, then
the file contents. A preamble is written first and ``--END--`` last.
"""

import fnmatch
import os
import sys

USAGE = (
    "Usage: python gpt_repository_loader.py /path/to/git/repository "
    "[-p /path/to/preamble.txt] [-o /path/to/output_file.txt]"
)

DEFAULT_PREAMBLE = (
    "The following text is a Git repository with code. The structure of the "
    "text are sections that begin with ----, followed by a single line "
    "containing the file path and file name, followed by a variable amount of "
    "lines containing the file contents. The text representing the Git "
    "repository ends when the symbols --END-- are encountered. Any further "
    "text beyond --END-- are meant to be interpreted as instructions using "
    "the aforementioned Git repository as context.\n"
)


def get_ignore_list(ignore_file_path):
    """Return the ignore patterns listed in *ignore_file_path*.

    The file holds one ``fnmatch``-style pattern per line. Surrounding
    whitespace is stripped and blank lines are skipped. On Windows, forward
    slashes are rewritten to backslashes, the separator that
    ``os.path.relpath`` produces there.

    Raises:
        OSError: if the file cannot be opened.
    """
    ignore_list = []
    with open(ignore_file_path, 'r') as ignore_file:
        for line in ignore_file:
            if sys.platform == "win32":
                line = line.replace("/", "\\")
            pattern = line.strip()
            if pattern:
                ignore_list.append(pattern)
    return ignore_list


def should_ignore(file_path, ignore_list):
    """Return True when *file_path* matches any pattern in *ignore_list*.

    *file_path* is expected to be relative to the repository root. A pattern
    is applied with ``fnmatch.fnmatch`` against the whole path. A pattern that
    ends with a path separator (for example ``__pycache__/``) is additionally
    treated as a directory pattern: it matches every file below a directory of
    that name, at the root or at any depth. Without this extra step such a
    pattern could never match, because a file path never ends in a separator.
    """
    for pattern in ignore_list:
        if fnmatch.fnmatch(file_path, pattern):
            return True

        if pattern.endswith(("/", os.sep)):
            # Directory pattern anchored at the repository root.
            if fnmatch.fnmatch(file_path, pattern + "*"):
                return True
            # Directory of that name anywhere above the file.
            dir_pattern = pattern.rstrip("/" + os.sep)
            parent_dirs = file_path.split(os.sep)[:-1]
            if dir_pattern and any(
                fnmatch.fnmatch(directory, dir_pattern)
                for directory in parent_dirs
            ):
                return True
    return False


def process_repository(repo_path, ignore_list, output_file):
    """Write every non-ignored file under *repo_path* to *output_file*.

    Args:
        repo_path: directory to walk recursively.
        ignore_list: patterns as returned by ``get_ignore_list``.
        output_file: writable text stream that receives the sections.

    Directories and files are visited in sorted order so the output is
    reproducible across filesystems. If *output_file* is a real file located
    inside *repo_path*, it is skipped so the dump does not include itself.
    Files are read with ``errors='ignore'``, so undecodable bytes are dropped
    rather than raising.
    """
    output_name = getattr(output_file, "name", None)
    output_real_path = (
        os.path.realpath(output_name) if isinstance(output_name, str) else None
    )

    for root, dirs, files in os.walk(repo_path):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for file_name in sorted(files):
            file_path = os.path.join(root, file_name)
            if (
                output_real_path is not None
                and os.path.realpath(file_path) == output_real_path
            ):
                continue

            relative_file_path = os.path.relpath(file_path, repo_path)
            if should_ignore(relative_file_path, ignore_list):
                continue

            with open(file_path, 'r', errors='ignore') as source_file:
                contents = source_file.read()
            output_file.write("-" * 4 + "\n")
            output_file.write(f"{relative_file_path}\n")
            output_file.write(f"{contents}\n")


def _option_value(argv, flag, default=None):
    """Return the argument that follows *flag* in *argv*, or *default*.

    Exits with status 1 and a usage message when *flag* is the last argument,
    instead of failing with an IndexError.
    """
    if flag not in argv:
        return default
    position = argv.index(flag) + 1
    if position >= len(argv):
        sys.exit(f"Missing value for {flag}.\n{USAGE}")
    return argv[position]


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: argument list including the program name; defaults to
            ``sys.argv``. ``argv[1]`` is the repository path, ``-p`` names an
            optional preamble file and ``-o`` the output file (default
            ``output.txt`` in the current directory).
    """
    argv = sys.argv if argv is None else list(argv)
    if len(argv) < 2:
        print(USAGE)
        sys.exit(1)

    repo_path = argv[1]
    ignore_file_path = os.path.join(repo_path, ".gptignore")
    if sys.platform == "win32":
        ignore_file_path = ignore_file_path.replace("/", "\\")

    if not os.path.exists(ignore_file_path):
        # Fall back to the .gptignore that sits next to this script.
        here = os.path.dirname(os.path.abspath(__file__))
        ignore_file_path = os.path.join(here, ".gptignore")

    preamble_file = _option_value(argv, "-p")
    output_file_path = _option_value(argv, "-o", "output.txt")

    if os.path.exists(ignore_file_path):
        ignore_list = get_ignore_list(ignore_file_path)
    else:
        ignore_list = []

    # Read the preamble before the output file is truncated, so an unreadable
    # preamble does not wipe an existing output file.
    if preamble_file:
        with open(preamble_file, 'r') as pf:
            preamble_text = f"{pf.read()}\n"
    else:
        preamble_text = DEFAULT_PREAMBLE

    with open(output_file_path, 'w') as output_file:
        output_file.write(preamble_text)
        process_repository(repo_path, ignore_list, output_file)
        output_file.write("--END--")
    print(f"Repository contents written to {output_file_path}.")


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# Fixture files that follow this script in the repository listing, kept here
# as comments; test_gpt_repository_loader.py runs against example_repo and
# compares the result with expected_output.txt.
#
# /test_data/example_repo/.gptignore:
#     *.txt
#     .gptignore
#
# /test_data/example_repo/file1.txt:
#     This is file1.txt
#
# /test_data/example_repo/file2.py:
#     def hello():
#         print("Hello, World!")
#
# /test_data/example_repo/folder1/file3.py:
#     def add(x, y):
#         return x + y
#
# /test_data/example_repo/folder1/file4.txt:
#     This is file4.txt
#
# /test_data/expected_output.txt:
#     ----
#     file2.py
#     def hello():
#         print("Hello, World!")
#
#     ----
#     folder1/file3.py
#     def add(x, y):
#         return x + y
#
# ---------------------------------------------------------------------------
"""End-to-end test for gpt_repository_loader against the example repository."""

import os
import shutil
import tempfile
import unittest

from gpt_repository_loader import get_ignore_list, process_repository


class TestGPTRepositoryLoader(unittest.TestCase):
    """Runs the loader on ``test_data/example_repo`` and checks its output."""

    def setUp(self):
        # Fixture locations are resolved relative to this test file.
        self.test_data_path = os.path.join(os.path.dirname(__file__), 'test_data')
        self.example_repo_path = os.path.join(self.test_data_path, 'example_repo')

    def test_end_to_end(self):
        """The generated text must equal ``test_data/expected_output.txt``."""
        # Register cleanup immediately so the temporary directory is removed
        # even when an assertion below fails.
        output_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, output_dir, ignore_errors=True)

        output_file_path = os.path.join(output_dir, 'output.txt')
        expected_output_file_path = os.path.join(self.test_data_path, 'expected_output.txt')

        # Build the ignore list from the example repository's own .gptignore.
        ignore_file_path = os.path.join(self.example_repo_path, ".gptignore")
        if os.path.exists(ignore_file_path):
            ignore_list = get_ignore_list(ignore_file_path)
        else:
            ignore_list = []

        # Run the loader on the example repository.
        with open(output_file_path, 'w') as output_file:
            process_repository(self.example_repo_path, ignore_list, output_file)

        # Compare the generated output with the expected output.
        with open(output_file_path, 'r') as output_file, \
                open(expected_output_file_path, 'r') as expected_output_file:
            self.assertEqual(output_file.read(), expected_output_file.read())

    def test_placeholder(self):
        """Trivial test that always passes."""
        self.assertTrue(True)


if __name__ == '__main__':
    unittest.main()