├── .github
│   └── workflows
│       ├── publish.yml
│       └── test.yml
├── .gitignore
├── LICENSE
├── README.md
├── files_to_prompt
│   ├── __init__.py
│   ├── __main__.py
│   └── cli.py
├── pyproject.toml
└── tests
    └── test_files_to_prompt.py


/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish Python Package
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [created]  # run when a GitHub release is created
 6 | 
 7 | permissions:
 8 |   contents: read
 9 | 
10 | jobs:
11 |   test:
12 |     runs-on: ubuntu-latest
13 |     strategy:
14 |       matrix:
15 |         python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]  # quoted so "3.10" is not read as the float 3.1
16 |     steps:
17 |     - uses: actions/checkout@v4
18 |     - name: Set up Python ${{ matrix.python-version }}
19 |       uses: actions/setup-python@v5
20 |       with:
21 |         python-version: ${{ matrix.python-version }}
22 |         cache: pip
23 |         cache-dependency-path: pyproject.toml
24 |     - name: Install dependencies
25 |       run: |
26 |         pip install '.[test]'
27 |     - name: Run tests
28 |       run: |
29 |         pytest
30 |   deploy:
31 |     runs-on: ubuntu-latest
32 |     needs: [test]  # publish only after the test job has succeeded
33 |     environment: release
34 |     permissions:
35 |       id-token: write  # OIDC token; presumably for PyPI trusted publishing, since no password is passed to the Publish step (confirm)
36 |     steps:
37 |     - uses: actions/checkout@v4
38 |     - name: Set up Python
39 |       uses: actions/setup-python@v5
40 |       with:
41 |         python-version: "3.13"
42 |         cache: pip
43 |         cache-dependency-path: pyproject.toml
44 |     - name: Install dependencies
45 |       run: |
46 |         pip install setuptools wheel build
47 |     - name: Build
48 |       run: |
49 |         python -m build
50 |     - name: Publish
51 |       uses: pypa/gh-action-pypi-publish@release/v1
52 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on: [push, pull_request]  # run the test matrix on every push and pull request
 4 | 
 5 | permissions:
 6 |   contents: read
 7 | 
 8 | jobs:
 9 |   test:
10 |     runs-on: ubuntu-latest
11 |     strategy:
12 |       matrix:
13 |         python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]  # quoted so "3.10" is not read as the float 3.1
14 |     steps:
15 |     - uses: actions/checkout@v4
16 |     - name: Set up Python ${{ matrix.python-version }}
17 |       uses: actions/setup-python@v5  # same major version as publish.yml
18 |       with:
19 |         python-version: ${{ matrix.python-version }}
20 |         cache: pip
21 |         cache-dependency-path: pyproject.toml
22 |     - name: Install dependencies
23 |       run: |
24 |         pip install '.[test]'
25 |     - name: Run tests
26 |       run: |
27 |         pytest
28 | 
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .venv
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | venv
 6 | .eggs
 7 | .pytest_cache
 8 | *.egg-info
 9 | .DS_Store
10 | build/
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # files-to-prompt
  2 | 
  3 | [![PyPI](https://img.shields.io/pypi/v/files-to-prompt.svg)](https://pypi.org/project/files-to-prompt/)
  4 | [![Changelog](https://img.shields.io/github/v/release/simonw/files-to-prompt?include_prereleases&label=changelog)](https://github.com/simonw/files-to-prompt/releases)
  5 | [![Tests](https://github.com/simonw/files-to-prompt/actions/workflows/test.yml/badge.svg)](https://github.com/simonw/files-to-prompt/actions/workflows/test.yml)
  6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/files-to-prompt/blob/master/LICENSE)
  7 | 
  8 | Concatenate a directory full of files into a single prompt for use with LLMs
  9 | 
 10 | For background on this project see [Building files-to-prompt entirely using Claude 3 Opus](https://simonwillison.net/2024/Apr/8/files-to-prompt/).
 11 | 
 12 | ## Installation
 13 | 
 14 | Install this tool using `pip`:
 15 | 
 16 | ```bash
 17 | pip install files-to-prompt
 18 | ```
 19 | 
 20 | ## Usage
 21 | 
 22 | To use `files-to-prompt`, provide the path to one or more files or directories you want to process:
 23 | 
 24 | ```bash
 25 | files-to-prompt path/to/file_or_directory [path/to/another/file_or_directory ...]
 26 | ```
 27 | 
 28 | This will output the contents of every file, with each file preceded by its relative path and separated by `---`.
 29 | 
 30 | ### Options
 31 | 
 32 | - `-e/--extension <extension>`: Only include files with the specified extension. Can be used multiple times.
 33 | 
 34 |   ```bash
 35 |   files-to-prompt path/to/directory -e txt -e md
 36 |   ```
 37 | 
 38 | - `--include-hidden`: Include files and folders starting with `.` (hidden files and directories).
 39 | 
 40 |   ```bash
 41 |   files-to-prompt path/to/directory --include-hidden
 42 |   ```
 43 | 
 44 | - `--ignore <pattern>`: Specify one or more patterns to ignore. Can be used multiple times. Patterns may match file names and directory names, unless you also specify `--ignore-files-only`. Pattern syntax uses [fnmatch](https://docs.python.org/3/library/fnmatch.html), which supports `*`, `?`, `[anychar]`, `[!notchars]` and `[?]` for special character literals.
 45 |   ```bash
 46 |   files-to-prompt path/to/directory --ignore "*.log" --ignore "temp*"
 47 |   ```
 48 | 
 49 | - `--ignore-files-only`: Include directory paths which would otherwise be ignored by an `--ignore` pattern.
 50 | 
 51 |   ```bash
 52 |   files-to-prompt path/to/directory --ignore-files-only --ignore "*dir*"
 53 |   ```
 54 | 
 55 | - `--ignore-gitignore`: Ignore `.gitignore` files and include all files.
 56 | 
 57 |   ```bash
 58 |   files-to-prompt path/to/directory --ignore-gitignore
 59 |   ```
 60 | 
 61 | - `-c/--cxml`: Output in Claude XML format.
 62 | 
 63 |   ```bash
 64 |   files-to-prompt path/to/directory --cxml
 65 |   ```
 66 | 
 67 | - `-m/--markdown`: Output as Markdown with fenced code blocks.
 68 | 
 69 |   ```bash
 70 |   files-to-prompt path/to/directory --markdown
 71 |   ```
 72 | 
 73 | - `-o/--output <file>`: Write the output to a file instead of printing it to the console.
 74 | 
 75 |   ```bash
 76 |   files-to-prompt path/to/directory -o output.txt
 77 |   ```
 78 | 
 79 | - `-n/--line-numbers`: Include line numbers in the output.
 80 | 
 81 |   ```bash
 82 |   files-to-prompt path/to/directory -n
 83 |   ```
 84 |   Example output:
 85 |   ```
 86 |   files_to_prompt/cli.py
 87 |   ---
 88 |     1  import os
 89 |     2  from fnmatch import fnmatch
 90 |     3
 91 |     4  import click
 92 |     ...
 93 |   ```
 94 | 
 95 | - `-0/--null`: Use NUL character as separator when reading paths from stdin. Useful when filenames may contain spaces.
 96 | 
 97 |   ```bash
 98 |   find . -name "*.py" -print0 | files-to-prompt --null
 99 |   ```
100 | 
101 | ### Example
102 | 
103 | Suppose you have a directory structure like this:
104 | 
105 | ```
106 | my_directory/
107 | ├── file1.txt
108 | ├── file2.txt
109 | ├── .hidden_file.txt
110 | ├── temp.log
111 | └── subdirectory/
112 |     └── file3.txt
113 | ```
114 | 
115 | Running `files-to-prompt my_directory` will output:
116 | 
117 | ```
118 | my_directory/file1.txt
119 | ---
120 | Contents of file1.txt
121 | ---
122 | my_directory/file2.txt
123 | ---
124 | Contents of file2.txt
125 | ---
126 | my_directory/subdirectory/file3.txt
127 | ---
128 | Contents of file3.txt
129 | ---
130 | ```
131 | 
132 | If you run `files-to-prompt my_directory --include-hidden`, the output will also include `.hidden_file.txt`:
133 | 
134 | ```
135 | my_directory/.hidden_file.txt
136 | ---
137 | Contents of .hidden_file.txt
138 | ---
139 | ...
140 | ```
141 | 
142 | If you run `files-to-prompt my_directory --ignore "*.log"`, the output will exclude `temp.log`:
143 | 
144 | ```
145 | my_directory/file1.txt
146 | ---
147 | Contents of file1.txt
148 | ---
149 | my_directory/file2.txt
150 | ---
151 | Contents of file2.txt
152 | ---
153 | my_directory/subdirectory/file3.txt
154 | ---
155 | Contents of file3.txt
156 | ---
157 | ```
158 | 
159 | If you run `files-to-prompt my_directory --ignore "sub*"`, the output will exclude all files in `subdirectory/` (unless you also specify `--ignore-files-only`):
160 | 
161 | ```
162 | my_directory/file1.txt
163 | ---
164 | Contents of file1.txt
165 | ---
166 | my_directory/file2.txt
167 | ---
168 | Contents of file2.txt
169 | ---
170 | ```
171 | 
172 | ### Reading from stdin
173 | 
174 | The tool can also read paths from standard input. This can be used to pipe in the output of another command:
175 | 
176 | ```bash
177 | # Find files modified in the last day
178 | find . -mtime -1 | files-to-prompt
179 | ```
180 | 
181 | When using the `--null` (or `-0`) option, paths are expected to be NUL-separated (useful when dealing with filenames containing spaces):
182 | 
183 | ```bash
184 | find . -name "*.txt" -print0 | files-to-prompt --null
185 | ```
186 | 
187 | You can mix and match paths from command line arguments and stdin:
188 | 
189 | ```bash
190 | # Include files modified in the last day, and also include README.md
191 | find . -mtime -1 | files-to-prompt README.md
192 | ```
193 | 
194 | ### Claude XML Output
195 | 
196 | Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.
197 | 
198 | To structure the output in this way, use the optional `--cxml` flag, which will produce output like this:
199 | 
200 | ```xml
201 | <documents>
202 | <document index="1">
203 | <source>my_directory/file1.txt</source>
204 | <document_content>
205 | Contents of file1.txt
206 | </document_content>
207 | </document>
208 | <document index="2">
209 | <source>my_directory/file2.txt</source>
210 | <document_content>
211 | Contents of file2.txt
212 | </document_content>
213 | </document>
214 | </documents>
215 | ```
216 | 
217 | ### --markdown fenced code block output
218 | 
219 | The `--markdown` option will output the files as fenced code blocks, which can be useful for pasting into Markdown documents.
220 | 
221 | ```bash
222 | files-to-prompt path/to/directory --markdown
223 | ```
224 | The language tag will be guessed based on the filename.
225 | 
226 | If the code itself contains triple backticks the wrapper around it will use one additional backtick.
227 | 
228 | Example output:
229 | `````
230 | myfile.py
231 | ```python
232 | def my_function():
233 |     return "Hello, world!"
234 | ```
235 | other.js
236 | ```javascript
237 | function myFunction() {
238 |     return "Hello, world!";
239 | }
240 | ```
241 | file_with_triple_backticks.md
242 | ````markdown
243 | This file has its own
244 | ```
245 | fenced code blocks
246 | ```
247 | Inside it.
248 | ````
249 | `````
250 | 
251 | ## Development
252 | 
253 | To contribute to this tool, first check out the code. Then create a new virtual environment:
254 | 
255 | ```bash
256 | cd files-to-prompt
257 | python -m venv venv
258 | source venv/bin/activate
259 | ```
260 | 
261 | Now install the dependencies and test dependencies:
262 | 
263 | ```bash
264 | pip install -e '.[test]'
265 | ```
266 | 
267 | To run the tests:
268 | 
269 | ```bash
270 | pytest
271 | ```
272 | 


--------------------------------------------------------------------------------
/files_to_prompt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonw/files-to-prompt/1b234ff6dccb2ca3e56b5c256696558fb85306dc/files_to_prompt/__init__.py


--------------------------------------------------------------------------------
/files_to_prompt/__main__.py:
--------------------------------------------------------------------------------
1 | from .cli import cli
2 | 
3 | if __name__ == "__main__":
4 |     cli()
5 | 


--------------------------------------------------------------------------------
/files_to_prompt/cli.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | from fnmatch import fnmatch
  4 | 
  5 | import click
  6 | 
  7 | global_index = 1  # next index written by print_as_xml; incremented after each document
  8 | 
  9 | EXT_TO_LANG = {  # file extension -> language tag for Markdown fences (used by print_as_markdown)
 10 |     "py": "python",
 11 |     "c": "c",
 12 |     "cpp": "cpp",
 13 |     "java": "java",
 14 |     "js": "javascript",
 15 |     "ts": "typescript",
 16 |     "html": "html",
 17 |     "css": "css",
 18 |     "xml": "xml",
 19 |     "json": "json",
 20 |     "yaml": "yaml",
 21 |     "yml": "yaml",
 22 |     "sh": "bash",
 23 |     "rb": "ruby",
 24 | }
 25 | 
 26 | 
 27 | def should_ignore(path, gitignore_rules):  # True if the basename of path fnmatch-es any rule
 28 |     for rule in gitignore_rules:
 29 |         if fnmatch(os.path.basename(path), rule):
 30 |             return True
 31 |         if os.path.isdir(path) and fnmatch(os.path.basename(path) + "/", rule):  # also try "name/" so rules written with a trailing slash match directories
 32 |             return True
 33 |     return False
 34 | 
 35 | 
 36 | def read_gitignore(path):  # Return the rules listed in <path>/.gitignore, or [] when that file does not exist
 37 |     gitignore_path = os.path.join(path, ".gitignore")
 38 |     if os.path.isfile(gitignore_path):
 39 |         with open(gitignore_path, "r") as f:
 40 |             return [  # keep stripped lines; blank lines and comment lines are dropped
 41 |                 line.strip() for line in f if line.strip() and not line.startswith("#")
 42 |             ]
 43 |     return []
 44 | 
 45 | 
 46 | def add_line_numbers(content):  # Return content with every line prefixed by its 1-based line number
 47 |     lines = content.splitlines()
 48 | 
 49 |     padding = len(str(len(lines)))  # digit count of the largest line number
 50 | 
 51 |     numbered_lines = [f"{i + 1:{padding}}  {line}" for i, line in enumerate(lines)]  # number padded to that width, then two spaces
 52 |     return "\n".join(numbered_lines)
 53 | 
 54 | 
 55 | def print_path(writer, path, content, cxml, markdown, line_numbers):  # Dispatch one file to the selected output format
 56 |     if cxml:  # cxml is checked first, so it wins when both flags are set
 57 |         print_as_xml(writer, path, content, line_numbers)
 58 |     elif markdown:
 59 |         print_as_markdown(writer, path, content, line_numbers)
 60 |     else:
 61 |         print_default(writer, path, content, line_numbers)
 62 | 
 63 | 
 64 | def print_default(writer, path, content, line_numbers):  # Write path, a --- line, the content, an empty line and a closing ---
 65 |     writer(path)
 66 |     writer("---")
 67 |     if line_numbers:
 68 |         content = add_line_numbers(content)
 69 |     writer(content)
 70 |     writer("")
 71 |     writer("---")
 72 | 
 73 | 
 74 | def print_as_xml(writer, path, content, line_numbers):  # Write one <document> element holding the path and content
 75 |     global global_index  # module-level counter shared by every call
 76 |     writer(f'<document index="{global_index}">')
 77 |     writer(f"<source>{path}</source>")
 78 |     writer("<document_content>")
 79 |     if line_numbers:
 80 |         content = add_line_numbers(content)
 81 |     writer(content)  # written as-is; nothing here escapes XML special characters
 82 |     writer("</document_content>")
 83 |     writer("</document>")
 84 |     global_index += 1  # the next document gets the next index
 85 | 
 86 | 
 87 | def print_as_markdown(writer, path, content, line_numbers):  # Write the path, then the content inside a fenced code block
 88 |     lang = EXT_TO_LANG.get(path.split(".")[-1], "")  # text after the last "." picks the tag; unknown extensions give an untagged fence
 89 |     # Figure out how many backticks to use
 90 |     backticks = "```"
 91 |     while backticks in content:  # grow the fence until that run of backticks no longer occurs in the content
 92 |         backticks += "`"
 93 |     writer(path)
 94 |     writer(f"{backticks}{lang}")
 95 |     if line_numbers:
 96 |         content = add_line_numbers(content)
 97 |     writer(content)
 98 |     writer(f"{backticks}")
 99 | 
100 | 
101 | def process_path(  # Print a single file, or walk a directory and print every file that survives the filters
102 |     path,
103 |     extensions,
104 |     include_hidden,
105 |     ignore_files_only,
106 |     ignore_gitignore,
107 |     gitignore_rules,
108 |     ignore_patterns,
109 |     writer,
110 |     claude_xml,
111 |     markdown,
112 |     line_numbers=False,
113 | ):
114 |     if os.path.isfile(path):  # a file path is printed directly; none of the directory filters below are applied
115 |         try:
116 |             with open(path, "r") as f:
117 |                 print_path(writer, path, f.read(), claude_xml, markdown, line_numbers)
118 |         except UnicodeDecodeError:  # undecodable file: warn on stderr and skip it
119 |             warning_message = f"Warning: Skipping file {path} due to UnicodeDecodeError"
120 |             click.echo(click.style(warning_message, fg="red"), err=True)
121 |     elif os.path.isdir(path):
122 |         for root, dirs, files in os.walk(path):
123 |             if not include_hidden:
124 |                 dirs[:] = [d for d in dirs if not d.startswith(".")]  # in-place edit so os.walk does not descend into them
125 |                 files = [f for f in files if not f.startswith(".")]
126 | 
127 |             if not ignore_gitignore:
128 |                 gitignore_rules.extend(read_gitignore(root))  # NOTE: mutates the caller's list, so rules accumulate across every directory visited
129 |                 dirs[:] = [
130 |                     d
131 |                     for d in dirs
132 |                     if not should_ignore(os.path.join(root, d), gitignore_rules)
133 |                 ]
134 |                 files = [
135 |                     f
136 |                     for f in files
137 |                     if not should_ignore(os.path.join(root, f), gitignore_rules)
138 |                 ]
139 | 
140 |             if ignore_patterns:
141 |                 if not ignore_files_only:  # unless restricted to files, the patterns prune directories as well
142 |                     dirs[:] = [
143 |                         d
144 |                         for d in dirs
145 |                         if not any(fnmatch(d, pattern) for pattern in ignore_patterns)
146 |                     ]
147 |                 files = [
148 |                     f
149 |                     for f in files
150 |                     if not any(fnmatch(f, pattern) for pattern in ignore_patterns)
151 |                 ]
152 | 
153 |             if extensions:
154 |                 files = [f for f in files if f.endswith(extensions)]  # plain suffix match; presumably extensions is a tuple of strings - confirm at the caller
155 | 
156 |             for file in sorted(files):  # sorted order within each directory
157 |                 file_path = os.path.join(root, file)
158 |                 try:
159 |                     with open(file_path, "r") as f:
160 |                         print_path(
161 |                             writer,
162 |                             file_path,
163 |                             f.read(),
164 |                             claude_xml,
165 |                             markdown,
166 |                             line_numbers,
167 |                         )
168 |                 except UnicodeDecodeError:  # same warn-and-skip handling as the single-file branch
169 |                     warning_message = (
170 |                         f"Warning: Skipping file {file_path} due to UnicodeDecodeError"
171 |                     )
172 |                     click.echo(click.style(warning_message, fg="red"), err=True)
173 | 
174 | 
175 | def read_paths_from_stdin(use_null_separator):  # Return the list of paths read from stdin, or [] when stdin is a terminal
176 |     if sys.stdin.isatty():
177 |         # No ready input from stdin, don't block for input
178 |         return []
179 | 
180 |     stdin_content = sys.stdin.read()
181 |     if use_null_separator:
182 |         paths = stdin_content.split("\0")
183 |     else:
184 |         paths = stdin_content.split()  # split on whitespace
185 |     return [p for p in paths if p]  # drop empty entries, e.g. the one after a trailing NUL
186 | 
187 | 
188 | @click.command()
189 | @click.argument("paths", nargs=-1, type=click.Path(exists=True))
190 | @click.option("extensions", "-e", "--extension", multiple=True)
191 | @click.option(
192 |     "--include-hidden",
193 |     is_flag=True,
194 |     help="Include files and folders starting with .",
195 | )
196 | @click.option(
197 |     "--ignore-files-only",
198 |     is_flag=True,
199 |     help="--ignore option only ignores files",
200 | )
201 | @click.option(
202 |     "--ignore-gitignore",
203 |     is_flag=True,
204 |     help="Ignore .gitignore files and include all files",
205 | )
206 | @click.option(
207 |     "ignore_patterns",
208 |     "--ignore",
209 |     multiple=True,
210 |     default=[],
211 |     help="List of patterns to ignore",
212 | )
213 | @click.option(
214 |     "output_file",
215 |     "-o",
216 |     "--output",
217 |     type=click.Path(writable=True),
218 |     help="Output to a file instead of stdout",
219 | )
220 | @click.option(
221 |     "claude_xml",
222 |     "-c",
223 |     "--cxml",
224 |     is_flag=True,
225 |     help="Output in XML-ish format suitable for Claude's long context window.",
226 | )
227 | @click.option(
228 |     "markdown",
229 |     "-m",
230 |     "--markdown",
231 |     is_flag=True,
232 |     help="Output Markdown with fenced code blocks",
233 | )
234 | @click.option(
235 |     "line_numbers",
236 |     "-n",
237 |     "--line-numbers",
238 |     is_flag=True,
239 |     help="Add line numbers to the output",
240 | )
241 | @click.option(
242 |     "--null",
243 |     "-0",
244 |     is_flag=True,
245 |     help="Use NUL character as separator when reading from stdin",
246 | )
247 | @click.version_option()
248 | def cli(
249 |     paths,
250 |     extensions,
251 |     include_hidden,
252 |     ignore_files_only,
253 |     ignore_gitignore,
254 |     ignore_patterns,
255 |     output_file,
256 |     claude_xml,
257 |     markdown,
258 |     line_numbers,
259 |     null,
260 | ):
261 |     """
262 |     Takes one or more paths to files or directories and outputs every file,
263 |     recursively, each one preceded with its filename like this:
264 | 
265 |     \b
266 |         path/to/file.py
267 |         ----
268 |         Contents of file.py goes here
269 |         ---
270 |         path/to/file2.py
271 |         ---
272 |         ...
273 | 
274 |     If the `--cxml` flag is provided, the output will be structured as follows:
275 | 
276 |     \b
277 |         <documents>
278 |         <document path="path/to/file1.txt">
279 |         Contents of file1.txt
280 |         </document>
281 |         <document path="path/to/file2.txt">
282 |         Contents of file2.txt
283 |         </document>
284 |         ...
285 |         </documents>
286 | 
287 |     If the `--markdown` flag is provided, the output will be structured as follows:
288 | 
289 |     \b
290 |         path/to/file1.py
291 |         ```python
292 |         Contents of file1.py
293 |         ```
294 |     """
295 |     # Reset global_index for pytest
296 |     global global_index
297 |     global_index = 1
298 | 
299 |     # Read paths from stdin if available
300 |     stdin_paths = read_paths_from_stdin(use_null_separator=null)
301 | 
302 |     # Combine paths from arguments and stdin
303 |     paths = [*paths, *stdin_paths]
304 | 
305 |     gitignore_rules = []
306 |     writer = click.echo
307 |     fp = None
308 |     if output_file:
309 |         fp = open(output_file, "w", encoding="utf-8")
310 |         writer = lambda s: print(s, file=fp)
311 |     for path in paths:
312 |         if not os.path.exists(path):
313 |             raise click.BadArgumentUsage(f"Path does not exist: {path}")
314 |         if not ignore_gitignore:
315 |             gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
316 |         if claude_xml and path == paths[0]:
317 |             writer("<documents>")
318 |         process_path(
319 |             path,
320 |             extensions,
321 |             include_hidden,
322 |             ignore_files_only,
323 |             ignore_gitignore,
324 |             gitignore_rules,
325 |             ignore_patterns,
326 |             writer,
327 |             claude_xml,
328 |             markdown,
329 |             line_numbers,
330 |         )
331 |     if claude_xml:
332 |         writer("</documents>")
333 |     if fp:
334 |         fp.close()
335 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "files-to-prompt"
 3 | version = "0.6"
 4 | description = "Concatenate a directory full of files into a single prompt for use with LLMs"
 5 | readme = "README.md"
 6 | authors = [{name = "Simon Willison"}]
 7 | license = {text = "Apache-2.0"}
 8 | requires-python = ">=3.8"
 9 | classifiers = [
10 |     "License :: OSI Approved :: Apache Software License"
11 | ]
12 | dependencies = [
13 |     "click"
14 | ]
15 | 
16 | [project.urls]
17 | Homepage = "https://github.com/simonw/files-to-prompt"
18 | Changelog = "https://github.com/simonw/files-to-prompt/releases"
19 | Issues = "https://github.com/simonw/files-to-prompt/issues"
20 | CI = "https://github.com/simonw/files-to-prompt/actions"
21 | 
22 | # Console commands belong in [project.scripts]; the pyproject.toml specification
23 | # reserves the "console_scripts" group and disallows it under entry-points.
24 | [project.scripts]
25 | files-to-prompt = "files_to_prompt.cli:cli"
26 | 
27 | [project.optional-dependencies]
28 | # Installed by CI via `pip install '.[test]'`
29 | test = ["pytest"]
30 | 


--------------------------------------------------------------------------------
/tests/test_files_to_prompt.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pytest
  3 | import re
  4 | 
  5 | from click.testing import CliRunner
  6 | 
  7 | from files_to_prompt.cli import cli
  8 | 
  9 | 
 10 | def filenames_from_cxml(cxml_string):
 11 |     "Return set of filenames from <source>...</source> tags"
 12 |     return set(re.findall(r"<source>(.*?)</source>", cxml_string))
 13 | 
 14 | 
 15 | def test_basic_functionality(tmpdir):
 16 |     runner = CliRunner()
 17 |     with tmpdir.as_cwd():
 18 |         os.makedirs("test_dir")
 19 |         with open("test_dir/file1.txt", "w") as f:
 20 |             f.write("Contents of file1")
 21 |         with open("test_dir/file2.txt", "w") as f:
 22 |             f.write("Contents of file2")
 23 | 
 24 |         result = runner.invoke(cli, ["test_dir"])
 25 |         assert result.exit_code == 0
 26 |         assert "test_dir/file1.txt" in result.output
 27 |         assert "Contents of file1" in result.output
 28 |         assert "test_dir/file2.txt" in result.output
 29 |         assert "Contents of file2" in result.output
 30 | 
 31 | 
 32 | def test_include_hidden(tmpdir):
 33 |     runner = CliRunner()
 34 |     with tmpdir.as_cwd():
 35 |         os.makedirs("test_dir")
 36 |         with open("test_dir/.hidden.txt", "w") as f:
 37 |             f.write("Contents of hidden file")
 38 | 
 39 |         result = runner.invoke(cli, ["test_dir"])
 40 |         assert result.exit_code == 0
 41 |         assert "test_dir/.hidden.txt" not in result.output
 42 | 
 43 |         result = runner.invoke(cli, ["test_dir", "--include-hidden"])
 44 |         assert result.exit_code == 0
 45 |         assert "test_dir/.hidden.txt" in result.output
 46 |         assert "Contents of hidden file" in result.output
 47 | 
 48 | 
 49 | def test_ignore_gitignore(tmpdir):
 50 |     runner = CliRunner()
 51 |     with tmpdir.as_cwd():
 52 |         os.makedirs("test_dir")
 53 |         os.makedirs("test_dir/nested_include")
 54 |         os.makedirs("test_dir/nested_ignore")
 55 |         with open("test_dir/.gitignore", "w") as f:
 56 |             f.write("ignored.txt")
 57 |         with open("test_dir/ignored.txt", "w") as f:
 58 |             f.write("This file should be ignored")
 59 |         with open("test_dir/included.txt", "w") as f:
 60 |             f.write("This file should be included")
 61 |         with open("test_dir/nested_include/included2.txt", "w") as f:
 62 |             f.write("This nested file should be included")
 63 |         with open("test_dir/nested_ignore/.gitignore", "w") as f:
 64 |             f.write("nested_ignore.txt")
 65 |         with open("test_dir/nested_ignore/nested_ignore.txt", "w") as f:
 66 |             f.write("This nested file should not be included")
 67 |         with open("test_dir/nested_ignore/actually_include.txt", "w") as f:
 68 |             f.write("This nested file should actually be included")
 69 | 
 70 |         result = runner.invoke(cli, ["test_dir", "-c"])
 71 |         assert result.exit_code == 0
 72 |         filenames = filenames_from_cxml(result.output)
 73 | 
 74 |         assert filenames == {
 75 |             "test_dir/included.txt",
 76 |             "test_dir/nested_include/included2.txt",
 77 |             "test_dir/nested_ignore/actually_include.txt",
 78 |         }
 79 | 
 80 |         result2 = runner.invoke(cli, ["test_dir", "-c", "--ignore-gitignore"])
 81 |         assert result2.exit_code == 0
 82 |         filenames2 = filenames_from_cxml(result2.output)
 83 | 
 84 |         assert filenames2 == {
 85 |             "test_dir/included.txt",
 86 |             "test_dir/ignored.txt",
 87 |             "test_dir/nested_include/included2.txt",
 88 |             "test_dir/nested_ignore/nested_ignore.txt",
 89 |             "test_dir/nested_ignore/actually_include.txt",
 90 |         }
 91 | 
 92 | 
 93 | def test_multiple_paths(tmpdir):
 94 |     runner = CliRunner()
 95 |     with tmpdir.as_cwd():
 96 |         os.makedirs("test_dir1")
 97 |         with open("test_dir1/file1.txt", "w") as f:
 98 |             f.write("Contents of file1")
 99 |         os.makedirs("test_dir2")
100 |         with open("test_dir2/file2.txt", "w") as f:
101 |             f.write("Contents of file2")
102 |         with open("single_file.txt", "w") as f:
103 |             f.write("Contents of single file")
104 | 
105 |         result = runner.invoke(cli, ["test_dir1", "test_dir2", "single_file.txt"])
106 |         assert result.exit_code == 0
107 |         assert "test_dir1/file1.txt" in result.output
108 |         assert "Contents of file1" in result.output
109 |         assert "test_dir2/file2.txt" in result.output
110 |         assert "Contents of file2" in result.output
111 |         assert "single_file.txt" in result.output
112 |         assert "Contents of single file" in result.output
113 | 
114 | 
115 | def test_ignore_patterns(tmpdir):
116 |     runner = CliRunner()
117 |     with tmpdir.as_cwd():
118 |         os.makedirs("test_dir", exist_ok=True)
119 |         with open("test_dir/file_to_ignore.txt", "w") as f:
120 |             f.write("This file should be ignored due to ignore patterns")
121 |         with open("test_dir/file_to_include.txt", "w") as f:
122 |             f.write("This file should be included")
123 | 
124 |         result = runner.invoke(cli, ["test_dir", "--ignore", "*.txt"])
125 |         assert result.exit_code == 0
126 |         assert "test_dir/file_to_ignore.txt" not in result.output
127 |         assert "This file should be ignored due to ignore patterns" not in result.output
128 |         assert "test_dir/file_to_include.txt" not in result.output
129 | 
130 |         os.makedirs("test_dir/test_subdir", exist_ok=True)
131 |         with open("test_dir/test_subdir/any_file.txt", "w") as f:
132 |             f.write("This entire subdirectory should be ignored due to ignore patterns")
133 |         result = runner.invoke(cli, ["test_dir", "--ignore", "*subdir*"])
134 |         assert result.exit_code == 0
135 |         assert "test_dir/test_subdir/any_file.txt" not in result.output
136 |         assert (
137 |             "This entire subdirectory should be ignored due to ignore patterns"
138 |             not in result.output
139 |         )
140 |         assert "test_dir/file_to_include.txt" in result.output
141 |         assert "This file should be included" in result.output
142 |         assert "This file should be included" in result.output
143 | 
144 |         result = runner.invoke(
145 |             cli, ["test_dir", "--ignore", "*subdir*", "--ignore-files-only"]
146 |         )
147 |         assert result.exit_code == 0
148 |         assert "test_dir/test_subdir/any_file.txt" in result.output
149 | 
150 |         result = runner.invoke(cli, ["test_dir", "--ignore", ""])
151 | 
152 | 
153 | def test_specific_extensions(tmpdir):
154 |     runner = CliRunner()
155 |     with tmpdir.as_cwd():
156 |         # Write one.txt one.py two/two.txt two/two.py three.md
157 |         os.makedirs("test_dir/two")
158 |         with open("test_dir/one.txt", "w") as f:
159 |             f.write("This is one.txt")
160 |         with open("test_dir/one.py", "w") as f:
161 |             f.write("This is one.py")
162 |         with open("test_dir/two/two.txt", "w") as f:
163 |             f.write("This is two/two.txt")
164 |         with open("test_dir/two/two.py", "w") as f:
165 |             f.write("This is two/two.py")
166 |         with open("test_dir/three.md", "w") as f:
167 |             f.write("This is three.md")
168 | 
169 |         # Try with -e py -e md
170 |         result = runner.invoke(cli, ["test_dir", "-e", "py", "-e", "md"])
171 |         assert result.exit_code == 0
172 |         assert ".txt" not in result.output
173 |         assert "test_dir/one.py" in result.output
174 |         assert "test_dir/two/two.py" in result.output
175 |         assert "test_dir/three.md" in result.output
176 | 
177 | 
178 | def test_mixed_paths_with_options(tmpdir):
179 |     runner = CliRunner()
180 |     with tmpdir.as_cwd():
181 |         os.makedirs("test_dir")
182 |         with open("test_dir/.gitignore", "w") as f:
183 |             f.write("ignored_in_gitignore.txt\n.hidden_ignored_in_gitignore.txt")
184 |         with open("test_dir/ignored_in_gitignore.txt", "w") as f:
185 |             f.write("This file should be ignored by .gitignore")
186 |         with open("test_dir/.hidden_ignored_in_gitignore.txt", "w") as f:
187 |             f.write("This hidden file should be ignored by .gitignore")
188 |         with open("test_dir/included.txt", "w") as f:
189 |             f.write("This file should be included")
190 |         with open("test_dir/.hidden_included.txt", "w") as f:
191 |             f.write("This hidden file should be included")
192 |         with open("single_file.txt", "w") as f:
193 |             f.write("Contents of single file")
194 | 
195 |         result = runner.invoke(cli, ["test_dir", "single_file.txt"])
196 |         assert result.exit_code == 0
197 |         assert "test_dir/ignored_in_gitignore.txt" not in result.output
198 |         assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output
199 |         assert "test_dir/included.txt" in result.output
200 |         assert "test_dir/.hidden_included.txt" not in result.output
201 |         assert "single_file.txt" in result.output
202 |         assert "Contents of single file" in result.output
203 | 
204 |         result = runner.invoke(cli, ["test_dir", "single_file.txt", "--include-hidden"])
205 |         assert result.exit_code == 0
206 |         assert "test_dir/ignored_in_gitignore.txt" not in result.output
207 |         assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output
208 |         assert "test_dir/included.txt" in result.output
209 |         assert "test_dir/.hidden_included.txt" in result.output
210 |         assert "single_file.txt" in result.output
211 |         assert "Contents of single file" in result.output
212 | 
213 |         result = runner.invoke(
214 |             cli, ["test_dir", "single_file.txt", "--ignore-gitignore"]
215 |         )
216 |         assert result.exit_code == 0
217 |         assert "test_dir/ignored_in_gitignore.txt" in result.output
218 |         assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output
219 |         assert "test_dir/included.txt" in result.output
220 |         assert "test_dir/.hidden_included.txt" not in result.output
221 |         assert "single_file.txt" in result.output
222 |         assert "Contents of single file" in result.output
223 | 
224 |         result = runner.invoke(
225 |             cli,
226 |             ["test_dir", "single_file.txt", "--ignore-gitignore", "--include-hidden"],
227 |         )
228 |         assert result.exit_code == 0
229 |         assert "test_dir/ignored_in_gitignore.txt" in result.output
230 |         assert "test_dir/.hidden_ignored_in_gitignore.txt" in result.output
231 |         assert "test_dir/included.txt" in result.output
232 |         assert "test_dir/.hidden_included.txt" in result.output
233 |         assert "single_file.txt" in result.output
234 |         assert "Contents of single file" in result.output
235 | 
236 | 
237 | def test_binary_file_warning(tmpdir):
238 |     runner = CliRunner(mix_stderr=False)
239 |     with tmpdir.as_cwd():
240 |         os.makedirs("test_dir")
241 |         with open("test_dir/binary_file.bin", "wb") as f:
242 |             f.write(b"\xff")
243 |         with open("test_dir/text_file.txt", "w") as f:
244 |             f.write("This is a text file")
245 | 
246 |         result = runner.invoke(cli, ["test_dir"])
247 |         assert result.exit_code == 0
248 | 
249 |         stdout = result.stdout
250 |         stderr = result.stderr
251 | 
252 |         assert "test_dir/text_file.txt" in stdout
253 |         assert "This is a text file" in stdout
254 |         assert "\ntest_dir/binary_file.bin" not in stdout
255 |         assert (
256 |             "Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError"
257 |             in stderr
258 |         )
259 | 
260 | 
261 | @pytest.mark.parametrize(
262 |     "args", (["test_dir"], ["test_dir/file1.txt", "test_dir/file2.txt"])
263 | )
264 | def test_xml_format_dir(tmpdir, args):
265 |     runner = CliRunner()
266 |     with tmpdir.as_cwd():
267 |         os.makedirs("test_dir")
268 |         with open("test_dir/file1.txt", "w") as f:
269 |             f.write("Contents of file1.txt")
270 |         with open("test_dir/file2.txt", "w") as f:
271 |             f.write("Contents of file2.txt")
272 |         result = runner.invoke(cli, args + ["--cxml"])
273 |         assert result.exit_code == 0
274 |         actual = result.output
275 |         expected = """
276 | <documents>
277 | <document index="1">
278 | <source>test_dir/file1.txt</source>
279 | <document_content>
280 | Contents of file1.txt
281 | </document_content>
282 | </document>
283 | <document index="2">
284 | <source>test_dir/file2.txt</source>
285 | <document_content>
286 | Contents of file2.txt
287 | </document_content>
288 | </document>
289 | </documents>
290 | """
291 |         assert expected.strip() == actual.strip()
292 | 
293 | 
294 | @pytest.mark.parametrize("arg", ("-o", "--output"))
295 | def test_output_option(tmpdir, arg):
296 |     runner = CliRunner()
297 |     with tmpdir.as_cwd():
298 |         os.makedirs("test_dir")
299 |         with open("test_dir/file1.txt", "w") as f:
300 |             f.write("Contents of file1.txt")
301 |         with open("test_dir/file2.txt", "w") as f:
302 |             f.write("Contents of file2.txt")
303 |         output_file = "output.txt"
304 |         result = runner.invoke(
305 |             cli, ["test_dir", arg, output_file], catch_exceptions=False
306 |         )
307 |         assert result.exit_code == 0
308 |         assert not result.output
309 |         with open(output_file, "r") as f:
310 |             actual = f.read()
311 |         expected = """
312 | test_dir/file1.txt
313 | ---
314 | Contents of file1.txt
315 | 
316 | ---
317 | test_dir/file2.txt
318 | ---
319 | Contents of file2.txt
320 | 
321 | ---
322 | """
323 |         assert expected.strip() == actual.strip()
324 | 
325 | 
326 | def test_line_numbers(tmpdir):
327 |     runner = CliRunner()
328 |     with tmpdir.as_cwd():
329 |         os.makedirs("test_dir")
330 |         test_content = "First line\nSecond line\nThird line\nFourth line\n"
331 |         with open("test_dir/multiline.txt", "w") as f:
332 |             f.write(test_content)
333 | 
334 |         result = runner.invoke(cli, ["test_dir"])
335 |         assert result.exit_code == 0
336 |         assert "1  First line" not in result.output
337 |         assert test_content in result.output
338 | 
339 |         result = runner.invoke(cli, ["test_dir", "-n"])
340 |         assert result.exit_code == 0
341 |         assert "1  First line" in result.output
342 |         assert "2  Second line" in result.output
343 |         assert "3  Third line" in result.output
344 |         assert "4  Fourth line" in result.output
345 | 
346 |         result = runner.invoke(cli, ["test_dir", "--line-numbers"])
347 |         assert result.exit_code == 0
348 |         assert "1  First line" in result.output
349 |         assert "2  Second line" in result.output
350 |         assert "3  Third line" in result.output
351 |         assert "4  Fourth line" in result.output
352 | 
353 | 
354 | @pytest.mark.parametrize(
355 |     "input,extra_args",
356 |     (
357 |         ("test_dir1/file1.txt\ntest_dir2/file2.txt", []),
358 |         ("test_dir1/file1.txt\ntest_dir2/file2.txt", []),
359 |         ("test_dir1/file1.txt\0test_dir2/file2.txt", ["--null"]),
360 |         ("test_dir1/file1.txt\0test_dir2/file2.txt", ["-0"]),
361 |     ),
362 | )
363 | def test_reading_paths_from_stdin(tmpdir, input, extra_args):
364 |     runner = CliRunner()
365 |     with tmpdir.as_cwd():
366 |         # Create test files
367 |         os.makedirs("test_dir1")
368 |         os.makedirs("test_dir2")
369 |         with open("test_dir1/file1.txt", "w") as f:
370 |             f.write("Contents of file1")
371 |         with open("test_dir2/file2.txt", "w") as f:
372 |             f.write("Contents of file2")
373 | 
374 |         # Test space-separated paths from stdin
375 |         result = runner.invoke(cli, args=extra_args, input=input)
376 |         assert result.exit_code == 0
377 |         assert "test_dir1/file1.txt" in result.output
378 |         assert "Contents of file1" in result.output
379 |         assert "test_dir2/file2.txt" in result.output
380 |         assert "Contents of file2" in result.output
381 | 
382 | 
383 | def test_paths_from_arguments_and_stdin(tmpdir):
384 |     runner = CliRunner()
385 |     with tmpdir.as_cwd():
386 |         # Create test files
387 |         os.makedirs("test_dir1")
388 |         os.makedirs("test_dir2")
389 |         with open("test_dir1/file1.txt", "w") as f:
390 |             f.write("Contents of file1")
391 |         with open("test_dir2/file2.txt", "w") as f:
392 |             f.write("Contents of file2")
393 | 
394 |         # Test paths from arguments and stdin
395 |         result = runner.invoke(
396 |             cli,
397 |             args=["test_dir1"],
398 |             input="test_dir2/file2.txt",
399 |         )
400 |         assert result.exit_code == 0
401 |         assert "test_dir1/file1.txt" in result.output
402 |         assert "Contents of file1" in result.output
403 |         assert "test_dir2/file2.txt" in result.output
404 |         assert "Contents of file2" in result.output
405 | 
406 | 
407 | @pytest.mark.parametrize("option", ("-m", "--markdown"))
408 | def test_markdown(tmpdir, option):
409 |     runner = CliRunner()
410 |     with tmpdir.as_cwd():
411 |         os.makedirs("test_dir")
412 |         with open("test_dir/python.py", "w") as f:
413 |             f.write("This is python")
414 |         with open("test_dir/python_with_quad_backticks.py", "w") as f:
415 |             f.write("This is python with ```` in it already")
416 |         with open("test_dir/code.js", "w") as f:
417 |             f.write("This is javascript")
418 |         with open("test_dir/code.unknown", "w") as f:
419 |             f.write("This is an unknown file type")
420 |         result = runner.invoke(cli, ["test_dir", option])
421 |         assert result.exit_code == 0
422 |         actual = result.output
423 |         expected = (
424 |             "test_dir/code.js\n"
425 |             "```javascript\n"
426 |             "This is javascript\n"
427 |             "```\n"
428 |             "test_dir/code.unknown\n"
429 |             "```\n"
430 |             "This is an unknown file type\n"
431 |             "```\n"
432 |             "test_dir/python.py\n"
433 |             "```python\n"
434 |             "This is python\n"
435 |             "```\n"
436 |             "test_dir/python_with_quad_backticks.py\n"
437 |             "`````python\n"
438 |             "This is python with ```` in it already\n"
439 |             "`````\n"
440 |         )
441 |         assert expected.strip() == actual.strip()
442 | 


--------------------------------------------------------------------------------