├── .devcontainer └── devcontainer.json ├── .gitignore ├── README.md ├── github2file-tkinter-GUI.py ├── github2file.py ├── requirements.txt └── ts-js-rust2file.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python 3", 3 | "image": "mcr.microsoft.com/devcontainers/python:3.12-bookworm", 4 | "customizations": { 5 | "vscode": { 6 | "settings": { 7 | "python.defaultInterpreterPath": "/usr/local/bin/python", 8 | "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", 9 | "python.formatting.blackPath": "/usr/local/py-utils/bin/black", 10 | "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", 11 | "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", 12 | "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", 13 | "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", 14 | "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", 15 | "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", 16 | "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint" 17 | }, 18 | "extensions": [ 19 | "ms-python.python", 20 | "ms-python.vscode-pylance" 21 | ] 22 | } 23 | }, 24 | "postCreateCommand": "sudo apt-get update && sudo apt-get install -y python3-tk && pip3 install -r requirements.txt" 25 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | venv/ 3 | pharmaink.git_python.txt 4 | pharmaink.git_symfony.txt 5 | 6 | # idx 7 | .idx 8 | 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GitHub Repository to File Converter 2 | 3 | This Python script allows you to download and process files from a GitHub repository, making it easier to share code with chatbots 
that have large context capabilities but don't automatically download code from GitHub. 4 | 5 | ## Features 6 | 7 | - Download and process files from a GitHub repository 8 | - Support for both public and private repositories 9 | - Filter files based on programming language (Python, Markdown, Go, JavaScript) 10 | - Exclude certain directories, file types, and test files 11 | - Remove comments and docstrings from Python source code (optional) 12 | - Specify a branch or tag to download from (default: "main") 13 | - New GUI feature implemented in `github2file-tkinter-GUI.py` 14 | - New `--claude` option for formatting output for Claude 15 | - New script `ts-js-rust2file.py` for handling TypeScript, JavaScript, Svelte, and Rust files 16 | 17 | ## Install 18 | 19 | - conda create -n g2f python=3.10 20 | - conda activate g2f 21 | - pip install -r requirements.txt 22 | 23 | ## Usage 24 | 25 | To download and process files from a public GitHub repository, run the following command: 26 | 27 | ``` 28 | python github2file.py https://github.com/username/repository 29 | ``` 30 | 31 | For a private repository, use the following format: 32 | 33 | ``` 34 | python github2file.py https://<USERNAME>:<GITHUB_ACCESS_TOKEN>@github.com/username/repository 35 | ``` 36 | 37 | Replace `<USERNAME>` with your GitHub username and `<GITHUB_ACCESS_TOKEN>` with your GitHub personal access token. 38 | 39 | ### Optional Arguments 40 | 41 | - `--lang`: Specify the programming language of the repository. Choices: "md", "go", "javascript" or "python" (default: "python"). 42 | - `--keep-comments`: Keep comments and docstrings in the source code (only applicable for Python). 43 | - `--branch_or_tag`: Specify the branch or tag of the repository to download (default: "main"). 
44 | - `--claude`: Format the output for Claude with document tags 45 | 46 | ### Example 47 | 48 | To download and process files from the Hugging Face Transformers repository, run: 49 | 50 | ``` 51 | python github2file.py https://github.com/huggingface/transformers 52 | ``` 53 | 54 | This will create a file named `transformers_python.txt` in the `repos/` directory, containing the combined Python source code from the repository. 55 | 56 | To download and process files from a private repository, run: 57 | 58 | ``` 59 | python github2file.py https://<USERNAME>:<GITHUB_ACCESS_TOKEN>@github.com/username/private-repo 60 | ``` 61 | 62 | ## Output 63 | 64 | The script will create a file named `repository_language.txt` (e.g., `transformers_python.txt`) in the `repos/` directory, containing the combined source code from the specified repository. You can then share this file with chatbots like Claude for further analysis or discussion. 65 | 66 | ## GUI Usage 67 | 68 | To use the GUI feature, run the following command: 69 | 70 | ``` 71 | python github2file-tkinter-GUI.py 72 | ``` 73 | 74 | This will open a graphical user interface where you can enter the GitHub repository URL and download the combined source code. 75 | 76 | ## New Script for TypeScript, JavaScript, Svelte, and Rust 77 | 78 | To handle TypeScript, JavaScript, Svelte, and Rust files, use the `ts-js-rust2file.py` script. Run the following command: 79 | 80 | ``` 81 | python ts-js-rust2file.py <repo_url> 82 | ``` 83 | 84 | This will create a file named `<repo_name>_code.txt` containing the combined source code from the specified repository. 85 | 86 | ## Requirements 87 | 88 | - Python 3.x 89 | - `requests` library 90 | 91 | ## License 92 | 93 | This project is open-source and available under the [MIT License](LICENSE). 
# --------------------------------------------------------------------------------
# /github2file-tkinter-GUI.py:
# --------------------------------------------------------------------------------
"""Tkinter front end that merges a GitHub repository's Python sources into one text file."""

import os
import sys
import zipfile
import io
import ast

# Branches tried, in order, when the caller does not name one.
DEFAULT_BRANCHES = ("master", "main")
# Seconds to wait on the archive server before giving up instead of hanging.
REQUEST_TIMEOUT = 60

EXCLUDED_DIRS = ("docs", "examples", "tests", "test", "__pycache__", "scripts", "utils", "benchmarks")
# Substrings of paths that mark packaging helpers or repository-maintenance scripts.
EXCLUDED_NAME_FRAGMENTS = ("hubconf.py", "setup.py", "stale.py", "gen-card-", "write_model_card")
TEST_INDICATORS = ("import unittest", "import pytest", "from unittest", "from pytest")


def is_python_file(file_path):
    """Return True when *file_path* ends with the ``.py`` extension."""
    return file_path.endswith(".py")


def is_likely_useful_file(file_path):
    """Return False for hidden, test, excluded-directory or maintenance files.

    *file_path* uses ``/`` separators (ZIP member naming).
    """
    if any(part.startswith(".") for part in file_path.split("/")):
        return False
    if "test" in file_path.lower():
        return False
    for excluded_dir in EXCLUDED_DIRS:
        if f"/{excluded_dir}/" in file_path or file_path.startswith(f"{excluded_dir}/"):
            return False
    return not any(fragment in file_path for fragment in EXCLUDED_NAME_FRAGMENTS)


def is_test_file(file_content):
    """Return True when *file_content* imports ``unittest`` or ``pytest``."""
    return any(indicator in file_content for indicator in TEST_INDICATORS)


def has_sufficient_content(file_content, min_line_count=10):
    """Return True when at least *min_line_count* lines are neither blank nor ``#`` comments."""
    substantive = [
        line
        for line in file_content.split("\n")
        if line.strip() and not line.strip().startswith("#")
    ]
    return len(substantive) >= min_line_count


def remove_comments_and_docstrings(source):
    """Return *source* re-emitted by ``ast.unparse`` without comments or docstrings.

    Comments disappear because the AST never contains them. Docstrings of the
    module, classes and functions are dropped; other bare string statements
    are blanked. Raises ``SyntaxError`` when *source* is not valid Python.
    """
    tree = ast.parse(source)
    docstring_owners = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    for node in ast.walk(tree):
        if isinstance(node, docstring_owners):
            if ast.get_docstring(node, clean=False) is None:
                continue
            remaining = node.body[1:]
            if not remaining and not isinstance(node, ast.Module):
                # A def/class whose only statement was its docstring still
                # needs a body, otherwise the unparsed code is invalid.
                remaining = [ast.Pass()]
            node.body = remaining
        elif (
            isinstance(node, ast.Expr)
            and isinstance(node.value, ast.Constant)
            and isinstance(node.value.value, str)
        ):
            # ast.Str / .s were removed in Python 3.12; ast.Constant is the
            # supported spelling. Blank bare strings used as block comments.
            node.value.value = ""
    return ast.unparse(tree)


def _strip_git_suffix(url):
    """Return *url* without trailing slashes or a trailing ``.git``."""
    url = url.rstrip("/")
    return url[:-4] if url.endswith(".git") else url


def _repo_name(repo_url):
    """Return the last path component of *repo_url*, tolerating a trailing ``/`` or ``.git``."""
    return _strip_git_suffix(repo_url).split("/")[-1]


def _fetch_archive(repo_url, branch=None):
    """Download the ZIP archive of *repo_url* and return it as a ``zipfile.ZipFile``.

    With *branch* unset, each name in ``DEFAULT_BRANCHES`` is tried in order.
    Raises ``RuntimeError`` when no candidate yields a valid archive.
    """
    # Third-party dependency, imported lazily so the pure helpers above stay
    # importable on machines without it.
    import requests

    base_url = _strip_git_suffix(repo_url)
    candidates = (branch,) if branch else DEFAULT_BRANCHES
    problem = "no branch to try"
    for candidate in candidates:
        archive_url = f"{base_url}/archive/{candidate}.zip"
        response = requests.get(archive_url, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            problem = f"{archive_url} returned HTTP {response.status_code}"
            continue
        try:
            return zipfile.ZipFile(io.BytesIO(response.content))
        except zipfile.BadZipFile:
            problem = f"{archive_url} did not return a ZIP archive"
    raise RuntimeError(f"Could not download the repository archive: {problem}")


def download_repo(repo_url, output_file, branch=None):
    """Download *repo_url* and write its useful Python sources to *output_file*.

    Args:
        repo_url: Base URL of the GitHub repository.
        output_file: Path of the text file to create (overwritten if present).
        branch: Branch to download; when omitted "master" is tried, then "main".

    Raises:
        RuntimeError: no archive could be downloaded.
        OSError: the request failed or the output file could not be written.
    """
    archive = _fetch_archive(repo_url, branch)
    with archive, open(output_file, "w", encoding="utf-8") as outfile:
        for member in archive.namelist():
            if member.endswith("/") or not is_python_file(member):
                continue
            # GitHub wraps the tree in "<repo>-<branch>/". Filter on the path
            # inside the repository so a repository name containing "test"
            # does not exclude every file.
            relative_path = member.split("/", 1)[-1]
            if not is_likely_useful_file(relative_path):
                continue

            # errors="replace" keeps one badly encoded file from aborting the run.
            file_content = archive.read(member).decode("utf-8", errors="replace")
            if is_test_file(file_content) or not has_sufficient_content(file_content):
                continue

            try:
                file_content = remove_comments_and_docstrings(file_content)
            except (SyntaxError, ValueError):
                # Not parseable by this interpreter (e.g. Python 2 code or
                # embedded null bytes): skip the file.
                continue

            outfile.write(f"# File: {member}\n")
            outfile.write(file_content)
            outfile.write("\n\n")


def main():
    """Build the window and run the Tk event loop."""
    # Tk is imported here so the filtering helpers stay importable on
    # headless machines that lack the Tk libraries.
    import tkinter as tk
    from tkinter import filedialog, messagebox, font, ttk

    root = tk.Tk()
    root.title("GitHub Repo Downloader")
    root.geometry("500x140")
    root.configure(bg="#1c1c1c")  # dark background

    custom_font = font.Font(family="Consolas", size=12)

    style = ttk.Style()
    style.theme_use("clam")
    style.configure("TButton", padding=6, relief="flat", background="#00d0ff", foreground="#1c1c1c", font=custom_font)
    style.map("TButton", background=[("active", "#00a0c0")])

    def current_repo_url():
        """Return the entered URL, or show an error and return an empty string."""
        repo_url = repo_entry.get().strip()
        if not repo_url:
            messagebox.showerror("Error", "Please enter a valid GitHub repository URL.", parent=root)
        return repo_url

    def run_download(repo_url, destination):
        """Download into *destination* and report the outcome in a dialog."""
        try:
            download_repo(repo_url, destination)
        except (OSError, RuntimeError) as error:
            # requests' exceptions derive from OSError; without this handler a
            # failure would only reach stderr and the user would see nothing.
            messagebox.showerror("Error", f"Could not download the repository: {error}", parent=root)
        else:
            messagebox.showinfo("Success", f"Combined Python source code saved to {destination}", parent=root)

    def browse_repo():
        """Save to '<repo>_python.txt' in the current directory."""
        repo_url = current_repo_url()
        if repo_url:
            run_download(repo_url, f"{_repo_name(repo_url)}_python.txt")

    def browse_file():
        """Ask for a destination file, then download into it."""
        repo_url = current_repo_url()
        if not repo_url:
            return
        file_path = filedialog.asksaveasfilename(defaultextension=".txt", filetypes=[("Text Files", "*.txt")], parent=root)
        if file_path:
            run_download(repo_url, file_path)

    repo_label = tk.Label(root, text="GitHub Repository URL:", font=custom_font, fg="#00d0ff", bg="#1c1c1c")
    repo_label.pack(pady=10)

    repo_entry = tk.Entry(root, width=40, font=custom_font, bg="#333333", fg="#ffffff")
    repo_entry.pack()

    button_frame = tk.Frame(root, bg="#1c1c1c")
    button_frame.pack(pady=10)

    download_button = ttk.Button(button_frame, text="Download", command=browse_repo)
    download_button.pack(side=tk.LEFT, padx=10)

    save_button = ttk.Button(button_frame, text="Save As...", command=browse_file)
    save_button.pack(side=tk.LEFT)

    root.mainloop()


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /github2file.py:
# --------------------------------------------------------------------------------
"""Download a GitHub or GitLab repository archive and merge its sources into one text file."""

import os
import sys
import zipfile
import io
import ast
import argparse
from typing import List

# TODO: convert Jupyter notebooks (.ipynb) to plain Python so they can be included.
LANGUAGE_EXTENSIONS = {
    "python": [".py", ".pyw"],
    "go": [".go"],
    "javascript": [".js", ".jsx", ".ts", ".tsx"],
    "java": [".java"],
    "md": [".md"],
}
# Languages whose "File:" header uses "//" rather than "#".
SLASH_COMMENT_LANGUAGES = ("go", "javascript", "java")
# Seconds to wait on the archive server before giving up instead of hanging.
REQUEST_TIMEOUT = 60


def get_language_extensions(language: str) -> List[str]:
    """Return the file extensions for *language* (case-insensitive).

    Raises ``KeyError`` for a language missing from ``LANGUAGE_EXTENSIONS``.
    """
    return list(LANGUAGE_EXTENSIONS[language.lower()])


def is_file_type(file_path: str, language: str) -> bool:
    """Return True when *file_path* ends with one of *language*'s extensions."""
    return file_path.endswith(tuple(get_language_extensions(language)))


def is_likely_useful_file(file_path, lang):
    """Return False for hidden, test, excluded-directory, config or docs files.

    *file_path* uses ``/`` separators (ZIP member naming); *lang* adds
    language-specific exclusions for "python" and "go".
    """
    excluded_dirs = ["examples", "tests", "test", "scripts", "utils", "benchmarks"]
    excluded_fragments = [".github", ".gitlab-ci.yml", ".gitignore", "LICENSE", "README"]

    if lang == "python":
        excluded_dirs.append("__pycache__")
        excluded_fragments += ["hubconf.py", "setup.py", "stale.py", "gen-card-", "write_model_card"]
    elif lang == "go":
        excluded_dirs.append("vendor")
        excluded_fragments += ["go.mod", "go.sum", "Makefile"]

    if any(part.startswith(".") for part in file_path.split("/")):
        return False
    if "test" in file_path.lower():
        return False
    for excluded_dir in excluded_dirs:
        if f"/{excluded_dir}/" in file_path or file_path.startswith(f"{excluded_dir}/"):
            return False
    return not any(fragment in file_path for fragment in excluded_fragments)


def is_test_file(file_content, lang):
    """Return True when *file_content* contains a test marker known for *lang*."""
    test_indicators = {
        "python": ["import unittest", "import pytest", "from unittest", "from pytest"],
        # Go imports are quoted (import "testing"), so match the quoted name;
        # the unquoted "import testing" can never occur in valid Go.
        "go": ['"testing"', "func Test"],
    }
    return any(indicator in file_content for indicator in test_indicators.get(lang, []))


def has_sufficient_content(file_content, min_line_count=10):
    """Return True when at least *min_line_count* lines are neither blank nor ``#``/``//`` comments."""
    substantive = [
        line
        for line in file_content.split("\n")
        if line.strip() and not line.strip().startswith(("#", "//"))
    ]
    return len(substantive) >= min_line_count


def remove_comments_and_docstrings(source):
    """Return *source* re-emitted by ``ast.unparse`` without comments or docstrings.

    Comments disappear because the AST never contains them. Docstrings of the
    module, classes and functions are dropped; other bare string statements
    are blanked. Raises ``SyntaxError`` when *source* is not valid Python.
    """
    tree = ast.parse(source)
    docstring_owners = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    for node in ast.walk(tree):
        if isinstance(node, docstring_owners):
            if ast.get_docstring(node, clean=False) is None:
                continue
            remaining = node.body[1:]
            if not remaining and not isinstance(node, ast.Module):
                # A def/class whose only statement was its docstring still
                # needs a body, otherwise the unparsed code is invalid.
                remaining = [ast.Pass()]
            node.body = remaining
        elif (
            isinstance(node, ast.Expr)
            and isinstance(node.value, ast.Constant)
            and isinstance(node.value.value, str)
        ):
            # Only strings are blanked: other constant statements, such as a
            # bare "..." placeholder, are real code and must survive.
            node.value.value = ""
    return ast.unparse(tree)


def _strip_git_suffix(url):
    """Return *url* without trailing slashes or a trailing ``.git``."""
    url = url.rstrip("/")
    # str.rstrip('.git') strips a *character set* and would also eat the end
    # of names such as "widget", so remove the suffix explicitly.
    return url[:-4] if url.endswith(".git") else url


def _repo_name_from_url(repo_url):
    """Return the repository name, i.e. the last path component of *repo_url*."""
    return _strip_git_suffix(repo_url).split("/")[-1]


def construct_download_url(repo_url, branch_or_tag):
    """Return the ZIP archive URL for *branch_or_tag* of a GitHub or GitLab repository.

    For GitHub this is the branch (``refs/heads``) URL. Raises ``ValueError``
    for any other host.
    """
    base_url = _strip_git_suffix(repo_url)
    if "github.com" in repo_url:
        return f"{base_url}/archive/refs/heads/{branch_or_tag}.zip"
    if "gitlab.com" in repo_url:
        repo_name = base_url.split("/")[-1]
        return f"{base_url}/-/archive/{branch_or_tag}/{repo_name}-{branch_or_tag}.zip"
    raise ValueError("Unsupported repository URL. Only GitHub and GitLab URLs are supported.")


def _candidate_download_urls(repo_url, branch_or_tag):
    """Return archive URLs to try in order: the branch URL, then GitHub's tag URL."""
    primary = construct_download_url(repo_url, branch_or_tag)
    candidates = [primary]
    if "github.com" in repo_url:
        # GitHub serves branches and tags from different ref namespaces.
        candidates.append(primary.replace("/archive/refs/heads/", "/archive/refs/tags/", 1))
    return candidates


def _resolve_output_path(output_file, repo_name, lang, claude):
    """Return the file to write; a directory *output_file* gets a generated file name."""
    default_name = f"{repo_name}_{lang}{'-claude' if claude else ''}.txt"
    if not output_file:
        return default_name
    if os.path.isdir(output_file) or output_file.endswith(("/", os.sep)):
        return os.path.join(output_file, default_name)
    return output_file


def _write_document(outfile, file_path, content, lang, claude, index):
    """Write one file to *outfile*, as a tagged Claude document or a commented section."""
    if claude:
        outfile.write(f'<document index="{index}">\n')
        outfile.write(f"<source>{file_path}</source>\n")
        outfile.write(f"<document_content>\n{content}\n</document_content>\n")
        outfile.write("</document>\n\n")
    else:
        prefix = "// " if lang in SLASH_COMMENT_LANGUAGES else "# "
        outfile.write(f"{prefix}File: {file_path}\n")
        outfile.write(content)
        outfile.write("\n\n")


def download_repo(repo_url, output_file, lang, keep_comments=False, branch_or_tag="main", token=None, claude=False):
    """Download a repository archive and write its filtered sources to one text file.

    Args:
        repo_url: GitHub or GitLab repository URL.
        output_file: Destination. An existing directory (or a path ending in a
            separator) receives ``<repo>_<lang>[-claude].txt``; anything else
            is used as the file path itself.
        lang: Key of ``LANGUAGE_EXTENSIONS`` selecting which files to keep.
        keep_comments: Keep comments and docstrings (only relevant for Python).
        branch_or_tag: Ref to download. On GitHub the branch URL is tried
            first, then the tag URL.
        token: Personal access token sent to the host, if given.
        claude: Wrap every file in ``<document>`` tags.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: *repo_url* is neither a GitHub nor a GitLab URL.
        SystemExit: the server did not return a valid ZIP archive.
    """
    # Third-party dependency, imported lazily so the pure helpers in this
    # module stay importable without it.
    import requests

    headers = {}
    if token:
        if "gitlab.com" in repo_url:
            headers["PRIVATE-TOKEN"] = token
        elif "github.com" in repo_url:
            headers["Authorization"] = f"token {token}"

    response = None
    for download_url in _candidate_download_urls(repo_url, branch_or_tag):
        print(download_url)
        response = requests.get(download_url, headers=headers, timeout=REQUEST_TIMEOUT)
        if response.status_code != 404:
            break

    try:
        zip_file = zipfile.ZipFile(io.BytesIO(response.content))
    except zipfile.BadZipFile:
        print(f"Error: The downloaded file is not a valid ZIP archive (HTTP status {response.status_code}).")
        sys.exit(1)

    use_claude_format = bool(claude)
    output_path = _resolve_output_path(output_file, _repo_name_from_url(repo_url), lang, use_claude_format)

    with zip_file, open(output_path, "w", encoding="utf-8") as outfile:
        readme_file_path, readme_content = find_readme_content(zip_file)

        if use_claude_format:
            outfile.write("Here are some documents for you to reference for your task:\n\n")
            outfile.write("<documents>\n")
        # The README always comes first, as document 0.
        _write_document(outfile, readme_file_path, readme_content, lang, use_claude_format, 0)

        index = 1
        for file_path in zip_file.namelist():
            if file_path.endswith("/") or not is_file_type(file_path, lang):
                continue
            # Archives wrap the tree in "<repo>-<ref>/". Filter on the path
            # inside the repository so a repository name containing "test"
            # does not exclude every file.
            relative_path = file_path.split("/", 1)[-1]
            if not is_likely_useful_file(relative_path, lang):
                continue

            # errors="replace" never raises, so no decode-error handling is needed.
            file_content = zip_file.read(file_path).decode("utf-8", errors="replace")

            if is_test_file(file_content, lang) or not has_sufficient_content(file_content):
                continue
            if lang == "python" and not keep_comments:
                try:
                    file_content = remove_comments_and_docstrings(file_content)
                except (SyntaxError, ValueError):
                    # One unparseable file must not abort the whole export.
                    print(f"Warning: Skipping file {file_path} because it could not be parsed.")
                    continue

            _write_document(outfile, file_path, file_content, lang, use_claude_format, index)
            index += 1

        if use_claude_format:
            outfile.write("</documents>")

    return output_path


def find_readme_content(zip_file):
    """Return ``(path, text)`` of the repository README inside *zip_file*.

    ``README.md`` is preferred over ``README``; among several matches the one
    closest to the archive root wins. When nothing usable is found the text
    is a fixed "No README file found" message.
    """
    readme_file_path = ""
    readme_content = ""
    for readme_name in ("README.md", "README"):
        matches = [
            name
            for name in zip_file.namelist()
            if name == readme_name or name.endswith(f"/{readme_name}")
        ]
        if not matches:
            continue
        # Prefer the shallowest path so a nested README (docs/, examples/, ...)
        # cannot shadow the repository's own.
        readme_file_path = min(matches, key=lambda name: name.count("/"))
        readme_content = zip_file.read(readme_file_path).decode("utf-8", errors="replace")
        if readme_content:
            break

    if not readme_content:
        readme_content = "No README file found in the repository."

    return readme_file_path, readme_content


def print_usage():
    """Print a short command-line usage summary."""
    print("Usage: python github2file.py <repo_url> [--lang <language>] [--keep-comments] [--branch_or_tag <branch_or_tag>] [--token <token>] [--claude]")
    print("Options:")
    print("  <repo_url>        The URL of the GitHub or GitLab repository")
    print("  --lang            The programming language of the repository (choices: go, java, javascript, md, python). Default: python")
    print("  --keep-comments   Keep comments and docstrings in the source code (only applicable for Python)")
    print("  --branch_or_tag   The branch or tag of the repository to download. Default: main")
    print("  --token           Personal access token for private repositories")
    print("  --claude          Format the output for Claude with document tags")


def main():
    """Parse the command line, download the repository and report where the output went."""
    parser = argparse.ArgumentParser(description='Download and process files from a GitHub or GitLab repository.')
    parser.add_argument('repo_url', type=str, help='The URL of the GitHub or GitLab repository')
    parser.add_argument('--lang', type=str, choices=sorted(LANGUAGE_EXTENSIONS), default='python', help='The programming language of the repository')
    parser.add_argument('--keep-comments', action='store_true', help='Keep comments and docstrings in the source code (only applicable for Python)')
    parser.add_argument('--branch_or_tag', type=str, help='The branch or tag of the repository to download', default="main")
    parser.add_argument('--token', type=str, help='Personal access token for private repositories', default=None)
    parser.add_argument('--claude', action='store_true', help='Format the output for Claude with document tags')
    args = parser.parse_args()

    output_folder = "repos"
    os.makedirs(output_folder, exist_ok=True)

    saved_path = download_repo(
        repo_url=args.repo_url,
        output_file=output_folder,
        lang=args.lang,
        keep_comments=args.keep_comments,
        branch_or_tag=args.branch_or_tag,
        token=args.token,
        claude=args.claude,
    )
    # Report the path that was actually written, not a separately rebuilt name.
    print(f"Combined {args.lang.capitalize()} source code saved to {saved_path}")


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# certifi==2024.2.2
# charset-normalizer==3.3.2
# idna==3.6
# requests==2.31.0
# tk==0.1.0
# urllib3==2.2.1
# --------------------------------------------------------------------------------
# --------------------------------------------------------------------------------
# /ts-js-rust2file.py:
# --------------------------------------------------------------------------------
"""Merge the Python, JavaScript, TypeScript, Svelte and Rust sources of a GitHub repository into one text file."""

import os
import sys
import zipfile
import io
import ast

DESIRED_EXTENSIONS = (".py", ".js", ".ts", ".svelte", ".rs")
# Branches tried, in order, when the URL does not name one.
DEFAULT_BRANCHES = ("master", "main")
# Seconds to wait on the archive server before giving up instead of hanging.
REQUEST_TIMEOUT = 60

EXCLUDED_DIRS = ("docs", "examples", "tests", "test", "__pycache__", "scripts", "utils", "benchmarks", "node_modules", ".venv")
# Substrings of paths that mark packaging helpers or repository-maintenance scripts.
EXCLUDED_NAME_FRAGMENTS = ("hubconf.py", "setup.py", "package-lock.json", "stale.py", "gen-card-", "write_model_card")
PYTHON_TEST_INDICATORS = ("import unittest", "import pytest", "from unittest", "from pytest")


def is_desired_file(file_path):
    """Return True when *file_path* has a Python, JavaScript, TypeScript, Svelte or Rust extension."""
    return file_path.endswith(DESIRED_EXTENSIONS)


def is_likely_useful_file(file_path):
    """Return False for hidden, test, excluded-directory or maintenance files.

    *file_path* uses ``/`` separators (ZIP member naming).
    """
    if any(part.startswith(".") for part in file_path.split("/")):
        return False
    if "test" in file_path.lower():
        return False
    for excluded_dir in EXCLUDED_DIRS:
        if f"/{excluded_dir}/" in file_path or file_path.startswith(f"{excluded_dir}/"):
            return False
    return not any(fragment in file_path for fragment in EXCLUDED_NAME_FRAGMENTS)


def is_test_file(file_content):
    """Return True when Python *file_content* imports ``unittest`` or ``pytest``."""
    return any(indicator in file_content for indicator in PYTHON_TEST_INDICATORS)


def has_sufficient_content(file_content, min_line_count=10):
    """Return True when at least *min_line_count* lines are neither blank nor ``#`` comments."""
    substantive = [
        line
        for line in file_content.split("\n")
        if line.strip() and not line.strip().startswith("#")
    ]
    return len(substantive) >= min_line_count


def remove_comments_and_docstrings(source):
    """Return Python *source* re-emitted by ``ast.unparse`` without comments or docstrings.

    Only meaningful for Python code; raises ``SyntaxError`` for anything else.
    Docstrings of the module, classes and functions are dropped; other bare
    string statements are blanked.
    """
    tree = ast.parse(source)
    docstring_owners = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    for node in ast.walk(tree):
        if isinstance(node, docstring_owners):
            if ast.get_docstring(node, clean=False) is None:
                continue
            remaining = node.body[1:]
            if not remaining and not isinstance(node, ast.Module):
                # A def/class whose only statement was its docstring still
                # needs a body, otherwise the unparsed code is invalid.
                remaining = [ast.Pass()]
            node.body = remaining
        elif (
            isinstance(node, ast.Expr)
            and isinstance(node.value, ast.Constant)
            and isinstance(node.value.value, str)
        ):
            # ast.Str / .s were removed in Python 3.12; ast.Constant is the
            # supported spelling. Blank bare strings used as block comments.
            node.value.value = ""
    return ast.unparse(tree)


def _strip_git_suffix(url):
    """Return *url* without trailing slashes or a trailing ``.git``."""
    url = url.rstrip("/")
    return url[:-4] if url.endswith(".git") else url


def _split_tree_url(repo_url):
    """Split a repository URL into ``(base_url, branch, subdirectory)``.

    ``https://github.com/o/r/tree/dev/src/app`` yields
    ``("https://github.com/o/r", "dev", "src/app")``. A plain repository URL
    yields ``(url, None, "")``. The first segment after ``/tree/`` is taken
    as the branch, so branch names containing ``/`` are not supported.
    """
    base_url, marker, remainder = repo_url.rstrip("/").partition("/tree/")
    if not marker:
        return _strip_git_suffix(base_url), None, ""
    branch, _, subdir = remainder.partition("/")
    return _strip_git_suffix(base_url), branch or None, subdir


def _fetch_archive(base_url, branch=None):
    """Download the ZIP archive of *base_url* and return it as a ``zipfile.ZipFile``.

    With *branch* unset, each name in ``DEFAULT_BRANCHES`` is tried in order.
    Raises ``RuntimeError`` when no candidate yields a valid archive.
    """
    # Third-party dependency, imported lazily so the pure helpers above stay
    # importable without it.
    import requests

    candidates = (branch,) if branch else DEFAULT_BRANCHES
    problem = "no branch to try"
    for candidate in candidates:
        archive_url = f"{base_url}/archive/{candidate}.zip"
        response = requests.get(archive_url, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            problem = f"{archive_url} returned HTTP {response.status_code}"
            continue
        try:
            return zipfile.ZipFile(io.BytesIO(response.content))
        except zipfile.BadZipFile:
            problem = f"{archive_url} did not return a ZIP archive"
    raise RuntimeError(f"Could not download the repository archive: {problem}")


def download_repo(repo_url, output_file):
    """Download *repo_url* and write its useful source files to *output_file*.

    *repo_url* may be a repository URL or a ``.../tree/<branch>/<subdir>``
    URL; in the latter case only files below ``<subdir>`` are written.

    Raises:
        RuntimeError: no archive could be downloaded.
        OSError: the request failed or the output file could not be written.
    """
    base_url, branch, subdir = _split_tree_url(repo_url)
    archive = _fetch_archive(base_url, branch)

    with archive, open(output_file, "w", encoding="utf-8") as outfile:
        for member in archive.namelist():
            if member.endswith("/") or not is_desired_file(member):
                continue
            # GitHub wraps the tree in "<repo>-<branch>/". Filter on the path
            # inside the repository so a repository name containing "test"
            # does not exclude every file.
            relative_path = member.split("/", 1)[-1]
            if subdir and not relative_path.startswith(f"{subdir}/"):
                continue
            if not is_likely_useful_file(relative_path):
                continue

            # errors="replace" keeps one badly encoded file from aborting the run.
            file_content = archive.read(member).decode("utf-8", errors="replace")
            if not has_sufficient_content(file_content):
                continue

            # The ast-based stripper and the test-import check only understand
            # Python; on JS/TS/Svelte/Rust the parser would raise SyntaxError
            # and the file would be dropped, so those are written unchanged.
            if member.endswith(".py"):
                if is_test_file(file_content):
                    continue
                try:
                    file_content = remove_comments_and_docstrings(file_content)
                except (SyntaxError, ValueError):
                    continue

            outfile.write(f"# File: {member}\n")
            outfile.write(file_content)
            outfile.write("\n\n")


def main():
    """Run the command-line interface: one positional repository URL."""
    if len(sys.argv) != 2:
        print("Usage: python ts-js-rust2file.py <repo_url>")
        sys.exit(1)

    repo_url = sys.argv[1]
    repo_name = repo_url.rstrip("/").split("/")[-1]
    output_file = f"{repo_name}_code.txt"

    try:
        download_repo(repo_url, output_file)
    except RuntimeError as error:
        print(f"Error: {error}")
        sys.exit(1)
    print(f"Combined source code saved to {output_file}")


if __name__ == "__main__":
    main()