├── LICENSE ├── README.md ├── backend ├── .env.template ├── .gitignore ├── README.md ├── app.py ├── crons │ └── repo_metadata_cron.py ├── poetry.lock ├── pyproject.toml ├── src │ ├── gitsummarize │ │ ├── __init__.py │ │ ├── auth │ │ │ ├── auth.py │ │ │ └── key_manager.py │ │ ├── clients │ │ │ ├── ai_client_abc.py │ │ │ ├── github.py │ │ │ ├── google_genai.py │ │ │ ├── openai.py │ │ │ └── supabase.py │ │ ├── constants │ │ │ └── constants.py │ │ ├── exceptions │ │ │ └── exceptions.py │ │ ├── model │ │ │ └── repo_metadata.py │ │ └── prompts │ │ │ ├── business_logic.py │ │ │ ├── resource_repo.py │ │ │ └── technical_documentation.py │ └── scripts │ │ ├── get_popular_gh_repos.py │ │ └── precache_repos.py └── tests │ └── __init__.py └── frontend ├── LICENSE ├── README.md ├── components.json ├── next-env.d.ts ├── next.config.js ├── package-lock.json ├── package.json ├── postcss.config.js ├── prettier.config.js ├── public ├── favicon.ico ├── favicon.png └── og-image.png ├── src ├── app │ ├── [username] │ │ └── [repo] │ │ │ └── page.tsx │ ├── _actions │ │ ├── cache.ts │ │ ├── github.ts │ │ └── repo.ts │ ├── api │ │ └── generate │ │ │ └── route.ts │ ├── layout.tsx │ ├── page.tsx │ └── providers.tsx ├── components │ ├── action-button.tsx │ ├── api-key-button.tsx │ ├── api-key-dialog.tsx │ ├── copy-button.tsx │ ├── customization-dropdown.tsx │ ├── export-dropdown.tsx │ ├── footer.tsx │ ├── header.tsx │ ├── hero.tsx │ ├── loading-animation.tsx │ ├── loading.tsx │ ├── main-card.tsx │ ├── mermaid-diagram.tsx │ ├── private-repos-dialog.tsx │ └── ui │ │ ├── button.tsx │ │ ├── card.tsx │ │ ├── dialog.tsx │ │ ├── input.tsx │ │ ├── progress.tsx │ │ ├── switch.tsx │ │ ├── textarea.tsx │ │ └── tooltip.tsx ├── env.js ├── lib │ ├── exampleRepos.ts │ ├── github.ts │ ├── supabase.ts │ └── utils.ts ├── server │ └── db │ │ ├── index.ts │ │ └── schema.ts └── styles │ └── globals.css ├── tailwind.config.ts └── tsconfig.json /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2025 Antarix 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🧠 GitSummarize 2 | 3 | Image 4 | 5 |
6 | 7 | ## **Generate beautiful, world-class documentation from any GitHub repository — instantly.** 8 | 9 | Just replace `hub` with `summarize` in any GitHub URL to generate a live, interactive documentation hub. 10 | 11 | https://gitsummarize.com/ 12 |
13 | 14 | --- 15 | 16 | ## 🚀 Features 17 | 18 | GitSummarize analyzes any GitHub repo (public or private) and generates: 19 | 20 | - 📄 **System-level architecture overviews** 21 | - 📁 **Per-directory and file summaries** 22 | - 🧠 **Natural language descriptions of purpose, flow, and structure** 23 | - 🔗 **Business Logic and Rules Extraction** 24 | - 📊 **Architecture diagrams and flows** 25 | 26 | It’s perfect for onboarding, exploring unfamiliar codebases, and writing technical documentation — all powered by Gemini. 27 | 28 | --- 29 | 30 | ## 🧰 Tech Stack 31 | 32 | | Area | Stack | 33 | |------------|-------| 34 | | **Frontend** | Next.js, TypeScript, Tailwind CSS, ShadCN | 35 | | **Backend** | FastAPI, Python, Server Actions | 36 | | **Database** | PostgreSQL (Supabase) | 37 | | **AI** | Gemini 2.5 Pro | 38 | | **Analytics**| PostHog | 39 | | **Hosting** | Vercel (Frontend), Render (Backend) | 40 | 41 | --- 42 | 43 | ## 🤔 Why GitSummarize? 44 | 45 | We wanted to contribute to open-source projects but found it difficult to understand massive codebases quickly. 46 | 47 | GitSummarize automates the hardest part: figuring out *what the code does* and *how it's structured* — giving you useful documentation and a high-level overview of the codebase. 48 | 49 | --- 50 | 51 | ## 🧪 Local Development / Self-Hosting 52 | 53 | 1. **Clone the repo** 54 | ```bash 55 | git clone https://github.com/antarixxx/gitsummarize 56 | cd gitsummarize 57 | ``` 58 | 59 | 2. **Install dependencies and run the Next.js project** (it lives in `frontend/`) 60 | ```bash 61 | cd frontend && npm install && npm run dev 62 | ``` 63 | 64 | You can now access the website at `localhost:3000`. To run the API server as well, follow `backend/README.md`. 65 | 66 | 67 | ## Contributing 68 | 69 | Contributions are welcome! Please feel free to submit a Pull Request. 70 | 71 | ## Acknowledgements 72 | 73 | Shoutout to [GitIngest](https://gitingest.com/) and [GitDiagram](https://gitdiagram.com/) for the inspiration and styling. 
74 | 75 | ## 📈 Rate Limits 76 | 77 | We are currently hosting it for free with rate limits though this is somewhat likely to change in the future based on Gemini's API policies. 78 | 79 | ## 🤔 Future Steps 80 | 81 | - Expand documentation to cover more topics (Setup, Onboarding Guide) 82 | - Add Architecture Diagrams 83 | 84 | 85 | ## Star History 86 | 87 | [![Star History Chart](https://api.star-history.com/svg?repos=antarixxx/gitsummarize&type=Date)](https://www.star-history.com/#antarixxx/gitsummarize&Date) 88 | -------------------------------------------------------------------------------- /backend/.env.template: -------------------------------------------------------------------------------- 1 | PYTHON_VERSION=3.13.2 2 | POETRY_VERSION=2.1.2 3 | PORT=8000 4 | API_TOKEN= 5 | NUM_GEMINI_KEYS=1 6 | GEMINI_API_KEY_1= 7 | GITHUB_TOKEN= 8 | SUPABASE_URL= 9 | SUPABASE_ANON_KEY= 10 | SUPABASE_ADMIN_KEY= 11 | OPENAI_API_KEY= -------------------------------------------------------------------------------- /backend/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | 176 | tmp/ 177 | 178 | .DS_Store 179 | -------------------------------------------------------------------------------- /backend/README.md: -------------------------------------------------------------------------------- 1 | # gitsummarize backend 2 | 3 | ## Setup 4 | 5 | Make sure that you have `cd`-ed into `backend`. 6 | 7 | 1. Create a new Python environment (Python 3.13 or above) using `conda`, `venv`, `pipx`, etc. 8 | 2. Install Poetry: `pip install poetry==2.1.2`. 9 | 3. Install dependencies: `poetry install`. 10 | 4. Create a new `.env` file and copy the contents of `.env.template` to it. 11 | 5. Add your GitHub API token to `.env` as `GITHUB_TOKEN`. 12 | 6. Create a new Supabase project with a table with the following columns: 13 | ```psql 14 | id uuid 15 | repo_url text 16 | business_summary text 17 | technical_documentation text 18 | created_at timestamptz 19 | ``` 20 | 7. Add your Supabase URL and keys to `.env` (`SUPABASE_URL`, `SUPABASE_ANON_KEY`, `SUPABASE_ADMIN_KEY`). 21 | 8. Add Gemini API keys to `.env` as `GEMINI_API_KEY_1`, `GEMINI_API_KEY_2`, … and set `NUM_GEMINI_KEYS` to the number of keys. You can add as many keys as you want. 22 | 9. Set `API_TOKEN` in `.env` to anything you want (preferably something secure). 23 | 24 | ## Running 25 | Again, make sure that you have `cd`-ed into `backend`. 26 | 27 | Run `fastapi run app.py`. Go to `http://localhost:8000/docs` to see the OpenAPI documentation. 
28 | -------------------------------------------------------------------------------- /backend/app.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | from typing import Optional 5 | from dotenv import load_dotenv 6 | from fastapi import FastAPI, HTTPException, Depends 7 | from fastapi.responses import JSONResponse 8 | from gitsummarize.exceptions.exceptions import GitHubAccessError 9 | from pydantic import BaseModel 10 | 11 | from gitsummarize.auth.auth import verify_token 12 | from gitsummarize.auth.key_manager import KeyGroup, KeyManager 13 | from gitsummarize.clients.openai import OpenAIClient 14 | from gitsummarize.clients.supabase import SupabaseClient 15 | from src.gitsummarize.clients.github import GithubClient 16 | from src.gitsummarize.clients.google_genai import GoogleGenAI 17 | 18 | load_dotenv() 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | gh = GithubClient(os.getenv("GITHUB_TOKEN")) 23 | openai = OpenAIClient(os.getenv("OPENAI_API_KEY")) 24 | supabase = SupabaseClient(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_ADMIN_KEY")) 25 | 26 | key_manager = KeyManager() 27 | for i in range(1, int(os.getenv("NUM_GEMINI_KEYS")) + 1): 28 | key_manager.add_key(KeyGroup.GEMINI, os.getenv(f"GEMINI_API_KEY_{i}")) 29 | 30 | 31 | app = FastAPI() 32 | 33 | 34 | class SummarizeRequest(BaseModel): 35 | repo_url: str 36 | gemini_key: Optional[str] = None 37 | 38 | 39 | @app.post("/summarize", operation_id="summarize_repo") 40 | async def summarize(request: SummarizeRequest, _: str = Depends(verify_token)): 41 | if not _validate_repo_url(request.repo_url): 42 | raise HTTPException(status_code=400, detail="Invalid GitHub URL") 43 | logger.info(f"Summarizing repository: {request.repo_url}") 44 | 45 | directory_structure = await gh.get_directory_structure_from_url(request.repo_url) 46 | all_content = await gh.get_all_content_from_url(request.repo_url) 47 | 48 | key_1 = 
request.gemini_key or key_manager.get_key(KeyGroup.GEMINI) 49 | key_2 = request.gemini_key or key_manager.get_key(KeyGroup.GEMINI) 50 | 51 | try: 52 | client_1, client_2 = GoogleGenAI(key_1), GoogleGenAI(key_2) 53 | business_summary = await client_1.get_business_summary( 54 | directory_structure, all_content 55 | ) 56 | technical_documentation = await client_2.get_technical_documentation( 57 | directory_structure, all_content 58 | ) 59 | except Exception as e: 60 | raise HTTPException(status_code=500, detail=str(e)) 61 | 62 | supabase.insert_repo_summary( 63 | request.repo_url, business_summary, technical_documentation 64 | ) 65 | try: 66 | await _update_repo_metadata(request.repo_url) 67 | except GitHubAccessError as e: 68 | logger.error(f"Error updating repo metadata for {request.repo_url}: {e}") 69 | return JSONResponse(content={"message": "Repository summarized successfully"}) 70 | 71 | 72 | @app.post("/repo-metadata-cron") 73 | async def repo_metadata_cron(_: str = Depends(verify_token)): 74 | repo_urls = supabase.get_all_repo_urls() 75 | for repo_url in repo_urls: 76 | try: 77 | metadata = await gh.get_repo_metadata_from_url(repo_url) 78 | supabase.upsert_repo_metadata(repo_url, metadata) 79 | except GitHubAccessError as e: 80 | logger.error(f"Error updating repo metadata for {repo_url}: {e}") 81 | 82 | 83 | @app.post("/summarize-local", operation_id="summarize_store_local") 84 | async def summarize_store_local( 85 | request: SummarizeRequest, _: str = Depends(verify_token) 86 | ): 87 | if not _validate_repo_url(request.repo_url): 88 | raise HTTPException(status_code=400, detail="Invalid GitHub URL") 89 | logger.info(f"Summarizing repository: {request.repo_url}") 90 | 91 | directory_structure = await gh.get_directory_structure_from_url(request.repo_url) 92 | all_content = await gh.get_all_content_from_url(request.repo_url) 93 | 94 | business_summary, technical_documentation = await asyncio.gather( 95 | openai.get_business_summary(directory_structure, 
all_content), 96 | openai.get_technical_documentation(directory_structure, all_content), 97 | ) 98 | 99 | with open("tmp/openai/business_summary.txt", "w") as f: 100 | f.write(business_summary) 101 | with open("tmp/openai/technical_documentation.txt", "w") as f: 102 | f.write(technical_documentation) 103 | 104 | 105 | def _validate_repo_url(repo_url: str) -> bool: 106 | return repo_url.startswith("https://github.com/") 107 | 108 | 109 | async def _update_repo_metadata(repo_url: str): 110 | try: 111 | metadata = await gh.get_repo_metadata_from_url(repo_url) 112 | supabase.upsert_repo_metadata(repo_url, metadata) 113 | except GitHubAccessError as e: 114 | logger.error(f"Error updating repo metadata for {repo_url}: {e}") 115 | -------------------------------------------------------------------------------- /backend/crons/repo_metadata_cron.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from gitsummarize.clients import supabase 4 | from gitsummarize.clients.github import GithubClient 5 | import os 6 | 7 | from gitsummarize.exceptions.exceptions import GitHubAccessError 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | gh = GithubClient(os.getenv("GITHUB_TOKEN")) 12 | supabase = supabase.SupabaseClient(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_ADMIN_KEY")) 13 | 14 | async def main(): 15 | repo_urls = supabase.get_all_repo_urls() 16 | for repo_url in repo_urls: 17 | try: 18 | metadata = await gh.get_repo_metadata_from_url(repo_url) 19 | supabase.upsert_repo_metadata(repo_url, metadata) 20 | except GitHubAccessError as e: 21 | logger.error(f"Error updating repo metadata for {repo_url}: {e}") 22 | 23 | 24 | if __name__ == "__main__": 25 | asyncio.run(main()) 26 | -------------------------------------------------------------------------------- /backend/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = 
"gitsummarize" 3 | version = "0.1.0" 4 | description = "" 5 | authors = [ 6 | {name = "Antarix N",email = "antarix18@gmail.com"} 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.13,<4.0" 10 | dependencies = [ 11 | "google-genai (>=1.8.0,<2.0.0)", 12 | "fastapi[standard] (>=0.115.12,<0.116.0)", 13 | "aiohttp (>=3.11.14,<4.0.0)", 14 | "python-dotenv (>=1.1.0,<2.0.0)", 15 | "supabase (>=2.15.0,<3.0.0)", 16 | "openai (>=1.69.0,<2.0.0)", 17 | ] 18 | 19 | [tool.poetry] 20 | packages = [{include = "gitsummarize", from = "src"}] 21 | 22 | 23 | [build-system] 24 | requires = ["poetry-core>=2.0.0,<3.0.0"] 25 | build-backend = "poetry.core.masonry.api" 26 | -------------------------------------------------------------------------------- /backend/src/gitsummarize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antarixxx/gitsummarize/0c6781f953f51b5539a01ebbbffec5ff15f4e7b8/backend/src/gitsummarize/__init__.py -------------------------------------------------------------------------------- /backend/src/gitsummarize/auth/auth.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | from fastapi import Security, HTTPException 3 | from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer 4 | import os 5 | 6 | load_dotenv() 7 | 8 | API_TOKEN = os.getenv("API_TOKEN") 9 | 10 | security_scheme = HTTPBearer(description="Enter your API token") 11 | 12 | def verify_token(credentials: HTTPAuthorizationCredentials = Security(security_scheme)): 13 | if not credentials: 14 | raise HTTPException(status_code=401, detail="Authorization header missing") 15 | 16 | token = credentials.credentials 17 | if token != API_TOKEN: 18 | raise HTTPException(status_code=401, detail="Invalid token") 19 | 20 | return token -------------------------------------------------------------------------------- 
# /backend/src/gitsummarize/auth/key_manager.py:
# --------------------------------------------------------------------------------
from collections import defaultdict
from enum import StrEnum
import itertools
import random


class KeyGroup(StrEnum):
    """Provider groups for which API keys are pooled."""

    OPENAI = "openai"
    GEMINI = "gemini"


class KeyManager:
    """Round-robin pool of API keys, kept separately per ``KeyGroup``."""

    def __init__(self):
        self.keys = defaultdict(list)  # group -> list of registered keys
        self.iterators = {}  # group -> endless iterator over that group's keys

    def add_key(self, group: KeyGroup, key: str):
        """Register ``key`` under ``group`` and restart that group's rotation.

        The keys are shuffled first and the rotation is built over a snapshot,
        so the iterator never observes the list while it is being mutated.
        """
        self.keys[group].append(key)
        random.shuffle(self.keys[group])
        self.iterators[group] = itertools.cycle(tuple(self.keys[group]))

    def get_key(self, group: KeyGroup) -> str:
        """Return the next key of ``group`` in rotation.

        Raises:
            KeyError: if no key was ever added for ``group``.
        """
        try:
            rotation = self.iterators[group]
        except KeyError:
            raise KeyError(f"No API keys registered for group {group!r}") from None
        return next(rotation)


# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/clients/ai_client_abc.py:
# --------------------------------------------------------------------------------
from abc import ABC, abstractmethod


class AIBaseClient(ABC):
    """Common interface of the AI clients that summarize a repository.

    The abstract signatures mirror the concrete clients in this package, which
    are coroutines taking the directory structure and the codebase text.
    """

    @abstractmethod
    async def get_business_summary(self, directory_structure: str, codebase: str) -> str:
        pass

    @abstractmethod
    async def get_technical_documentation(self, directory_structure: str, codebase: str) -> str:
        pass

    def _truncate_text(self, text: str, max_tokens: int, multiplier: float = 3.7) -> str:
        """Cut ``text`` to roughly ``max_tokens`` tokens.

        The budget is ``max_tokens * multiplier`` characters. A non-positive
        budget yields an empty string (a negative slice bound would otherwise
        silently chop characters off the end instead).
        """
        return text[: max(0, int(max_tokens * multiplier))]


# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/clients/github.py:
# --------------------------------------------------------------------------------
import asyncio
import base64
from itertools import batched
import logging
from pathlib import Path
from textwrap import dedent
import zipfile
import aiohttp
from typing import Dict, List

from gitsummarize.constants.constants import VALID_FILE_EXTENSIONS
from gitsummarize.exceptions.exceptions import (
    GitHubAccessError,
    GitHubNotFoundError,
    GitHubRateLimitError,
    GitHubTreeError,
)
from gitsummarize.model.repo_metadata import RepoMetadata

logger = logging.getLogger(__name__)

FILE_LIMIT = 100 * 1000  # 100kb


class GithubClient:
    """Thin async client for the parts of the GitHub REST API used here."""

    def __init__(self, token: str):
        self.token = token
        self.headers = {"Authorization": f"Bearer {self.token}"}

    async def get_repo_metadata_from_url(self, gh_url: str) -> RepoMetadata:
        """Resolve ``gh_url`` to owner/repo and fetch the repository metadata."""
        owner, repo = self._parse_gh_url(gh_url)
        return await self.get_repo_metadata(owner, repo)

    async def get_repo_metadata(self, owner: str, repo: str) -> RepoMetadata:
        """Fetch stars, forks, language and description of ``owner/repo``.

        Raises:
            GitHubAccessError: (or a subclass) on any non-200 response. The
                status is checked before the body is parsed, so a non-JSON
                error page cannot surface as a parsing error.
        """
        data = await self._get_json(owner, repo, f"https://api.github.com/repos/{owner}/{repo}")
        return RepoMetadata(
            num_stars=data["stargazers_count"],
            num_forks=data["forks_count"],
            language=data["language"],
            description=data["description"],
        )

    async def get_all_content_from_url(self, gh_url: str) -> str:
        """Download the repository behind ``gh_url`` and return its text content."""
        owner, repo = self._parse_gh_url(gh_url)
        zip_path = await self.download_repository_zip(owner, repo)
        return await self.get_all_content_from_zip(zip_path)

    async def get_directory_structure_from_url(self, gh_url: str) -> str:
        """Return the tree-formatted directory listing of the repo at ``gh_url``."""
        owner, repo = self._parse_gh_url(gh_url)
        return await self.get_directory_structure(owner, repo)

    async def get_all_content_from_zip(self, path: Path) -> str:
        """Concatenate the text files of a repository zip archive.

        Only names ending in one of ``VALID_FILE_EXTENSIONS`` are considered.
        Files larger than ``FILE_LIMIT`` bytes or not decodable as UTF-8 are
        skipped with a warning.
        """
        formatted_content = []
        with zipfile.ZipFile(path, "r") as zip_ref:
            for file in zip_ref.namelist():
                if not file.endswith(VALID_FILE_EXTENSIONS):
                    continue
                if zip_ref.getinfo(file).file_size > FILE_LIMIT:
                    logger.warning(f"Skipping file: {file} because it is too large")
                    continue
                try:
                    decoded_content = zip_ref.read(file).decode("utf-8")
                except UnicodeDecodeError:
                    logger.warning(f"Failed to decode content for file: {file}")
                    continue
                formatted_content.append(
                    self._get_formatted_content(
                        self._get_file_name_from_zip_name(file),
                        decoded_content,
                    )
                )

        return "\n\n".join(formatted_content)

    async def download_repository_zip(self, owner: str, repo: str) -> Path:
        """Download the zipball of ``owner/repo`` to ``/tmp/{repo}.zip``.

        Raises:
            GitHubAccessError: (or a subclass) on a non-200 response, so an
                error payload is never written to disk as if it were a zip.
        """
        url = f"https://api.github.com/repos/{owner}/{repo}/zipball"
        zip_path = Path(f"/tmp/{repo}.zip")
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self.headers) as response:
                await self._raise_for_status(owner, repo, response)
                payload = await response.content.read()
        with open(zip_path, "wb") as f:
            f.write(payload)
        return zip_path

    async def get_directory_structure(self, owner: str, repo: str) -> str:
        """Get the directory structure of a repository in a tree-like format.

        Raises:
            GitHubTreeError: if the tree response cannot be parsed or carries
                no ``tree`` entry.
            GitHubAccessError: (or a subclass) on a non-200 response.
        """
        default_branch = await self._get_default_branch(owner, repo)
        latest_commit = await self._get_latest_commit(owner, repo, default_branch)
        tree_sha = await self._get_tree_sha(owner, repo, latest_commit)

        url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{tree_sha}?recursive=1"
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self.headers) as response:
                await self._raise_for_status(owner, repo, response)
                try:
                    data = await response.json()
                except Exception as e:
                    logger.error(f"Error parsing JSON response: {e}")
                    raise GitHubTreeError(owner, repo) from e

        tree = data.get("tree") if isinstance(data, dict) else None
        if tree is None:
            raise GitHubTreeError(owner, repo)

        # Build directory structure
        structure = self._build_directory_structure(tree)
        return self._format_directory_structure(structure)

    async def get_popular_repos(self, num_repos: int = 1000) -> list[dict]:
        """Return up to ``num_repos`` repositories with more than 1000 stars.

        Results come from the search API sorted by stars, descending. The page
        size stays constant across requests: the API derives the offset from
        ``page`` and ``per_page``, so shrinking ``per_page`` on the last page
        would re-fetch earlier results. Any surplus is trimmed at the end.
        """
        items: list[dict] = []
        page = 1
        per_page = min(100, num_repos)  # GitHub max per page is 100

        async with aiohttp.ClientSession() as session:
            while len(items) < num_repos:
                url = f"https://api.github.com/search/repositories?q=stars:>1000&sort=stars&order=desc&page={page}&per_page={per_page}"
                async with session.get(url, headers=self.headers) as response:
                    await self._raise_for_status("search", "repositories", response)
                    data = await response.json()

                batch = data.get("items")
                if not batch:
                    break

                items.extend(batch)
                # A short page means the result set is exhausted.
                if len(batch) < per_page or len(items) >= num_repos:
                    break

                page += 1
                print(f"Fetched {len(items)} repos so far")
                await asyncio.sleep(1)

        return items[:num_repos]  # Ensure we don't return more than requested

    def _parse_gh_url(self, gh_url: str) -> tuple[str, str]:
        """Parse a GitHub URL (or ``owner/repo``) into ``(owner, repo)``.

        Surrounding whitespace, trailing slashes and a ``.git`` suffix are
        ignored.

        Raises:
            ValueError: if fewer than two path segments remain.
        """
        # Remove the protocol part if present
        cleaned = gh_url.strip().replace("https://", "").replace("http://", "")

        # Drop empty segments so a trailing "/" cannot produce an empty repo.
        parts = [part for part in cleaned.split("/") if part]
        if len(parts) < 2:
            raise ValueError("Invalid GitHub URL")

        owner = parts[-2]
        repo = parts[-1].removesuffix(".git")
        if not repo:
            raise ValueError("Invalid GitHub URL")
        return owner, repo

    def _get_file_name_from_zip_name(self, zip_name: str) -> str:
        """Turn a zipball member name into ``<repo>/<path inside repo>``.

        The first and last dash-separated tokens of the archive's root
        directory are dropped and the remainder is used as the repo name.
        NOTE(review): presumably the root is ``<owner>-<repo>-<sha>``; an owner
        containing "-" would then leak into the name -- confirm.
        """
        segments = zip_name.split("/")
        root_tokens = segments[0].split("-")
        return "-".join(root_tokens[1:-1]) + "/" + "/".join(segments[1:])

    async def _raise_for_status(
        self, owner: str, repo: str, response: aiohttp.ClientResponse
    ):
        """Map a non-200 response status onto the project's GitHub exceptions."""
        if response.status == 404:
            raise GitHubNotFoundError(owner, repo)
        elif response.status in (429, 403):
            raise GitHubRateLimitError(owner, repo)
        elif response.status != 200:
            raise GitHubAccessError(owner, repo)

    async def _get_json(self, owner: str, repo: str, url: str) -> dict:
        """GET ``url`` and return its JSON body, raising on a non-200 status."""
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self.headers) as response:
                await self._raise_for_status(owner, repo, response)
                return await response.json()

    async def _get_default_branch(self, owner: str, repo: str) -> str:
        """Return the name of the repository's default branch."""
        data = await self._get_json(owner, repo, f"https://api.github.com/repos/{owner}/{repo}")
        return data["default_branch"]

    async def _get_latest_commit(self, owner: str, repo: str, branch: str) -> str:
        """Return the SHA of the commit ``branch`` points at."""
        data = await self._get_json(
            owner, repo, f"https://api.github.com/repos/{owner}/{repo}/commits/{branch}"
        )
        return data["sha"]

    async def _get_tree_sha(self, owner: str, repo: str, commit_sha: str) -> str:
        """Return the SHA of the root tree of commit ``commit_sha``."""
        data = await self._get_json(
            owner, repo, f"https://api.github.com/repos/{owner}/{repo}/commits/{commit_sha}"
        )
        return data["commit"]["tree"]["sha"]

    def _build_directory_structure(self, tree: List[Dict]) -> Dict:
        """Build a nested dictionary representing the directory structure.

        Directories map to a dict of their children and files map to ``None``.
        The API items themselves are not stored or mutated, so an entry named
        e.g. ``type`` or ``path`` cannot collide with item metadata. Parent
        directories missing from ``tree`` are created on demand. Entries that
        are neither ``tree`` nor ``blob`` are left out.
        """
        structure: Dict = {}
        for item in tree:
            path_parts = [part for part in item["path"].split("/") if part]
            if not path_parts:  # Don't add empty string for root
                continue

            current = structure
            for part in path_parts[:-1]:
                child = current.get(part)
                if not isinstance(child, dict):
                    child = current[part] = {}
                current = child

            leaf = path_parts[-1]
            kind = item.get("type")
            if kind == "tree":
                if not isinstance(current.get(leaf), dict):
                    current[leaf] = {}
            elif kind == "blob":
                current.setdefault(leaf, None)
        return structure

    def _format_directory_structure(
        self, structure: Dict, prefix: str = "", is_last: bool = True
    ) -> str:
        """Format the directory structure into a tree-like string.

        Args:
            structure: Mapping produced by ``_build_directory_structure``.
            prefix: Indentation carried down from the parent levels.
            is_last: Unused; kept for signature compatibility.
        """
        if not isinstance(structure, dict):
            return ""

        lines = []
        items = sorted(structure.items(), key=lambda entry: entry[0])
        for i, (name, value) in enumerate(items):
            is_last_item = i == len(items) - 1
            connector = "└── " if is_last_item else "├── "
            if isinstance(value, dict):
                # Directory
                lines.append(f"{prefix}{connector}{name}/")
                new_prefix = prefix + ("    " if is_last_item else "│   ")
                subtree = self._format_directory_structure(value, new_prefix, is_last_item)
                if subtree:  # an empty directory contributes no extra line
                    lines.append(subtree)
            else:
                lines.append(f"{prefix}{connector}{name}")

        return "\n".join(lines)

    def _get_formatted_content(self, path: str, content: str) -> str:
        """Wrap ``content`` in a banner naming the file it came from.

        The text is assembled directly rather than with ``dedent``: dedenting
        after interpolation depends on the indentation of ``content`` itself,
        so the banner layout would vary from file to file.
        """
        banner = "============================================================================="
        return f"\n{banner}\nFile: {path}\n{banner}\n{content}\n"


# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/clients/google_genai.py:
# --------------------------------------------------------------------------------
import re
from aiohttp import ClientError
from google import genai
from google.genai import types

from gitsummarize.clients.ai_client_abc import AIBaseClient
from gitsummarize.prompts.business_logic import BUSINESS_SUMMARY_PROMPT
from gitsummarize.prompts.technical_documentation import TECHNICAL_DOCUMENTATION_PROMPT

ALLOWED_INPUT_TOKENS_COUNT = 1_048_576
TIMEOUT = 1000 * 60 * 20  # 20 minutes
GEMINI_MODEL = "gemini-2.5-pro-exp-03-25"


class GoogleGenAI(AIBaseClient):
    """Gemini-backed implementation of ``AIBaseClient``."""

    def __init__(self, api_key: str):
        self.client = genai.Client(api_key=api_key)

    async def get_business_summary(
        self, directory_structure: str, codebase: str
    ) -> str:
        """Generate the business-logic summary for a codebase."""
        prompt = BUSINESS_SUMMARY_PROMPT.format(
            directory_structure=directory_structure, codebase=codebase
        )
        return await self._generate_with_retry(prompt)

    async def get_technical_documentation(
        self, directory_structure: str, codebase: str
    ) -> str:
        """Generate the technical documentation for a codebase."""
        prompt = TECHNICAL_DOCUMENTATION_PROMPT.format(
            directory_structure=directory_structure, codebase=codebase
        )
        return await self._generate_with_retry(prompt)

    async def _generate(self, prompt: str):
        """Send one generation request with the shared model and timeout."""
        return await self.client.aio.models.generate_content(
            model=GEMINI_MODEL,
            contents=prompt,
            config=types.GenerateContentConfig(
                http_options=types.HttpOptions(
                    timeout=TIMEOUT,
                ),
            ),
        )

    async def _generate_with_retry(self, prompt: str) -> str:
        """Generate text for ``prompt``, retrying once after an over-length error.

        The prompt is first truncated to the character budget. If the API
        rejects it with code 400 / ``INVALID_ARGUMENT``, it is shortened by the
        overshoot reported in the error message and sent once more. Any other
        exception propagates unchanged.
        """
        truncated_prompt = self._truncate_text(prompt, 800_000)
        try:
            response = await self._generate(truncated_prompt)
        except Exception as e:
            # Not every exception carries code/status (e.g. timeouts); reading
            # them unguarded would mask the real error with an AttributeError.
            if getattr(e, "code", None) == 400 and getattr(e, "status", None) == "INVALID_ARGUMENT":
                truncated_prompt = self._truncate_text_from_error(truncated_prompt, e)
                response = await self._generate(truncated_prompt)
            else:
                raise
        return response.text

    def _truncate_text_from_error(self, prompt: str, error: Exception) -> str:
        """Shorten ``prompt`` by the token overshoot reported in ``error``.

        Four characters are removed for every token above
        ``ALLOWED_INPUT_TOKENS_COUNT``. The prompt is returned unchanged when
        no overshoot can be determined.
        """
        input_tokens_count = self._extract_input_tokens_count_from_error(error)
        difference = input_tokens_count - ALLOWED_INPUT_TOKENS_COUNT
        if difference > 0:
            prompt = prompt[: -(difference * 4)]
        return prompt

    def _extract_input_tokens_count_from_error(self, error: Exception) -> int:
        """Return the input token count quoted in the error message, or 0."""
        regex = r"The input token count \((\d+)\) exceeds the maximum number"
        message = getattr(error, "message", None) or str(error)
        match = re.search(regex, message)
        if match:
            return int(match.group(1))
        return 0


# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/clients/openai.py:
# --------------------------------------------------------------------------------
from openai import AsyncOpenAI
from pydantic import BaseModel

from gitsummarize.clients.ai_client_abc import AIBaseClient
from gitsummarize.prompts.business_logic import BUSINESS_SUMMARY_PROMPT
from gitsummarize.prompts.resource_repo import RESOURCE_REPO_PROMPT
from gitsummarize.prompts.technical_documentation import TECHNICAL_DOCUMENTATION_PROMPT


class IsResourceRepo(BaseModel):
    """Structured answer to "is this a resource repository?"."""

    is_resource_repo: bool
    reason: str


class OpenAIClient(AIBaseClient):
    """OpenAI-backed implementation of ``AIBaseClient``."""

    def __init__(self, api_key: str):
        self.client = AsyncOpenAI(api_key=api_key)

    async def get_business_summary(
        self, directory_structure: str, codebase: str
    ) -> str:
        """Generate the business-logic summary for a codebase."""
        prompt = BUSINESS_SUMMARY_PROMPT.format(
            directory_structure=directory_structure, codebase=codebase
        )
        return await self._complete(prompt)

    async def get_technical_documentation(
        self, directory_structure: str, codebase: str
    ) -> str:
        """Generate the technical documentation for a codebase."""
        prompt = TECHNICAL_DOCUMENTATION_PROMPT.format(
            directory_structure=directory_structure, codebase=codebase
        )
        return await self._complete(prompt)

    async def _complete(self, prompt: str) -> str:
        """Send ``prompt`` (truncated to the character budget) to ``o3-mini``."""
        response = await self.client.chat.completions.create(
            model="o3-mini",
            messages=[
                {"role": "user", "content": self._truncate_text(prompt, 200_000, 4.1)}
            ],
        )
        return response.choices[0].message.content

    async def get_is_resource_repo(self, repo_info: str) -> IsResourceRepo:
        """Classify a repository as a codebase or a collection of resources."""
        prompt = RESOURCE_REPO_PROMPT.format(repo_info=repo_info)
        response = await self.client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            response_format=IsResourceRepo,
        )
        return response.choices[0].message.parsed


# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/clients/supabase.py:
# --------------------------------------------------------------------------------
from gitsummarize.model.repo_metadata import RepoMetadata
from supabase import create_client, Client


class SupabaseClient:
    """Access to the ``repo_summaries`` and ``repo_metadata`` tables."""

    def __init__(self, url: str, key: str):
        self.client: Client = create_client(supabase_url=url, supabase_key=key)

    def insert_repo_summary(self, repo_url: str, business_summary: str, technical_documentation: str):
        """Insert one row into ``repo_summaries``."""
        self.client.table("repo_summaries").insert({
            "repo_url": repo_url,
            "business_summary": business_summary,
            "technical_documentation": technical_documentation
        }).execute()

    def check_repo_url_exists(self, repo_url: str) -> dict | None:
        """Return the first ``repo_summaries`` row for ``repo_url``, if any.

        Only the ``repo_url`` column is selected, so the result is the row
        (not a bare string), or ``None`` when nothing matches.
        """
        response = self.client.table("repo_summaries").select("repo_url").eq("repo_url", repo_url).execute()
        if len(response.data) == 0:
            return None
        return response.data[0]

    def get_all_repo_urls(self) -> list[str]:
        """Return the ``repo_url`` of every row in ``repo_summaries``."""
        response = self.client.table("repo_summaries").select("repo_url").execute()
        return [row["repo_url"] for row in response.data]

    def upsert_repo_metadata(self, repo_url: str, metadata: RepoMetadata):
        """Insert or update the ``repo_metadata`` row keyed by ``repo_url``."""
        self.client.table("repo_metadata").upsert({
            "repo_url": repo_url,
            "num_stars": metadata.num_stars,
            "num_forks": metadata.num_forks,
            "language": metadata.language,
            "description": metadata.description
        }, on_conflict="repo_url").execute()

# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/constants/constants.py:
# --------------------------------------------------------------------------------
# Name suffixes of files treated as text worth reading. The tuple is meant to
# be passed to ``str.endswith``, so every entry is a literal suffix: glob-style
# patterns (e.g. "*.md", ".env.*", "docs/*") could never match that way and
# have been dropped, and each suffix is listed only once.
VALID_FILE_EXTENSIONS = (
    # Documentation
    ".md", ".rst", ".txt", ".adoc", ".asciidoc", ".wiki",

    # Web Development
    ".html", ".htm", ".css", ".scss", ".sass", ".less", ".js", ".jsx", ".ts", ".tsx", ".vue",

    # Configuration
    ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf", ".config", ".properties",
    ".env", ".rc", ".lock", ".lockb", ".lock.json", ".lock.yaml", ".lock.yml",
    ".lock.toml", ".lock.lockb",

    # Programming Languages
    # Python
    ".py", ".pyi", ".pyw", ".pyx", ".pxd", ".pxi", ".pyd", ".so",

    # Java & JVM (includes Kotlin and Clojure)
    ".java", ".kt", ".kts", ".scala", ".groovy", ".clj", ".cljs", ".cljc", ".edn",

    # JavaScript/TypeScript (the remaining suffixes are under Web Development)
    ".mjs", ".cjs",

    # C/C++
    ".c", ".h", ".cpp", ".hpp", ".cc", ".hh", ".cxx", ".hxx", ".inl",

    # Go
    ".go", ".mod", ".sum",

    # Rust
    ".rs", ".rlib",

    # PHP
    ".php", ".phtml", ".php3", ".php4", ".php5", ".php7", ".phps",

    # Ruby
    ".rb", ".rbw", ".rake", ".gemspec", ".gemfile", ".gemfile.lock",

    # Swift
    ".swift", ".swiftmodule", ".swiftinterface",

    # Dart
    ".dart",

    # Shell Scripts
    ".sh", ".bash", ".zsh", ".fish", ".csh", ".tcsh", ".ksh", ".mksh",

    # SQL
    ".sql", ".psql", ".mysql", ".sqlite", ".db",

    # R
    ".r", ".rdata", ".rds", ".rda",

    # MATLAB
    ".m", ".mat",

    # Julia
    ".jl",

    # Lua
    ".lua",

    # Perl
    ".pl", ".pm", ".pod", ".t",

    # Haskell
    ".hs", ".lhs", ".cabal", ".hsc",

    # F#
    ".fs", ".fsi", ".fsx",

    # OCaml
    ".ml", ".mli", ".mll", ".mly",

    # Elixir
    ".ex", ".exs", ".heex", ".eex",

    # Erlang
    ".erl", ".hrl", ".escript",

    # Nim
    ".nim", ".nimble",

    # Crystal
    ".cr", ".ecr",

    # Zig
    ".zig",

    # V
    ".v",

    # Odin
    ".odin",

    # Assembly
    ".asm", ".s", ".S", ".inc",

    # LaTeX
    ".tex", ".ltx", ".sty", ".cls", ".bbl", ".aux", ".log",

    # Markup
    ".xml", ".svg", ".xhtml", ".xslt", ".xsl", ".xsd", ".dtd",

    # Data Formats (YAML/TOML/INI suffixes are under Configuration)
    ".csv", ".tsv", ".jsonl", ".jsonc",

    # Build Systems
    ".cmake", ".cmake.in", "CMakeLists.txt", ".ninja", ".gradle", ".gradle.kts",
    ".bazel", ".bzl", "BUILD", "WORKSPACE", ".buck", "BUCK", ".podspec", ".podspec.json",

    # Package Management
    "package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml", "Cargo.lock",
    "Pipfile", "Pipfile.lock", "poetry.lock", "requirements.txt", "setup.py", "setup.cfg",
    "pyproject.toml", "composer.json", "composer.lock", "Gemfile", "Gemfile.lock",
    "Podfile", "Podfile.lock", "pubspec.yaml", "pubspec.lock", "mix.exs", "mix.lock",
    "cabal.project", "stack.yaml", "package.yaml",

    # IDE & Editor Config
    ".editorconfig", ".project", ".classpath",
    ".sublime-project", ".sublime-workspace", ".vimrc", ".viminfo",

    # Git
    ".gitignore", ".gitattributes", ".gitmodules",

    # Docker
    "Dockerfile", ".dockerignore",

    # CI/CD
    ".gitlab-ci.yml", ".travis.yml", ".circleci/config.yml",
    "azure-pipelines.yml", ".drone.yml", ".woodpecker.yml", "Jenkinsfile",

    # Documentation (well-known file names)
    "README.md", "CHANGELOG.md", "CONTRIBUTING.md", "LICENSE", "AUTHORS",
)
# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/exceptions/exceptions.py:
# --------------------------------------------------------------------------------
class GitHubAccessError(Exception):
    """Base error for a failed GitHub request concerning ``owner/repo``.

    Args:
        owner: Repository owner.
        repo: Repository name.
        message: Optional specific message; defaults to the generic
            "Failed to access ..." text. Subclasses pass their own message
            here so the base initializer no longer overwrites it.
    """

    def __init__(self, owner: str, repo: str, message: str | None = None):
        self.owner = owner
        self.repo = repo
        self.message = message or f"Failed to access GitHub repository {owner}/{repo}"
        super().__init__(self.message)


class GitHubRateLimitError(GitHubAccessError):
    """GitHub rejected the request because of rate limiting."""

    def __init__(self, owner: str, repo: str):
        super().__init__(owner, repo, f"GitHub rate limit exceeded for repository {owner}/{repo}")


class GitHubNotFoundError(GitHubAccessError):
    """The repository does not exist (or is not visible to the token)."""

    def __init__(self, owner: str, repo: str):
        super().__init__(owner, repo, f"Repository {owner}/{repo} not found")


class GitHubTreeError(GitHubAccessError):
    """The git tree of the repository could not be retrieved."""

    def __init__(self, owner: str, repo: str):
        super().__init__(owner, repo, f"Failed to get tree for repository {owner}/{repo}")


# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/model/repo_metadata.py:
# --------------------------------------------------------------------------------
from pydantic import BaseModel


class RepoMetadata(BaseModel):
    """Repository facts stored alongside a summary."""

    num_stars: int
    num_forks: int
    language: str | None
    description: str | None


# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/prompts/business_logic.py:
# --------------------------------------------------------------------------------
# Filled in with str.format(directory_structure=..., codebase=...).
BUSINESS_SUMMARY_PROMPT = """
You are a distinguished software architect reviewing the business logic layer of this codebase. Your goal is to produce a clear, structured, and high-level documentation of the business logic implemented in the provided files.

Focus on **what the code does for the product or business**, not just how it works technically.

---

Please generate a structured design document with the following format:

1. **Component Name**
   - The name of the business logic component (e.g. BillingService, PermissionsEngine)

2. **Purpose**
   - What business problem does this logic solve?
   - What domain concept does it represent?

3. **Key Responsibilities**
   - A bullet list of the core behaviors this component implements.
   - Try to phrase these as “rules,” “flows,” or “requirements” (e.g. “Users cannot access premium features until payment is verified”).

4. **Workflows / Use Cases**
   - Describe the primary workflows this logic supports.
   - Explain what triggers them, what the steps are, and what the outcomes are.

5. **Inputs and Outputs**
   - What data does this component operate on?
   - What does it return or affect?

6. **Dependencies**
   - What external services, database models, or internal modules does this depend on?

7. **Business Rules & Constraints**
   - Any conditional logic or constraints (e.g. pricing tiers, roles, quotas)
   - Anything that would be found in a product requirements doc

8. **Design Considerations**
   - Any notable trade-offs or reasons the logic is written this way
   - Any risks or edge cases

---

Use concise, structured language. Think like a staff engineer writing for a new team member trying to understand how this part of the app maps to real product behavior.
IMPORTANT: Please use the ## heading accurately. I will use it to divide the documentation into high level component sections, these headings should be business specific behaviors only.
IMPORTANT: Just output the documentation in markdown format, no other text.

---
Here is the directory structure of the codebase:

{directory_structure}

---

Here is the codebase:

{codebase}
"""

# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/prompts/resource_repo.py:
# --------------------------------------------------------------------------------
# Filled in with str.format(repo_info=...).
RESOURCE_REPO_PROMPT = """
You are a helpful assistant that determines if a repository is a codebase or just a resource repository (collection of resources).
For example, react, pytorch, etc. are codebases, but free-programmin-books, awesome-python, etc. are resource repositories.

Here is info about the repository:

{repo_info}

Please determine if this is a codebase or a resource repository.
"""

# --------------------------------------------------------------------------------
# /backend/src/gitsummarize/prompts/technical_documentation.py:
# --------------------------------------------------------------------------------
# Filled in with str.format(directory_structure=..., codebase=...).
TECHNICAL_DOCUMENTATION_PROMPT = """
You are a distinguished software architect and expert technical writer. You specialize in deeply understanding codebases and producing high-quality technical documentation that is both comprehensive and easy to follow.

Your role is to analyze the given code or repository and generate clear, well-structured documentation. This may include:
    • High-level architectural overviews
    • Component breakdowns
    • Class and function documentation
    • Usage examples and onboarding guides
    • Configuration and environment setup details
    • Diagrams (e.g., Mermaid flowcharts, class diagrams) where helpful

You think like an engineer reading the code for the first time—curious, meticulous, and detail-oriented. You always strive to capture the “why” behind the implementation, not just the “how”.

Write in a professional, concise tone. Use bullet points, headers, and code snippets where appropriate. Prioritize clarity, accuracy, and utility for both new contributors and experienced developers.

When applicable, infer and document:
    • The purpose and responsibilities of each module
    • The relationships between components
    • Trade-offs or design patterns used
    • How to extend or customize the code

Your output should be production-quality documentation suitable for internal engineering wikis or public open-source repositories.

IMPORTANT: Please use the ## heading accurately. I will use it to divide the documentation into high level component sections.
IMPORTANT: Just output the documentation in markdown format, no other text.

---

Here is the directory structure of the codebase:

{directory_structure}

---

Here is the codebase:

{codebase}
"""

# --------------------------------------------------------------------------------
# /backend/src/scripts/get_popular_gh_repos.py:
# --------------------------------------------------------------------------------
import asyncio
from itertools import batched
from gitsummarize.clients.github import GithubClient

from dotenv import load_dotenv
import os

from gitsummarize.clients.openai import OpenAIClient

load_dotenv()
openai_client = OpenAIClient(os.getenv("OPENAI_API_KEY"))


async def main():
    """Print the popular repositories that are real codebases."""
    gh = GithubClient(os.getenv("GITHUB_TOKEN"))
    # Get up to 1000 popular repositories
    repos = await gh.get_popular_repos(num_repos=1000)
    non_resource_repos = await filter_resource_repos(repos)
    print(non_resource_repos)


async def filter_resource_repos(repos: list[dict]) -> list[dict]:
    """Drop the repositories the model classifies as resource collections.

    Repositories are classified concurrently in batches of 100; input order is
    preserved in the result.
    """
    non_resource_repos = []
    for repos_batch in batched(repos, 100):
        tasks = [openai_client.get_is_resource_repo(str(repo)) for repo in repos_batch]
        results = await asyncio.gather(*tasks)
        non_resource_repos.extend(
            repo
            for repo, result in zip(repos_batch, results)
            if not result.is_resource_repo
        )
    return non_resource_repos


if __name__ == "__main__":
    asyncio.run(main())

# --------------------------------------------------------------------------------
# /backend/src/scripts/precache_repos.py:
# --------------------------------------------------------------------------------
import os
from dotenv import load_dotenv
import requests

from gitsummarize.clients.supabase import SupabaseClient

load_dotenv()
API_TOKEN = os.getenv("API_TOKEN")


def main():
    """Request a summary for every repo in ``tmp/repolist.txt`` not cached yet.

    Each line of the list is one repository URL. Lines are stripped before
    use: the trailing newline kept by ``readlines()`` would otherwise never
    match a stored URL and would be sent along in the request body. Blank
    lines are skipped.
    """
    supabase = SupabaseClient(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_ANON_KEY"))

    with open("tmp/repolist.txt", "r") as f:
        repos = [line.strip() for line in f]

    for repo in repos:
        if not repo:
            continue

        repo_summary = supabase.check_repo_url_exists(repo)
        if repo_summary:
            print(f"Repo {repo} already exists")
            continue
        print(f"Summarizing repo {repo}")

        url = "http://0.0.0.0:8000/summarize"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {API_TOKEN}",
        }
        data = {
            "repo_url": repo,
        }

        response = requests.post(url, headers=headers, json=data)
        print(response.text)


if __name__ == "__main__":
    main()

# NOTE: the remainder is carried over verbatim from the repository dump; the
# license text continues on the following line of the dump.
#
# --------------------------------------------------------------------------------
# /backend/tests/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/antarixxx/gitsummarize/0c6781f953f51b5539a01ebbbffec5ff15f4e7b8/backend/tests/__init__.py
#
# --------------------------------------------------------------------------------
# /frontend/LICENSE:
# --------------------------------------------------------------------------------
#  1 | Apache License
#  2 | Version 2.0, January 2004
#  3 | http://www.apache.org/licenses/
#  4 |
#  5
| TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2025 Antarix 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # 🧠 GitSummarize 2 | 3 | Image 4 | 5 |
6 | 7 | ## **Generate beautiful, world-class documentation from any GitHub repository — instantly.** 8 | 9 | Just replace `hub` with `summarize` in any GitHub URL to generate a live, interactive documentation hub. 10 |
11 | 12 | --- 13 | 14 | ## 🚀 Features 15 | 16 | GitSummarize analyzes any GitHub repo (public or private) and generates: 17 | 18 | - 📄 **System-level architecture overviews** 19 | - 📁 **Per-directory and file summaries** 20 | - 🧠 **Natural language descriptions of purpose, flow, and structure** 21 | - 🔗 **Business Logic and Rules Extraction** 22 | - 📊 **Architecture diagrams and flows** 23 | 24 | It’s perfect for onboarding, exploring unfamiliar codebases, and writing technical documentation — all powered by Gemini. 25 | 26 | --- 27 | 28 | ## 🧰 Tech Stack 29 | 30 | | Area | Stack | 31 | |------------|-------| 32 | | **Frontend** | Next.js, TypeScript, Tailwind CSS, ShadCN | 33 | | **Backend** | FastAPI, Python, Server Actions | 34 | | **Database** | PostgreSQL (Supabase) | 35 | | **AI** | Gemini 2.5 Pro | 36 | | **Analytics**| PostHog | 37 | | **Hosting** | Vercel (Frontend), Render (Backend) | 38 | 39 | --- 40 | 41 | ## 🤔 Why GitSummarize? 42 | 43 | We wanted to contribute to open-source projects but found it difficult to understand massive codebases quickly. 44 | 45 | GitSummarize automates the hardest part: figuring out *what the code does* and *how it's structured* — giving you useful documentation and a high-level overview of the codebase. 46 | 47 | --- 48 | 49 | ## 🧪 Local Development / Self-Hosting 50 | 51 | 1. **Clone the repo** 52 | ```bash 53 | git clone https://github.com/antarixxx/gitsummarize 54 | cd gitsummarize/frontend 55 | ``` 56 | 57 | 2. **Run the Next.js Project** 58 | ```bash 59 | npm install && npm run dev 60 | ``` 61 | 62 | You can now access the website at `localhost:3000`. 63 | 64 | 65 | ## Contributing 66 | 67 | Contributions are welcome! Please feel free to submit a Pull Request. 68 | 69 | ## Acknowledgements 70 | 71 | Shoutout to [GitIngest](https://gitingest.com/) and [GitDiagram](https://gitdiagram.com/) for the inspiration and styling. 
72 | 73 | ## 📈 Rate Limits 74 | 75 | We currently host it for free with rate limits, though this is somewhat likely to change in the future depending on Gemini's API policies. 76 | 77 | 83 | 84 | ## 🤔 Future Steps 85 | 86 | - Expand documentation to cover more topics (Setup, Onboarding Guide) 87 | - Add Architecture Diagrams -------------------------------------------------------------------------------- /frontend/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": true, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.ts", 8 | "css": "src/styles/globals.css", 9 | "baseColor": "neutral", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "~/components", 15 | "utils": "~/lib/utils", 16 | "ui": "~/components/ui", 17 | "lib": "~/lib", 18 | "hooks": "~/hooks" 19 | }, 20 | "iconLibrary": "lucide" 21 | } -------------------------------------------------------------------------------- /frontend/next-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /// 3 | 4 | // NOTE: This file should not be edited 5 | // see https://nextjs.org/docs/app/api-reference/config/typescript for more information. 6 | -------------------------------------------------------------------------------- /frontend/next.config.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation. This is especially useful 3 | * for Docker builds. 
4 | */ 5 | import "./src/env.js"; 6 | 7 | /** @type {import("next").NextConfig} */ 8 | const config = { 9 | reactStrictMode: false, 10 | images: { 11 | domains: [ 12 | 'img.shields.io', 13 | 'shields.io', 14 | 'github.com', 15 | 'raw.githubusercontent.com', 16 | 'camo.githubusercontent.com', 17 | 'travis-ci.org', 18 | 'travis-ci.com', 19 | 'codecov.io', 20 | 'coveralls.io', 21 | 'badge.fury.io', 22 | ], 23 | dangerouslyAllowSVG: true, 24 | contentSecurityPolicy: "default-src 'self'; script-src 'none'; sandbox;", 25 | }, 26 | async rewrites() { 27 | return [ 28 | { 29 | source: "/ingest/static/:path*", 30 | destination: "https://us-assets.i.posthog.com/static/:path*", 31 | }, 32 | { 33 | source: "/ingest/:path*", 34 | destination: "https://us.i.posthog.com/:path*", 35 | }, 36 | { 37 | source: "/ingest/decide", 38 | destination: "https://us.i.posthog.com/decide", 39 | }, 40 | ]; 41 | }, 42 | // This is required to support PostHog trailing slash API requests 43 | skipTrailingSlashRedirect: true, 44 | eslint: { 45 | // Don't run ESLint during build 46 | ignoreDuringBuilds: true, 47 | }, 48 | typescript: { 49 | // Skip type checking for speed 50 | ignoreBuildErrors: true, 51 | }, 52 | output: 'standalone', 53 | }; 54 | 55 | export default config; 56 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gitsummarize", 3 | "version": "0.1.0", 4 | "private": true, 5 | "type": "module", 6 | "scripts": { 7 | "build": "next build", 8 | "check": "next lint && tsc --noEmit", 9 | "db:generate": "drizzle-kit generate", 10 | "db:migrate": "drizzle-kit migrate", 11 | "db:push": "drizzle-kit push", 12 | "db:studio": "drizzle-kit studio", 13 | "dev": "next dev --turbo", 14 | "lint": "next lint", 15 | "lint:fix": "next lint --fix", 16 | "preview": "next build && next start", 17 | "start": "next start", 18 | "typecheck": "tsc 
--noEmit", 19 | "format:write": "prettier --write \"**/*.{ts,tsx,js,jsx,mdx}\" --cache", 20 | "format:check": "prettier --check \"**/*.{ts,tsx,js,jsx,mdx}\" --cache" 21 | }, 22 | "dependencies": { 23 | "@heroicons/react": "^2.2.0", 24 | "@neondatabase/serverless": "^0.10.4", 25 | "@octokit/rest": "^21.1.1", 26 | "@radix-ui/react-dialog": "^1.1.4", 27 | "@radix-ui/react-progress": "^1.1.1", 28 | "@radix-ui/react-slot": "^1.1.1", 29 | "@radix-ui/react-switch": "^1.1.3", 30 | "@radix-ui/react-tooltip": "^1.1.6", 31 | "@supabase/supabase-js": "^2.49.4", 32 | "@t3-oss/env-nextjs": "^0.10.1", 33 | "class-variance-authority": "^0.7.1", 34 | "clsx": "^2.1.1", 35 | "dotenv": "^16.4.7", 36 | "drizzle-orm": "^0.33.0", 37 | "geist": "^1.3.0", 38 | "ldrs": "^1.0.2", 39 | "lucide-react": "^0.468.0", 40 | "mermaid": "^11.4.1", 41 | "next": "^15.0.1", 42 | "postgres": "^3.4.4", 43 | "posthog-js": "^1.234.1", 44 | "react": "^18.3.1", 45 | "react-code-blocks": "^0.1.6", 46 | "react-dom": "^18.3.1", 47 | "react-icons": "^5.4.0", 48 | "svg-pan-zoom": "^3.6.2", 49 | "tailwind-merge": "^2.5.5", 50 | "tailwindcss-animate": "^1.0.7", 51 | "zod": "^3.23.3" 52 | }, 53 | "devDependencies": { 54 | "@types/eslint": "^8.56.10", 55 | "@types/node": "^20.14.10", 56 | "@types/react": "^18.3.3", 57 | "@types/react-dom": "^18.3.0", 58 | "@types/svg-pan-zoom": "^3.4.0", 59 | "@typescript-eslint/eslint-plugin": "^8.1.0", 60 | "@typescript-eslint/parser": "^8.1.0", 61 | "drizzle-kit": "^0.24.0", 62 | "eslint": "^8.57.0", 63 | "eslint-config-next": "^15.0.1", 64 | "eslint-plugin-drizzle": "^0.2.3", 65 | "postcss": "^8.4.39", 66 | "prettier": "^3.3.2", 67 | "prettier-plugin-tailwindcss": "^0.6.5", 68 | "tailwind-scrollbar": "^4.0.0", 69 | "tailwindcss": "^3.4.3", 70 | "typescript": "^5.5.3" 71 | }, 72 | "ct3aMetadata": { 73 | "initVersion": "7.38.1" 74 | }, 75 | "packageManager": "pnpm@9.13.0" 76 | } 77 | -------------------------------------------------------------------------------- 
/frontend/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | }, 5 | }; 6 | -------------------------------------------------------------------------------- /frontend/prettier.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('prettier').Config & import('prettier-plugin-tailwindcss').PluginOptions} */ 2 | export default { 3 | plugins: ["prettier-plugin-tailwindcss"], 4 | }; 5 | -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antarixxx/gitsummarize/0c6781f953f51b5539a01ebbbffec5ff15f4e7b8/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antarixxx/gitsummarize/0c6781f953f51b5539a01ebbbffec5ff15f4e7b8/frontend/public/favicon.png -------------------------------------------------------------------------------- /frontend/public/og-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antarixxx/gitsummarize/0c6781f953f51b5539a01ebbbffec5ff15f4e7b8/frontend/public/og-image.png -------------------------------------------------------------------------------- /frontend/src/app/_actions/cache.ts: -------------------------------------------------------------------------------- 1 | "use server"; 2 | 3 | import { db } from "~/server/db"; 4 | import { eq, and } from "drizzle-orm"; 5 | import { diagramCache } from "~/server/db/schema"; 6 | import { sql } from "drizzle-orm"; 7 | 8 | export async function getCachedDiagram(username: string, repo: string) { 9 | try { 10 
| const cached = await db 11 | .select() 12 | .from(diagramCache) 13 | .where( 14 | and(eq(diagramCache.username, username), eq(diagramCache.repo, repo)), 15 | ) 16 | .limit(1); 17 | 18 | return cached[0]?.diagram ?? null; 19 | } catch (error) { 20 | console.error("Error fetching cached diagram:", error); 21 | return null; 22 | } 23 | } 24 | 25 | export async function getCachedExplanation(username: string, repo: string) { 26 | try { 27 | const cached = await db 28 | .select() 29 | .from(diagramCache) 30 | .where( 31 | and(eq(diagramCache.username, username), eq(diagramCache.repo, repo)), 32 | ) 33 | .limit(1); 34 | 35 | return cached[0]?.explanation ?? null; 36 | } catch (error) { 37 | console.error("Error fetching cached explanation:", error); 38 | return null; 39 | } 40 | } 41 | 42 | export async function cacheDiagramAndExplanation( 43 | username: string, 44 | repo: string, 45 | diagram: string, 46 | explanation: string, 47 | usedOwnKey = false, 48 | ) { 49 | try { 50 | await db 51 | .insert(diagramCache) 52 | .values({ 53 | username, 54 | repo, 55 | diagram, 56 | explanation, 57 | usedOwnKey, 58 | }) 59 | .onConflictDoUpdate({ 60 | target: [diagramCache.username, diagramCache.repo], 61 | set: { 62 | diagram, 63 | explanation, 64 | usedOwnKey, 65 | updatedAt: new Date(), 66 | }, 67 | }); 68 | } catch (error) { 69 | console.error("Error caching diagram:", error); 70 | } 71 | } 72 | 73 | export async function getDiagramStats() { 74 | try { 75 | const stats = await db 76 | .select({ 77 | totalDiagrams: sql`COUNT(*)`, 78 | ownKeyUsers: sql`COUNT(CASE WHEN ${diagramCache.usedOwnKey} = true THEN 1 END)`, 79 | freeUsers: sql`COUNT(CASE WHEN ${diagramCache.usedOwnKey} = false THEN 1 END)`, 80 | }) 81 | .from(diagramCache); 82 | 83 | return stats[0]; 84 | } catch (error) { 85 | console.error("Error getting diagram stats:", error); 86 | return null; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- 
/frontend/src/app/_actions/github.ts: -------------------------------------------------------------------------------- 1 | import { cache } from "react"; 2 | 3 | interface GitHubResponse { 4 | stargazers_count: number; 5 | } 6 | 7 | export const getStarCount = cache(async () => { 8 | try { 9 | const response = await fetch( 10 | "https://api.github.com/repos/antarixxx/gitsummarize", 11 | { 12 | headers: { 13 | Accept: "application/vnd.github.v3+json", 14 | }, 15 | next: { 16 | revalidate: 300, // Cache for 5 minutes 17 | }, 18 | }, 19 | ); 20 | 21 | if (!response.ok) { 22 | throw new Error("Failed to fetch star count"); 23 | } 24 | 25 | const data = (await response.json()) as GitHubResponse; 26 | return data.stargazers_count; 27 | } catch (error) { 28 | console.error("Error fetching star count:", error); 29 | return null; 30 | } 31 | }); 32 | -------------------------------------------------------------------------------- /frontend/src/app/_actions/repo.ts: -------------------------------------------------------------------------------- 1 | "use server"; 2 | 3 | import { db } from "~/server/db"; 4 | import { eq, and } from "drizzle-orm"; 5 | import { diagramCache } from "~/server/db/schema"; 6 | 7 | export async function getLastGeneratedDate(username: string, repo: string) { 8 | const result = await db 9 | .select() 10 | .from(diagramCache) 11 | .where( 12 | and(eq(diagramCache.username, username), eq(diagramCache.repo, repo)), 13 | ); 14 | 15 | return result[0]?.updatedAt; 16 | } 17 | -------------------------------------------------------------------------------- /frontend/src/app/api/generate/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | 3 | // Set a longer timeout for the Next.js API route 4 | export const maxDuration = 240; // seconds (4 minutes) 5 | 6 | interface RequestBody { 7 | repo_url: string; 8 | } 9 | 10 | interface ErrorResponse { 11 | message: string; 12 | 
} 13 | 14 | export async function POST(request: Request) { 15 | try { 16 | const { repo_url } = await request.json() as RequestBody; 17 | 18 | if (!repo_url) { 19 | return NextResponse.json({ error: 'Repository URL is required' }, { status: 400 }); 20 | } 21 | 22 | // Call the external API with the bearer token and extended timeout 23 | const controller = new AbortController(); 24 | const timeoutId = setTimeout(() => controller.abort(), 240000); // 240 second timeout 25 | 26 | try { 27 | console.log(`Making request to external API for: ${repo_url}`); 28 | 29 | const response = await fetch('https://gitsummarize-kwzz.onrender.com/summarize', { 30 | method: 'POST', 31 | headers: { 32 | 'Content-Type': 'application/json', 33 | 'Authorization': `Bearer ${process.env.RENDER_API_KEY ?? ''}` 34 | }, 35 | body: JSON.stringify({ repo_url }), 36 | signal: controller.signal, 37 | }); 38 | 39 | clearTimeout(timeoutId); // Clear the timeout if request completes 40 | 41 | if (!response.ok) { 42 | const errorData = await response.json().catch(() => ({ message: 'Unknown error' })) as ErrorResponse; 43 | console.error('Error from external API:', errorData); 44 | return NextResponse.json( 45 | { error: 'Failed to process codebase. Please try again later or try adding your own Gemini API key.' }, 46 | { status: response.status } 47 | ); 48 | } 49 | 50 | // Return success response 51 | return NextResponse.json({ success: true }); 52 | } catch (fetchError) { 53 | clearTimeout(timeoutId); 54 | 55 | // Handle timeout specifically 56 | if (fetchError instanceof Error && fetchError.name === 'AbortError') { 57 | console.error('External API request timed out after 60 seconds'); 58 | return NextResponse.json( 59 | { error: 'External API request timed out. Try again later or try adding your own Gemini API key.' 
}, 60 | { status: 504 } 61 | ); 62 | } 63 | 64 | console.error('External API request failed:', fetchError); 65 | return NextResponse.json( 66 | { error: 'Failed to connect to external API service. The service might be unavailable. Try again later or try adding your own Gemini API key.' }, 67 | { status: 503 } 68 | ); 69 | } 70 | 71 | } catch (error) { 72 | console.error('Server error generating documentation:', error); 73 | return NextResponse.json( 74 | { error: 'Internal server error. Please try again later or try adding your own Gemini API key.' }, 75 | { status: 500 } 76 | ); 77 | } 78 | } -------------------------------------------------------------------------------- /frontend/src/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import "~/styles/globals.css"; 2 | 3 | import { GeistSans } from "geist/font/sans"; 4 | import { type Metadata } from "next"; 5 | import { Header } from "~/components/header"; 6 | import { Footer } from "~/components/footer"; 7 | import { CSPostHogProvider } from "./providers"; 8 | 9 | export const metadata: Metadata = { 10 | title: "GitSummarize", 11 | description: 12 | "Turn any GitHub repository into comprehensive documentation in seconds.", 13 | metadataBase: new URL("https://gitsummarize.com"), 14 | keywords: [ 15 | "github", 16 | "git summarize", 17 | "git documentation", 18 | "git documentation generator", 19 | "git documentation tool", 20 | "git documentation maker", 21 | "git documentation creator", 22 | "git summarize", 23 | "documentation", 24 | "repository", 25 | "visualization", 26 | "code structure", 27 | "system design", 28 | "software architecture", 29 | "software design", 30 | "software engineering", 31 | "software development", 32 | "software architecture", 33 | "software design", 34 | "software engineering", 35 | "software development", 36 | "open source", 37 | "open source software", 38 | "gitsummarize", 39 | "gitsummarize.com", 40 | ], 41 | authors: [ 42 | { 
name: "Antarixx", url: "https://github.com/antarixx" }, 43 | ], 44 | creator: "Antarixx", 45 | openGraph: { 46 | type: "website", 47 | locale: "en_US", 48 | url: "https://gitsummarize.com", 49 | title: "GitSummarize - Repository to Documentation in Seconds", 50 | description: 51 | "Turn any GitHub repository into comprehensive documentation in seconds.", 52 | siteName: "GitSummarize", 53 | images: [ 54 | { 55 | url: "/og-image.png", // You'll need to create this image 56 | width: 1200, 57 | height: 630, 58 | alt: "GitSummarize - Repository Documentation Tool", 59 | }, 60 | ], 61 | }, 62 | robots: { 63 | index: true, 64 | follow: true, 65 | googleBot: { 66 | index: true, 67 | follow: true, 68 | "max-snippet": -1, 69 | }, 70 | }, 71 | }; 72 | 73 | export default function RootLayout({ 74 | children, 75 | }: Readonly<{ children: React.ReactNode }>) { 76 | return ( 77 | 78 | 79 | 80 |
81 |
{children}
82 |