├── .codecov.yaml ├── .dockerignore ├── .env.example ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── feature_request.yml ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── docker_image.yml │ ├── pr-title-check.yml │ ├── publish_to_pypi.yml │ └── scorecard.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode └── launch.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── SECURITY.md ├── docs └── frontpage.png ├── eslint.config.cjs ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── src ├── gitingest │ ├── __init__.py │ ├── __main__.py │ ├── clone.py │ ├── config.py │ ├── entrypoint.py │ ├── ingestion.py │ ├── output_formatter.py │ ├── query_parser.py │ ├── schemas │ │ ├── __init__.py │ │ ├── filesystem.py │ │ └── ingestion.py │ └── utils │ │ ├── __init__.py │ │ ├── auth.py │ │ ├── compat_func.py │ │ ├── compat_typing.py │ │ ├── exceptions.py │ │ ├── file_utils.py │ │ ├── git_utils.py │ │ ├── ignore_patterns.py │ │ ├── ingestion_utils.py │ │ ├── notebook.py │ │ ├── os_utils.py │ │ ├── path_utils.py │ │ ├── query_parser_utils.py │ │ └── timeout_wrapper.py ├── server │ ├── __init__.py │ ├── form_types.py │ ├── main.py │ ├── metrics_server.py │ ├── models.py │ ├── query_processor.py │ ├── routers │ │ ├── __init__.py │ │ ├── dynamic.py │ │ ├── index.py │ │ └── ingest.py │ ├── routers_utils.py │ ├── server_config.py │ ├── server_utils.py │ └── templates │ │ ├── base.jinja │ │ ├── components │ │ ├── _macros.jinja │ │ ├── footer.jinja │ │ ├── git_form.jinja │ │ ├── navbar.jinja │ │ ├── result.jinja │ │ └── tailwind_components.html │ │ ├── git.jinja │ │ ├── index.jinja │ │ └── swagger_ui.jinja └── static │ ├── favicons │ ├── apple-touch-icon.png │ ├── favicon-64.png │ ├── favicon.ico │ └── favicon.svg │ ├── icons │ ├── chrome.svg │ ├── discord.svg │ ├── github.svg │ ├── python-color.svg │ └── python.svg │ ├── js │ ├── git.js │ ├── git_form.js │ ├── index.js │ ├── navbar.js │ ├── posthog.js │ └── 
utils.js │ ├── llms.txt │ ├── og-image.png │ ├── robots.txt │ └── svg │ ├── github-star.svg │ ├── sparkle-green.svg │ └── sparkle-red.svg └── tests ├── .pylintrc ├── __init__.py ├── conftest.py ├── query_parser ├── __init__.py ├── test_git_host_agnostic.py └── test_query_parser.py ├── test_cli.py ├── test_clone.py ├── test_flow_integration.py ├── test_git_utils.py ├── test_gitignore_feature.py ├── test_ingestion.py └── test_notebook_utils.py /.codecov.yaml: -------------------------------------------------------------------------------- 1 | comment: false 2 | github_checks: 3 | annotations: false 4 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Base: reuse patterns from .gitignore 3 | # ------------------------------------------------- 4 | 5 | # Operating-system 6 | .DS_Store 7 | Thumbs.db 8 | 9 | # Editor / IDE settings 10 | .vscode/ 11 | !.vscode/launch.json 12 | .idea/ 13 | *.swp 14 | 15 | # Python virtual-envs & tooling 16 | .venv*/ 17 | .python-version 18 | __pycache__/ 19 | *.egg-info/ 20 | *.egg 21 | .ruff_cache/ 22 | 23 | # Test artifacts & coverage 24 | .pytest_cache/ 25 | .coverage 26 | coverage.xml 27 | htmlcov/ 28 | 29 | # Build, distribution & docs 30 | build/ 31 | dist/ 32 | *.wheel 33 | 34 | # Logs & runtime output 35 | *.log 36 | logs/ 37 | *.tmp 38 | tmp/ 39 | 40 | # Project-specific files 41 | history.txt 42 | digest.txt 43 | 44 | 45 | # ------------------------------------------------- 46 | # Extra for Docker 47 | # ------------------------------------------------- 48 | 49 | # Git history 50 | .git/ 51 | .gitignore 52 | 53 | # Tests 54 | tests/ 55 | 56 | # Docs 57 | docs/ 58 | *.md 59 | LICENSE 60 | 61 | # Local overrides & secrets 62 | .env 63 | 64 | # Docker files 65 | .dockerignore 66 | Dockerfile* 67 | 68 | # 
------------------------------------------------- 69 | # Files required during build 70 | # ------------------------------------------------- 71 | !pyproject.toml 72 | !src/ 73 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # Gitingest Environment Variables 2 | 3 | # Host Configuration 4 | # Comma-separated list of allowed hostnames 5 | # Default: "gitingest.com, *.gitingest.com, localhost, 127.0.0.1" 6 | ALLOWED_HOSTS=gitingest.com,*.gitingest.com,localhost,127.0.0.1 7 | 8 | # GitHub Authentication 9 | # Personal Access Token for accessing private repositories 10 | # Generate your token here: https://github.com/settings/tokens/new?description=gitingest&scopes=repo 11 | # GITHUB_TOKEN=your_github_token_here 12 | 13 | # Metrics Configuration 14 | # Set to any value to enable the Prometheus metrics server 15 | # GITINGEST_METRICS_ENABLED=true 16 | # Host for the metrics server (default: "127.0.0.1") 17 | GITINGEST_METRICS_HOST=127.0.0.1 18 | # Port for the metrics server (default: "9090") 19 | GITINGEST_METRICS_PORT=9090 20 | 21 | # Sentry Configuration 22 | # Set to any value to enable Sentry error tracking 23 | # GITINGEST_SENTRY_ENABLED=true 24 | # Sentry DSN (required if Sentry is enabled) 25 | # GITINGEST_SENTRY_DSN=your_sentry_dsn_here 26 | # Sampling rate for performance data (default: "1.0", range: 0.0-1.0) 27 | GITINGEST_SENTRY_TRACES_SAMPLE_RATE=1.0 28 | # Sampling rate for profile sessions (default: "1.0", range: 0.0-1.0) 29 | GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE=1.0 30 | # Profile lifecycle mode (default: "trace") 31 | GITINGEST_SENTRY_PROFILE_LIFECYCLE=trace 32 | # Send default personally identifiable information (default: "true") 33 | GITINGEST_SENTRY_SEND_DEFAULT_PII=true 34 | # Environment name for Sentry (default: "") 35 | GITINGEST_SENTRY_ENVIRONMENT=development 36 | 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 🐞 2 | description: Report a bug or internal server error when using Gitingest 3 | title: "(bug): " 4 | labels: ["bug"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to report a bug! :lady_beetle: 10 | 11 | Please fill out the following details to help us reproduce and fix the issue. :point_down: 12 | 13 | - type: dropdown 14 | id: interface 15 | attributes: 16 | label: Which interface did you use? 17 | default: 0 18 | options: 19 | - "Select one..." 20 | - Web UI 21 | - CLI 22 | - PyPI package 23 | validations: 24 | required: true 25 | 26 | - type: input 27 | id: repo_url 28 | attributes: 29 | label: Repository URL (if public) 30 | placeholder: e.g., https://github.com/<username>/<repo>/blob_or_tree/commit_branch_or_tag/subdir 31 | 32 | - type: dropdown 33 | id: git_host 34 | attributes: 35 | label: Git host 36 | description: The Git host of the repository. 37 | default: 0 38 | options: 39 | - "Select one..." 40 | - GitHub (github.com) 41 | - GitLab (gitlab.com) 42 | - Bitbucket (bitbucket.org) 43 | - Gitea (gitea.com) 44 | - Codeberg (codeberg.org) 45 | - Gist (gist.github.com) 46 | - Kaggle (kaggle.com) 47 | - GitHub Enterprise (github.company.com) 48 | - Other (specify below) 49 | validations: 50 | required: true 51 | 52 | - type: input 53 | id: git_host_other 54 | attributes: 55 | label: Other Git host 56 | placeholder: If you selected "Other", please specify the Git host here. 57 | 58 | - type: dropdown 59 | id: repo_visibility 60 | attributes: 61 | label: Repository visibility 62 | default: 0 63 | options: 64 | - "Select one..." 
65 | - public 66 | - private 67 | validations: 68 | required: true 69 | 70 | - type: dropdown 71 | id: revision 72 | attributes: 73 | label: Commit, branch, or tag 74 | default: 0 75 | options: 76 | - "Select one..." 77 | - default branch 78 | - commit 79 | - branch 80 | - tag 81 | validations: 82 | required: true 83 | 84 | - type: dropdown 85 | id: ingest_scope 86 | attributes: 87 | label: Did you ingest the full repository or a subdirectory? 88 | default: 0 89 | options: 90 | - "Select one..." 91 | - full repository 92 | - subdirectory 93 | validations: 94 | required: true 95 | 96 | - type: dropdown 97 | id: os 98 | attributes: 99 | label: Operating system 100 | default: 0 101 | options: 102 | - "Select one..." 103 | - Not relevant (Web UI) 104 | - macOS 105 | - Windows 106 | - Linux 107 | validations: 108 | required: true 109 | 110 | - type: dropdown 111 | id: browser 112 | attributes: 113 | label: Browser (Web UI only) 114 | default: 0 115 | options: 116 | - "Select one..." 117 | - Not relevant (CLI / PyPI) 118 | - Chrome 119 | - Firefox 120 | - Safari 121 | - Edge 122 | - Other (specify below) 123 | validations: 124 | required: true 125 | 126 | - type: input 127 | id: browser_other 128 | attributes: 129 | label: Other browser 130 | placeholder: If you selected "Other", please specify the browser here. 131 | 132 | - type: input 133 | id: gitingest_version 134 | attributes: 135 | label: Gitingest version 136 | placeholder: e.g., v0.1.5 137 | description: Not required if you used the Web UI. 138 | 139 | - type: input 140 | id: python_version 141 | attributes: 142 | label: Python version 143 | placeholder: e.g., 3.11.5 144 | description: Not required if you used the Web UI. 145 | 146 | - type: textarea 147 | id: bug_description 148 | attributes: 149 | label: Bug description 150 | placeholder: Describe the bug here. 151 | description: A detailed but concise description of the bug. 
152 | validations: 153 | required: true 154 | 155 | 156 | - type: textarea 157 | id: steps_to_reproduce 158 | attributes: 159 | label: Steps to reproduce 160 | placeholder: Include the exact commands or actions that led to the error. 161 | description: Include the exact commands or actions that led to the error *(if relevant)*. 162 | render: shell 163 | 164 | - type: textarea 165 | id: expected_behavior 166 | attributes: 167 | label: Expected behavior 168 | placeholder: Describe what you expected to happen. 169 | description: Describe what you expected to happen *(if relevant)*. 170 | 171 | - type: textarea 172 | id: actual_behavior 173 | attributes: 174 | label: Actual behavior 175 | description: Paste the full error message or stack trace here. 176 | 177 | - type: textarea 178 | id: additional_context 179 | attributes: 180 | label: Additional context, logs, or screenshots 181 | placeholder: Add any other context, links, or screenshots about the issue here. 182 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 💡 2 | description: Suggest a new feature or improvement for Gitingest 3 | title: "(feat): " 4 | labels: ["enhancement"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to help us improve **Gitingest**! :sparkles: 10 | 11 | Please fill in the sections below to describe your idea. The more detail you provide, the easier it is for us to evaluate and plan the work. :point_down: 12 | 13 | - type: input 14 | id: summary 15 | attributes: 16 | label: Feature summary 17 | placeholder: One-sentence description of the feature. 18 | validations: 19 | required: true 20 | 21 | - type: textarea 22 | id: problem 23 | attributes: 24 | label: Problem / motivation 25 | description: What problem does this feature solve? How does it affect your workflow? 
26 | placeholder: Why is this feature important? Describe the pain point or limitation you're facing. 27 | validations: 28 | required: true 29 | 30 | - type: textarea 31 | id: proposal 32 | attributes: 33 | label: Proposed solution 34 | placeholder: Describe what you would like to see happen. 35 | description: Outline the feature as you imagine it. *(optional)* 36 | 37 | 38 | - type: textarea 39 | id: alternatives 40 | attributes: 41 | label: Alternatives considered 42 | placeholder: List other approaches you've considered or work-arounds you use today. 43 | description: Feel free to mention why those alternatives don't fully solve the problem. 44 | 45 | - type: dropdown 46 | id: interface 47 | attributes: 48 | label: Which interface would this affect? 49 | default: 0 50 | options: 51 | - "Select one..." 52 | - Web UI 53 | - CLI 54 | - PyPI package 55 | - CLI + PyPI package 56 | - All 57 | validations: 58 | required: true 59 | 60 | - type: dropdown 61 | id: priority 62 | attributes: 63 | label: How important is this to you? 64 | default: 0 65 | options: 66 | - "Select one..." 67 | - Nice to have 68 | - Important 69 | - Critical 70 | validations: 71 | required: true 72 | 73 | - type: dropdown 74 | id: willingness 75 | attributes: 76 | label: Would you like to work on this feature yourself? 77 | default: 0 78 | options: 79 | - "Select one..." 80 | - Yes, I'd like to implement it 81 | - Maybe, if I get some guidance 82 | - No, just requesting (absolutely fine!) 83 | validations: 84 | required: true 85 | 86 | - type: dropdown 87 | id: support_needed 88 | attributes: 89 | label: Would you need support from the maintainers (if you're implementing it yourself)? 90 | default: 0 91 | options: 92 | - "Select one..." 93 | - No, I can handle it solo 94 | - Yes, I'd need some guidance 95 | - Not sure yet 96 | - This is just a suggestion, I'm not planning to implement it myself (absolutely fine!) 
97 | 98 | - type: textarea 99 | id: additional_context 100 | attributes: 101 | label: Additional context, screenshots, or examples 102 | placeholder: Add links, sketches, or any other context that would help us understand and implement the feature. 103 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # ─── Python (pip) ───────────────────────────── 4 | - package-ecosystem: "pip" 5 | directory: "/" 6 | schedule: { interval: "weekly" } 7 | labels: [ "dependencies", "pip" ] 8 | groups: # Group patches & minors from dev-only tools 9 | dev-py: 10 | dependency-type: "development" 11 | update-types: ["minor", "patch"] 12 | 13 | # ─── GitHub Actions ─────────────────────────── 14 | - package-ecosystem: "github-actions" 15 | directory: "/" 16 | schedule: { interval: "weekly" } 17 | labels: [ "dependencies", "gh-actions" ] 18 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | test: 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | fail-fast: true 17 | matrix: 18 | os: [ubuntu-latest, macos-latest, windows-latest] 19 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] 20 | 21 | include: 22 | - os: ubuntu-latest 23 | python-version: "3.13" 24 | coverage: true 25 | 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - name: Set up Python 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | 34 | - name: Locate pip cache 35 | id: pip-cache 36 | shell: bash 37 | run: echo "dir=$(python -m pip cache dir)" >> "$GITHUB_OUTPUT" 38 | 39 | - name: Cache pip 
40 | uses: actions/cache@v4 41 | with: 42 | path: ${{ steps.pip-cache.outputs.dir }} 43 | key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} 44 | restore-keys: ${{ runner.os }}-pip- 45 | 46 | - name: Install dependencies 47 | run: | 48 | python -m pip install --upgrade pip 49 | python -m pip install ".[dev]" 50 | 51 | - name: Run tests 52 | if: ${{ matrix.coverage != true }} 53 | run: pytest 54 | 55 | - name: Run tests and collect coverage 56 | if: ${{ matrix.coverage == true }} 57 | run: | 58 | pytest \ 59 | --cov=gitingest \ 60 | --cov=server \ 61 | --cov-branch \ 62 | --cov-report=xml \ 63 | --cov-report=term 64 | 65 | - name: Upload coverage to Codecov 66 | if: ${{ matrix.coverage == true }} 67 | uses: codecov/codecov-action@v5 68 | with: 69 | token: ${{ secrets.CODECOV_TOKEN }} 70 | files: coverage.xml 71 | flags: ${{ matrix.os }}-py${{ matrix.python-version }} 72 | name: codecov-${{ matrix.os }}-${{ matrix.python-version }} 73 | fail_ci_if_error: true 74 | verbose: true 75 | 76 | - name: Run pre-commit hooks 77 | uses: pre-commit/action@v3.0.1 78 | if: ${{ matrix.python-version == '3.13' && matrix.os == 'ubuntu-latest' }} 79 | -------------------------------------------------------------------------------- /.github/workflows/docker_image.yml: -------------------------------------------------------------------------------- 1 | name: Build & Push Container 2 | on: 3 | push: 4 | branches: 5 | - 'main' 6 | tags: 7 | - '*' 8 | merge_group: 9 | pull_request: 10 | types: [labeled, synchronize, reopened, ready_for_review, opened] 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} 14 | cancel-in-progress: true 15 | 16 | env: 17 | REGISTRY: ghcr.io 18 | IMAGE_NAME: ${{ github.repository }} 19 | # Set to 'true' to allow pushing container from pull requests with the label 'push-container' 20 | PUSH_FROM_PR: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'push-container') 
}} 21 | 22 | jobs: 23 | docker-build: 24 | runs-on: ubuntu-latest 25 | permissions: 26 | contents: read 27 | packages: write 28 | attestations: write 29 | id-token: write 30 | steps: 31 | - uses: actions/checkout@v4 32 | 33 | - name: Set current timestamp 34 | id: vars 35 | run: | 36 | echo "timestamp=$(date +%s)" >> $GITHUB_OUTPUT 37 | echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT 38 | 39 | - name: Log in to the Container registry 40 | uses: docker/login-action@v3 41 | with: 42 | registry: ${{ env.REGISTRY }} 43 | username: ${{ github.actor }} 44 | password: ${{ secrets.GITHUB_TOKEN }} 45 | 46 | - name: Docker Meta 47 | id: meta 48 | uses: docker/metadata-action@v5 49 | with: 50 | images: | 51 | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 52 | flavor: | 53 | latest=false 54 | tags: | 55 | type=ref,event=branch,branch=main 56 | type=ref,event=branch,branch=main,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }} 57 | type=pep440,pattern={{raw}} 58 | type=ref,event=pr,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }} 59 | 60 | - name: Set up QEMU 61 | uses: docker/setup-qemu-action@v3 62 | 63 | - name: Set up Docker Buildx 64 | uses: docker/setup-buildx-action@v3 65 | 66 | - name: Build and push 67 | uses: docker/build-push-action@v6 68 | id: push 69 | with: 70 | context: . 
71 | platforms: linux/amd64, linux/arm64 72 | push: ${{ github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' }} 73 | tags: ${{ steps.meta.outputs.tags }} 74 | labels: ${{ steps.meta.outputs.labels }} 75 | cache-from: type=gha 76 | cache-to: type=gha,mode=max 77 | 78 | - name: Generate artifact attestation 79 | if: github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' 80 | uses: actions/attest-build-provenance@v2 81 | with: 82 | subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} 83 | subject-digest: ${{ steps.push.outputs.digest }} 84 | push-to-registry: true 85 | -------------------------------------------------------------------------------- /.github/workflows/pr-title-check.yml: -------------------------------------------------------------------------------- 1 | name: PR Conventional Commit Validation 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, edited] 6 | 7 | jobs: 8 | validate-pr-title: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: PR Conventional Commit Validation 12 | uses: ytanikin/pr-conventional-commits@1.4.1 13 | with: 14 | task_types: '["feat","fix","docs","test","ci","refactor","perf","chore","revert"]' 15 | add_label: 'false' 16 | -------------------------------------------------------------------------------- /.github/workflows/publish_to_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [created] # Run when you click “Publish release” 6 | workflow_dispatch: # ... 
or run it manually from the Actions tab 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | release-build: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Set up Python 3.13 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: "3.13" 22 | cache: pip 23 | cache-dependency-path: pyproject.toml 24 | 25 | - name: Build package 26 | run: | 27 | python -m pip install --upgrade pip 28 | python -m pip install build twine 29 | python -m build && twine check dist/* 30 | - name: Upload dist artefact 31 | uses: actions/upload-artifact@v4 32 | with: 33 | name: dist 34 | path: dist/ 35 | 36 | # Publish to PyPI (only if “dist/” succeeded) 37 | pypi-publish: 38 | needs: release-build 39 | runs-on: ubuntu-latest 40 | environment: pypi 41 | 42 | permissions: 43 | id-token: write # OIDC token for trusted publishing 44 | 45 | steps: 46 | - uses: actions/download-artifact@v4 47 | with: 48 | name: dist 49 | path: dist/ 50 | 51 | - uses: pypa/gh-action-pypi-publish@release/v1 52 | with: 53 | verbose: true 54 | -------------------------------------------------------------------------------- /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | name: OSSF Scorecard 2 | on: 3 | branch_protection_rule: 4 | schedule: 5 | - cron: '33 11 * * 2' # Every Tuesday at 11:33 AM UTC 6 | push: 7 | branches: [ main ] 8 | 9 | permissions: read-all 10 | 11 | concurrency: # avoid overlapping runs 12 | group: scorecard-${{ github.ref }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | analysis: 17 | name: Scorecard analysis 18 | runs-on: ubuntu-latest 19 | permissions: 20 | security-events: write # upload SARIF to code-scanning 21 | id-token: write # publish results for the badge 22 | 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 26 | with: 27 | persist-credentials: false 28 | 29 | - name: Run Scorecard 30 | uses: 
ossf/scorecard-action@05b42c624433fc40578a4040d5cf5e36ddca8cde 31 | with: 32 | results_file: results.sarif 33 | results_format: sarif 34 | publish_results: true # enables the public badge 35 | 36 | - name: Upload to code-scanning 37 | uses: github/codeql-action/upload-sarif@v3 38 | with: 39 | sarif_file: results.sarif 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Operating-system 2 | .DS_Store 3 | Thumbs.db 4 | 5 | # Editor / IDE settings 6 | .vscode/ 7 | !.vscode/launch.json 8 | .idea/ 9 | *.swp 10 | 11 | # Python virtual-envs & tooling 12 | .venv*/ 13 | venv/ 14 | .python-version 15 | __pycache__/ 16 | *.egg-info/ 17 | *.egg 18 | .ruff_cache/ 19 | 20 | # Test artifacts & coverage 21 | .pytest_cache/ 22 | .coverage 23 | coverage.xml 24 | htmlcov/ 25 | 26 | # Build, distribution & docs 27 | build/ 28 | dist/ 29 | *.wheel 30 | 31 | 32 | 33 | # Logs & runtime output 34 | *.log 35 | logs/ 36 | *.tmp 37 | tmp/ 38 | 39 | # Project-specific files 40 | history.txt 41 | digest.txt 42 | 43 | # Environment variables 44 | .env 45 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: check-added-large-files 6 | description: 'Prevent large files from being committed.' 7 | args: ['--maxkb=10000'] 8 | 9 | - id: check-case-conflict 10 | description: 'Check for files that would conflict in case-insensitive filesystems.' 11 | 12 | - id: fix-byte-order-marker 13 | description: 'Remove utf-8 byte order marker.' 14 | 15 | - id: mixed-line-ending 16 | description: 'Replace mixed line ending.' 
17 | 18 | - id: destroyed-symlinks 19 | description: 'Detect symlinks which are changed to regular files with a content of a path which that symlink was pointing to.' 20 | 21 | - id: check-ast 22 | description: 'Check for parseable syntax.' 23 | 24 | - id: end-of-file-fixer 25 | description: 'Ensure that a file is either empty, or ends with one newline.' 26 | 27 | - id: trailing-whitespace 28 | description: 'Trim trailing whitespace.' 29 | 30 | - id: check-docstring-first 31 | description: 'Check a common error of defining a docstring after code.' 32 | 33 | - id: requirements-txt-fixer 34 | description: 'Sort entries in requirements.txt.' 35 | 36 | - repo: https://github.com/MarcoGorelli/absolufy-imports 37 | rev: v0.3.1 38 | hooks: 39 | - id: absolufy-imports 40 | description: 'Automatically convert relative imports to absolute. (Use `args: [--never]` to revert.)' 41 | 42 | - repo: https://github.com/asottile/pyupgrade 43 | rev: v3.20.0 44 | hooks: 45 | - id: pyupgrade 46 | description: 'Automatically upgrade syntax for newer versions.' 47 | args: [--py3-plus, --py36-plus] 48 | 49 | - repo: https://github.com/pre-commit/pygrep-hooks 50 | rev: v1.10.0 51 | hooks: 52 | - id: python-check-blanket-noqa 53 | description: 'Enforce that `# noqa` annotations always occur with specific codes.' 54 | 55 | - id: python-check-blanket-type-ignore 56 | description: 'Enforce that `# type: ignore` annotations always occur with specific codes.' 57 | 58 | - id: python-use-type-annotations 59 | description: 'Enforce that python3.6+ type annotations are used instead of type comments.' 60 | 61 | - repo: https://github.com/PyCQA/isort 62 | rev: 6.0.1 63 | hooks: 64 | - id: isort 65 | description: 'Sort imports alphabetically, and automatically separated into sections and by type.' 66 | 67 | - repo: https://github.com/pre-commit/mirrors-eslint 68 | rev: v9.30.1 69 | hooks: 70 | - id: eslint 71 | description: 'Lint javascript files.' 
72 | files: \.js$ 73 | args: [--max-warnings=0, --fix] 74 | additional_dependencies: 75 | [ 76 | 'eslint@9.30.1', 77 | '@eslint/js@9.30.1', 78 | 'eslint-plugin-import@2.32.0', 79 | 'globals@16.3.0', 80 | ] 81 | 82 | - repo: https://github.com/djlint/djLint 83 | rev: v1.36.4 84 | hooks: 85 | - id: djlint-reformat-jinja 86 | 87 | - repo: https://github.com/igorshubovych/markdownlint-cli 88 | rev: v0.45.0 89 | hooks: 90 | - id: markdownlint 91 | description: 'Lint markdown files.' 92 | args: ['--disable=line-length'] 93 | 94 | - repo: https://github.com/astral-sh/ruff-pre-commit 95 | rev: v0.12.2 96 | hooks: 97 | - id: ruff-check 98 | - id: ruff-format 99 | 100 | - repo: https://github.com/jsh9/pydoclint 101 | rev: 0.6.7 102 | hooks: 103 | - id: pydoclint 104 | name: pydoclint for source 105 | args: [--style=numpy] 106 | files: ^src/ 107 | 108 | - repo: https://github.com/pycqa/pylint 109 | rev: v3.3.7 110 | hooks: 111 | - id: pylint 112 | name: pylint for source 113 | files: ^src/ 114 | additional_dependencies: 115 | [ 116 | click>=8.0.0, 117 | 'fastapi[standard]>=0.109.1', 118 | httpx, 119 | pathspec>=0.12.1, 120 | prometheus-client, 121 | pydantic, 122 | pytest-asyncio, 123 | pytest-mock, 124 | python-dotenv, 125 | slowapi, 126 | starlette>=0.40.0, 127 | tiktoken>=0.7.0, 128 | uvicorn>=0.11.7, 129 | ] 130 | 131 | - id: pylint 132 | name: pylint for tests 133 | files: ^tests/ 134 | args: 135 | - --rcfile=tests/.pylintrc 136 | additional_dependencies: 137 | [ 138 | click>=8.0.0, 139 | 'fastapi[standard]>=0.109.1', 140 | httpx, 141 | pathspec>=0.12.1, 142 | prometheus-client, 143 | pydantic, 144 | pytest-asyncio, 145 | pytest-mock, 146 | python-dotenv, 147 | slowapi, 148 | starlette>=0.40.0, 149 | tiktoken>=0.7.0, 150 | uvicorn>=0.11.7, 151 | ] 152 | 153 | - repo: meta 154 | hooks: 155 | - id: check-hooks-apply 156 | - id: check-useless-excludes 157 | -------------------------------------------------------------------------------- /.vscode/launch.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Python Debugger: Module", 5 | "type": "debugpy", 6 | "request": "launch", 7 | "module": "uvicorn", 8 | "args": ["server.main:app", "--host", "0.0.0.0", "--port", "8000"], 9 | "cwd": "${workspaceFolder}/src" 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | <romain@coderamp.io>. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), 118 | version 2.0, available at 119 | <https://www.contributor-covenant.org/version/2/0/code_of_conduct.html>. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | <https://www.contributor-covenant.org/faq>. Translations are available at 126 | <https://www.contributor-covenant.org/translations>. 127 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Gitingest 2 | 3 | Thanks for your interest in contributing to **Gitingest** 🚀 Our goal is to keep the codebase friendly to first-time contributors. 4 | If you ever get stuck, reach out on [Discord](https://discord.com/invite/zerRaGK9EC). 5 | 6 | --- 7 | 8 | ## How to Contribute (non-technical) 9 | 10 | - **Create an Issue** – found a bug or have a feature idea? 
11 | [Open an issue](https://github.com/coderamp-labs/gitingest/issues/new). 12 | - **Spread the Word** – tweet, blog, or tell a friend. 13 | - **Use Gitingest** – real-world usage gives the best feedback. File issues or ping us on [Discord](https://discord.com/invite/zerRaGK9EC) with anything you notice. 14 | 15 | --- 16 | 17 | ## How to submit a Pull Request 18 | 19 | > **Prerequisites**: The project uses **Python 3.9+** and `pre-commit` for development. 20 | 21 | 1. **Fork** the repository. 22 | 23 | 2. **Clone** your fork: 24 | 25 | ```bash 26 | git clone https://github.com/<your-username>/gitingest.git 27 | cd gitingest 28 | ``` 29 | 30 | 3. **Set up the dev environment**: 31 | 32 | ```bash 33 | python -m venv .venv 34 | source .venv/bin/activate 35 | pip install -e ".[dev]" 36 | pre-commit install 37 | ``` 38 | 39 | 4. **Create a branch** for your changes: 40 | 41 | ```bash 42 | git checkout -b your-branch 43 | ``` 44 | 45 | 5. **Make your changes** (and add tests when relevant). 46 | 47 | 6. **Stage** the changes: 48 | 49 | ```bash 50 | git add . 51 | ``` 52 | 53 | 7. **Run the backend test suite**: 54 | 55 | ```bash 56 | pytest 57 | ``` 58 | 59 | 8. *(Optional)* **Run `pre-commit` on all files** to check hooks without committing: 60 | 61 | ```bash 62 | pre-commit run --all-files 63 | ``` 64 | 65 | 9. **Run the local server** to sanity-check: 66 | 67 | ```bash 68 | cd src 69 | uvicorn server.main:app 70 | ``` 71 | 72 | Open [http://localhost:8000](http://localhost:8000) to confirm everything works. 73 | 74 | 10. **Commit** (signed): 75 | 76 | ```bash 77 | git commit -S -m "Your commit message" 78 | ``` 79 | 80 | If *pre-commit* complains, fix the problems and repeat **5 – 9**. 81 | 82 | 11. **Push** your branch: 83 | 84 | ```bash 85 | git push origin your-branch 86 | ``` 87 | 88 | 12. **Open a pull request** on GitHub with a clear description. 89 | 90 | 13. **Iterate** on any review feedback—update your branch and repeat **6 – 11** as needed.
91 | 92 | *(Optional) Invite a maintainer to your branch for easier collaboration.* 93 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Stage 1: Install Python dependencies 2 | FROM python:3.13-slim AS python-builder 3 | 4 | WORKDIR /build 5 | 6 | RUN set -eux; \ 7 | apt-get update; \ 8 | apt-get install -y --no-install-recommends gcc python3-dev; \ 9 | rm -rf /var/lib/apt/lists/* 10 | 11 | COPY pyproject.toml . 12 | COPY src/ ./src/ 13 | 14 | RUN set -eux; \ 15 | pip install --no-cache-dir --upgrade pip; \ 16 | pip install --no-cache-dir --timeout 1000 . 17 | 18 | # Stage 2: Runtime image 19 | FROM python:3.13-slim 20 | 21 | ARG UID=1000 22 | ARG GID=1000 23 | 24 | ENV PYTHONUNBUFFERED=1 \ 25 | PYTHONDONTWRITEBYTECODE=1 26 | 27 | RUN set -eux; \ 28 | apt-get update; \ 29 | apt-get install -y --no-install-recommends git curl; \ 30 | apt-get clean; \ 31 | rm -rf /var/lib/apt/lists/* 32 | 33 | WORKDIR /app 34 | RUN set -eux; \ 35 | groupadd -g "$GID" appuser; \ 36 | useradd -m -u "$UID" -g "$GID" appuser 37 | 38 | COPY --from=python-builder --chown=$UID:$GID /usr/local/lib/python3.13/site-packages/ /usr/local/lib/python3.13/site-packages/ 39 | COPY --chown=$UID:$GID src/ ./ 40 | 41 | RUN set -eux; \ 42 | chown -R appuser:appuser /app 43 | USER appuser 44 | 45 | EXPOSE 8000 46 | EXPOSE 9090 47 | CMD ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"] 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Romain Courtois 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without 
limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | If you have discovered a vulnerability inside the project, report it privately at <romain@coderamp.io>. This way the maintainer can work on a proper fix without disclosing the problem to the public before it has been solved. 
6 | -------------------------------------------------------------------------------- /docs/frontpage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/docs/frontpage.png -------------------------------------------------------------------------------- /eslint.config.cjs: -------------------------------------------------------------------------------- 1 | const js = require('@eslint/js'); 2 | const globals = require('globals'); 3 | const importPlugin = require('eslint-plugin-import'); 4 | 5 | module.exports = [ 6 | js.configs.recommended, 7 | 8 | { 9 | files: ['src/static/js/**/*.js'], 10 | 11 | languageOptions: { 12 | parserOptions: { ecmaVersion: 2021, sourceType: 'module' }, 13 | globals: { 14 | ...globals.browser, 15 | changePattern: 'readonly', 16 | copyFullDigest: 'readonly', 17 | copyText: 'readonly', 18 | downloadFullDigest: 'readonly', 19 | handleSubmit: 'readonly', 20 | posthog: 'readonly', 21 | submitExample: 'readonly', 22 | toggleAccessSettings: 'readonly', 23 | toggleFile: 'readonly', 24 | }, 25 | }, 26 | 27 | plugins: { import: importPlugin }, 28 | 29 | rules: { 30 | // Import hygiene (eslint-plugin-import) 31 | 'import/no-extraneous-dependencies': 'error', 32 | 'import/no-unresolved': 'error', 33 | 'import/order': ['warn', { alphabetize: { order: 'asc' } }], 34 | 35 | // Safety & bug-catchers 36 | 'consistent-return': 'error', 37 | 'default-case': 'error', 38 | 'no-implicit-globals': 'error', 39 | 'no-shadow': 'error', 40 | 41 | // Maintainability / complexity 42 | complexity: ['warn', 10], 43 | 'max-depth': ['warn', 4], 44 | 'max-lines': ['warn', 500], 45 | 'max-params': ['warn', 5], 46 | 47 | // Stylistic consistency (auto-fixable) 48 | 'arrow-parens': ['error', 'always'], 49 | curly: ['error', 'all'], 50 | indent: ['error', 4, { SwitchCase: 2 }], 51 | 'newline-per-chained-call': ['warn', { ignoreChainWithDepth: 2 }], 52 
| 'no-multi-spaces': 'error', 53 | 'object-shorthand': ['error', 'always'], 54 | 'padding-line-between-statements': [ 55 | 'warn', 56 | { blankLine: 'always', prev: '*', next: 'return' }, 57 | { blankLine: 'always', prev: ['const', 'let', 'var'], next: '*' }, 58 | { blankLine: 'any', prev: ['const', 'let', 'var'], next: ['const', 'let', 'var'] }, 59 | ], 60 | 'quote-props': ['error', 'consistent-as-needed'], 61 | quotes: ['error', 'single', { avoidEscape: true }], 62 | semi: 'error', 63 | 64 | // Modern / performance tips 65 | 'arrow-body-style': ['warn', 'as-needed'], 66 | 'prefer-arrow-callback': 'error', 67 | 'prefer-exponentiation-operator': 'error', 68 | 'prefer-numeric-literals': 'error', 69 | 'prefer-object-has-own': 'warn', 70 | 'prefer-object-spread': 'error', 71 | 'prefer-template': 'error', 72 | }, 73 | }, 74 | ]; 75 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "gitingest" 3 | version = "0.1.5" 4 | description="CLI tool to analyze and create text dumps of codebases for LLMs" 5 | readme = {file = "README.md", content-type = "text/markdown" } 6 | requires-python = ">= 3.8" 7 | dependencies = [ 8 | "click>=8.0.0", 9 | "fastapi[standard]>=0.109.1", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38) 10 | "httpx", 11 | "pathspec>=0.12.1", 12 | "pydantic", 13 | "python-dotenv", 14 | "slowapi", 15 | "starlette>=0.40.0", # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw) 16 | "tiktoken>=0.7.0", # Support for o200k_base encoding 17 | "typing_extensions>= 4.0.0; python_version < '3.10'", 18 | "uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150) 19 | "prometheus-client", 20 | ] 21 | 22 | license = {file = "LICENSE"} 23 | authors = [ 24 | { name = "Romain Courtois", email = "romain@coderamp.io" }, 25 | { name = "Filip 
Christiansen"}, 26 | ] 27 | classifiers=[ 28 | "Development Status :: 3 - Alpha", 29 | "Intended Audience :: Developers", 30 | "License :: OSI Approved :: MIT License", 31 | "Programming Language :: Python :: 3.8", 32 | "Programming Language :: Python :: 3.9", 33 | "Programming Language :: Python :: 3.10", 34 | "Programming Language :: Python :: 3.11", 35 | "Programming Language :: Python :: 3.12", 36 | "Programming Language :: Python :: 3.13", 37 | ] 38 | 39 | [project.optional-dependencies] 40 | dev = [ 41 | "eval-type-backport", 42 | "pre-commit", 43 | "pytest", 44 | "pytest-asyncio", 45 | "pytest-cov", 46 | "pytest-mock", 47 | ] 48 | 49 | [project.scripts] 50 | gitingest = "gitingest.__main__:main" 51 | 52 | [project.urls] 53 | homepage = "https://gitingest.com" 54 | github = "https://github.com/coderamp-labs/gitingest" 55 | 56 | [build-system] 57 | requires = ["setuptools>=61.0", "wheel"] 58 | build-backend = "setuptools.build_meta" 59 | 60 | [tool.setuptools] 61 | packages = {find = {where = ["src"]}} 62 | include-package-data = true 63 | 64 | # Linting configuration 65 | [tool.pylint.format] 66 | max-line-length = 119 67 | 68 | [tool.pylint.'MESSAGES CONTROL'] 69 | disable = [ 70 | "too-many-arguments", 71 | "too-many-positional-arguments", 72 | "too-many-locals", 73 | "too-few-public-methods", 74 | "broad-exception-caught", 75 | "duplicate-code", 76 | "fixme", 77 | ] 78 | 79 | [tool.ruff] 80 | line-length = 119 81 | fix = true 82 | 83 | [tool.ruff.lint] 84 | select = ["ALL"] 85 | ignore = [ # https://docs.astral.sh/ruff/rules/... 
86 | "D107", # undocumented-public-init 87 | "FIX002", # line-contains-todo 88 | "TD002", # missing-todo-author 89 | "PLR0913", # too-many-arguments, 90 | 91 | # TODO: fix the following issues: 92 | "TD003", # missing-todo-link, TODO: add issue links 93 | "T201", # print, TODO: replace with logging 94 | "S108", # hardcoded-temp-file, TODO: replace with tempfile 95 | "BLE001", # blind-except, TODO: replace with specific exceptions 96 | "FAST003", # fast-api-unused-path-parameter, TODO: fix 97 | ] 98 | per-file-ignores = { "tests/**/*.py" = ["S101"] } # Skip the "assert used" warning 99 | 100 | [tool.ruff.lint.pylint] 101 | max-returns = 10 102 | 103 | [tool.ruff.lint.isort] 104 | order-by-type = true 105 | case-sensitive = true 106 | 107 | [tool.pycln] 108 | all = true 109 | 110 | # TODO: Remove this once we figure out how to use ruff-isort 111 | [tool.isort] 112 | profile = "black" 113 | line_length = 119 114 | remove_redundant_aliases = true 115 | float_to_top = true # https://github.com/astral-sh/ruff/issues/6514 116 | order_by_type = true 117 | filter_files = true 118 | 119 | # Test configuration 120 | [tool.pytest.ini_options] 121 | pythonpath = ["src"] 122 | testpaths = ["tests/"] 123 | python_files = "test_*.py" 124 | asyncio_mode = "auto" 125 | asyncio_default_fixture_loop_scope = "function" 126 | python_classes = "Test*" 127 | python_functions = "test_*" 128 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | eval-type-backport 3 | pre-commit 4 | pytest 5 | pytest-asyncio 6 | pytest-cov 7 | pytest-mock 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click>=8.0.0 2 | fastapi[standard]>=0.109.1 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2024-38 3 | 
class _CLIArgs(TypedDict):
    """Keyword arguments collected by Click and forwarded from ``main`` to ``_async_main``."""

    source: str
    max_size: int
    exclude_pattern: tuple[str, ...]
    include_pattern: tuple[str, ...]
    branch: str | None
    include_gitignored: bool
    include_submodules: bool
    token: str | None
    output: str | None


# NOTE: Click renders the docstring of ``main`` as the ``--help`` text, so it is user-facing output.
@click.command()
@click.argument("source", type=str, default=".")
@click.option(
    "--max-size",
    "-s",
    default=MAX_FILE_SIZE,
    show_default=True,
    help="Maximum file size to process in bytes",
)
@click.option("--exclude-pattern", "-e", multiple=True, help="Shell-style patterns to exclude.")
@click.option(
    "--include-pattern",
    "-i",
    multiple=True,
    help="Shell-style patterns to include.",
)
@click.option("--branch", "-b", default=None, help="Branch to clone and ingest")
@click.option(
    "--include-gitignored",
    is_flag=True,
    default=False,
    help="Include files matched by .gitignore and .gitingestignore",
)
@click.option(
    "--include-submodules",
    is_flag=True,
    help="Include repository's submodules in the analysis",
    default=False,
)
@click.option(
    "--token",
    "-t",
    envvar="GITHUB_TOKEN",
    default=None,
    help=(
        "GitHub personal access token (PAT) for accessing private repositories. "
        "If omitted, the CLI will look for the GITHUB_TOKEN environment variable."
    ),
)
@click.option(
    "--output",
    "-o",
    default=None,
    help="Output file path (default: digest.txt in current directory). Use '-' for stdout.",
)
def main(**cli_kwargs: Unpack[_CLIArgs]) -> None:
    """Run the CLI entry point to analyze a repo / directory and dump its contents.

    Parameters
    ----------
    **cli_kwargs : Unpack[_CLIArgs]
        A dictionary of keyword arguments forwarded to ``ingest_async``.

    Notes
    -----
    See ``ingest_async`` for a detailed description of each argument.

    Examples
    --------
    Basic usage:
        $ gitingest
        $ gitingest /path/to/repo
        $ gitingest https://github.com/user/repo

    Output to stdout:
        $ gitingest -o -
        $ gitingest https://github.com/user/repo --output -

    With filtering:
        $ gitingest -i "*.py" -e "*.log"
        $ gitingest --include-pattern "*.js" --exclude-pattern "node_modules/*"

    Private repositories:
        $ gitingest https://github.com/user/private-repo -t ghp_token
        $ GITHUB_TOKEN=ghp_token gitingest https://github.com/user/private-repo

    Include submodules:
        $ gitingest https://github.com/user/repo --include-submodules

    """
    asyncio.run(_async_main(**cli_kwargs))


async def _async_main(
    source: str,
    *,
    max_size: int = MAX_FILE_SIZE,
    exclude_pattern: tuple[str, ...] | None = None,
    include_pattern: tuple[str, ...] | None = None,
    branch: str | None = None,
    include_gitignored: bool = False,
    include_submodules: bool = False,
    token: str | None = None,
    output: str | None = None,
) -> None:
    """Ingest ``source`` and report the outcome on the terminal.

    The include / exclude patterns are converted to sets, the work is delegated to
    ``ingest_async``, and the returned summary is echoed. Progress messages always go to
    ``stderr``; when the digest itself is sent to ``stdout`` the summary goes to ``stderr``
    as well.

    Parameters
    ----------
    source : str
        A directory path or a Git repository URL.
    max_size : int
        Maximum file size in bytes to ingest (default: ``MAX_FILE_SIZE``).
    exclude_pattern : tuple[str, ...] | None
        Glob patterns for pruning the file set.
    include_pattern : tuple[str, ...] | None
        Glob patterns for including files in the output.
    branch : str | None
        Git branch to ingest. If ``None``, the repository's default branch is used.
    include_gitignored : bool
        If ``True``, also ingest files matched by ``.gitignore`` or ``.gitingestignore`` (default: ``False``).
    include_submodules : bool
        If ``True``, recursively include all Git submodules within the repository (default: ``False``).
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.
        Can also be set via the ``GITHUB_TOKEN`` environment variable.
    output : str | None
        The path where the output file will be written (default: ``OUTPUT_FILE_NAME`` in the current
        directory). Use ``"-"`` to write to ``stdout``.

    Raises
    ------
    click.Abort
        Raised if an error occurs during execution and the command must be aborted.

    """
    destination = OUTPUT_FILE_NAME if output is None else output
    to_stdout = destination == "-"

    try:
        # The ingest layer expects sets; a missing or empty tuple becomes an empty set.
        excluded = set(exclude_pattern or ())
        included = set(include_pattern or ())

        if to_stdout:
            announcement = "Analyzing source, preparing output for stdout..."
        else:
            announcement = f"Analyzing source, output will be written to '{destination}'..."
        click.echo(announcement, err=True)

        summary, _, _ = await ingest_async(
            source,
            max_file_size=max_size,
            include_patterns=included,
            exclude_patterns=excluded,
            branch=branch,
            include_gitignored=include_gitignored,
            include_submodules=include_submodules,
            token=token,
            output=destination,
        )
    except Exception as exc:
        # Any failure becomes click.Abort so that the exit status is non-zero.
        click.echo(f"Error: {exc}", err=True)
        raise click.Abort from exc

    if to_stdout:
        # Keep stdout reserved for the digest itself; everything else goes to stderr.
        for line in (
            "\n--- Summary ---",
            summary,
            "--- End Summary ---",
            "Analysis complete! Output sent to stdout.",
        ):
            click.echo(line, err=True)
        return

    click.echo(f"Analysis complete! Output written to: {destination}")
    click.echo("\nSummary:")
    click.echo(summary)


if __name__ == "__main__":
    main()


@async_timeout(DEFAULT_TIMEOUT)
async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
    """Clone the repository described by ``config`` into ``config.local_path``.

    The clone is single-branch and shallow unless a commit is requested. When ``config.subpath``
    is not ``"/"`` a blob-less sparse clone is made and narrowed afterwards. A requested commit
    is checked out once the clone has finished.

    Parameters
    ----------
    config : CloneConfig
        The configuration for cloning the repository.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    Raises
    ------
    ValueError
        If ``check_repo_exists`` reports that the repository cannot be found.

    """
    url: str = config.url
    local_path: str = config.local_path
    commit: str | None = config.commit
    branch: str | None = config.branch
    tag: str | None = config.tag
    wants_subpath: bool = config.subpath != "/"

    # The clone target's parent directory must exist before git runs.
    await ensure_directory(Path(local_path).parent)

    if not await check_repo_exists(url, token=token):
        msg = "Repository not found. Make sure it is public or that you have provided a valid token."
        raise ValueError(msg)

    # The auth header is only injected for GitHub hosts, and must precede the sub-command.
    auth_args = ["-c", create_git_auth_header(token, url=url)] if token and is_github_host(url) else []
    clone_cmd = ["git", *auth_args, "clone", "--single-branch"]

    if config.include_submodules:
        clone_cmd.append("--recurse-submodules")

    if wants_subpath:
        clone_cmd.extend(["--filter=blob:none", "--sparse"])

    # A specific commit may not be reachable from a depth-1 clone, so only go shallow without one.
    if not commit:
        clone_cmd.append("--depth=1")

    # A tag wins over a branch; "main" / "master" are not passed as an explicit --branch.
    if tag:
        ref = tag
    elif branch and branch.lower() not in ("main", "master"):
        ref = branch
    else:
        ref = None
    if ref:
        clone_cmd.extend(["--branch", ref])

    clone_cmd.extend([url, local_path])

    await ensure_git_installed()
    await run_command(*clone_cmd)

    if wants_subpath:
        await _checkout_partial_clone(config, token)

    if commit:
        git_cmd = create_git_command(["git"], local_path, url, token)
        await run_command(*git_cmd, "checkout", commit)


async def _checkout_partial_clone(config: CloneConfig, token: str | None) -> None:
    """Restrict a sparse clone to the sub-path requested in ``config``.

    Parameters
    ----------
    config : CloneConfig
        The configuration for cloning the repository, including subpath and blob flag.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    """
    sparse_target = config.subpath.lstrip("/")
    if config.blob:
        # A blob URL points at a file (e.g. blob/branch/path/file.txt): check out its directory instead.
        sparse_target = Path(sparse_target).parent.as_posix()

    git_cmd = create_git_command(["git"], config.local_path, config.url, token)
    await run_command(*git_cmd, "sparse-checkout", "set", sparse_target)
IngestionQuery 14 | 15 | _TOKEN_THRESHOLDS: list[tuple[int, str]] = [ 16 | (1_000_000, "M"), 17 | (1_000, "k"), 18 | ] 19 | 20 | 21 | def format_node(node: FileSystemNode, query: IngestionQuery) -> tuple[str, str, str]: 22 | """Generate a summary, directory structure, and file contents for a given file system node. 23 | 24 | If the node represents a directory, the function will recursively process its contents. 25 | 26 | Parameters 27 | ---------- 28 | node : FileSystemNode 29 | The file system node to be summarized. 30 | query : IngestionQuery 31 | The parsed query object containing information about the repository and query parameters. 32 | 33 | Returns 34 | ------- 35 | tuple[str, str, str] 36 | A tuple containing the summary, directory structure, and file contents. 37 | 38 | """ 39 | is_single_file = node.type == FileSystemNodeType.FILE 40 | summary = _create_summary_prefix(query, single_file=is_single_file) 41 | 42 | if node.type == FileSystemNodeType.DIRECTORY: 43 | summary += f"Files analyzed: {node.file_count}\n" 44 | elif node.type == FileSystemNodeType.FILE: 45 | summary += f"File: {node.name}\n" 46 | summary += f"Lines: {len(node.content.splitlines()):,}\n" 47 | 48 | tree = "Directory structure:\n" + _create_tree_structure(query, node=node) 49 | 50 | content = _gather_file_contents(node) 51 | 52 | token_estimate = _format_token_count(tree + content) 53 | if token_estimate: 54 | summary += f"\nEstimated tokens: {token_estimate}" 55 | 56 | return summary, tree, content 57 | 58 | 59 | def _create_summary_prefix(query: IngestionQuery, *, single_file: bool = False) -> str: 60 | """Create a prefix string for summarizing a repository or local directory. 61 | 62 | Includes repository name (if provided), commit/branch details, and subpath if relevant. 63 | 64 | Parameters 65 | ---------- 66 | query : IngestionQuery 67 | The parsed query object containing information about the repository and query parameters. 
68 | single_file : bool 69 | A flag indicating whether the summary is for a single file (default: ``False``). 70 | 71 | Returns 72 | ------- 73 | str 74 | A summary prefix string containing repository, commit, branch, and subpath details. 75 | 76 | """ 77 | parts = [] 78 | 79 | if query.user_name: 80 | parts.append(f"Repository: {query.user_name}/{query.repo_name}") 81 | else: 82 | # Local scenario 83 | parts.append(f"Directory: {query.slug}") 84 | 85 | if query.commit: 86 | parts.append(f"Commit: {query.commit}") 87 | elif query.branch and query.branch not in ("main", "master"): 88 | parts.append(f"Branch: {query.branch}") 89 | 90 | if query.subpath != "/" and not single_file: 91 | parts.append(f"Subpath: {query.subpath}") 92 | 93 | return "\n".join(parts) + "\n" 94 | 95 | 96 | def _gather_file_contents(node: FileSystemNode) -> str: 97 | """Recursively gather contents of all files under the given node. 98 | 99 | This function recursively processes a directory node and gathers the contents of all files 100 | under that node. It returns the concatenated content of all files as a single string. 101 | 102 | Parameters 103 | ---------- 104 | node : FileSystemNode 105 | The current directory or file node being processed. 106 | 107 | Returns 108 | ------- 109 | str 110 | The concatenated content of all files under the given node. 111 | 112 | """ 113 | if node.type != FileSystemNodeType.DIRECTORY: 114 | return node.content_string 115 | 116 | # Recursively gather contents of all files under the current directory 117 | return "\n".join(_gather_file_contents(child) for child in node.children) 118 | 119 | 120 | def _create_tree_structure( 121 | query: IngestionQuery, 122 | *, 123 | node: FileSystemNode, 124 | prefix: str = "", 125 | is_last: bool = True, 126 | ) -> str: 127 | """Generate a tree-like string representation of the file structure. 
128 | 129 | This function generates a string representation of the directory structure, formatted 130 | as a tree with appropriate indentation for nested directories and files. 131 | 132 | Parameters 133 | ---------- 134 | query : IngestionQuery 135 | The parsed query object containing information about the repository and query parameters. 136 | node : FileSystemNode 137 | The current directory or file node being processed. 138 | prefix : str 139 | A string used for indentation and formatting of the tree structure (default: ``""``). 140 | is_last : bool 141 | A flag indicating whether the current node is the last in its directory (default: ``True``). 142 | 143 | Returns 144 | ------- 145 | str 146 | A string representing the directory structure formatted as a tree. 147 | 148 | """ 149 | if not node.name: 150 | # If no name is present, use the slug as the top-level directory name 151 | node.name = query.slug 152 | 153 | tree_str = "" 154 | current_prefix = "└── " if is_last else "├── " 155 | 156 | # Indicate directories with a trailing slash 157 | display_name = node.name 158 | if node.type == FileSystemNodeType.DIRECTORY: 159 | display_name += "/" 160 | elif node.type == FileSystemNodeType.SYMLINK: 161 | display_name += " -> " + readlink(node.path).name 162 | 163 | tree_str += f"{prefix}{current_prefix}{display_name}\n" 164 | 165 | if node.type == FileSystemNodeType.DIRECTORY and node.children: 166 | prefix += " " if is_last else "│ " 167 | for i, child in enumerate(node.children): 168 | tree_str += _create_tree_structure(query, node=child, prefix=prefix, is_last=i == len(node.children) - 1) 169 | return tree_str 170 | 171 | 172 | def _format_token_count(text: str) -> str | None: 173 | """Return a human-readable token-count string (e.g. 1.2k, 1.2 M). 174 | 175 | Parameters 176 | ---------- 177 | text : str 178 | The text string for which the token count is to be estimated. 
179 | 180 | Returns 181 | ------- 182 | str | None 183 | The formatted number of tokens as a string (e.g., ``"1.2k"``, ``"1.2M"``), or ``None`` if an error occurs. 184 | 185 | """ 186 | try: 187 | encoding = tiktoken.get_encoding("o200k_base") # gpt-4o, gpt-4o-mini 188 | total_tokens = len(encoding.encode(text, disallowed_special=())) 189 | except (ValueError, UnicodeEncodeError) as exc: 190 | print(exc) 191 | return None 192 | 193 | for threshold, suffix in _TOKEN_THRESHOLDS: 194 | if total_tokens >= threshold: 195 | return f"{total_tokens / threshold:.1f}{suffix}" 196 | 197 | return str(total_tokens) 198 | -------------------------------------------------------------------------------- /src/gitingest/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | """Module containing the schemas for the Gitingest package.""" 2 | 3 | from gitingest.schemas.filesystem import FileSystemNode, FileSystemNodeType, FileSystemStats 4 | from gitingest.schemas.ingestion import CloneConfig, IngestionQuery 5 | 6 | __all__ = ["CloneConfig", "FileSystemNode", "FileSystemNodeType", "FileSystemStats", "IngestionQuery"] 7 | -------------------------------------------------------------------------------- /src/gitingest/schemas/filesystem.py: -------------------------------------------------------------------------------- 1 | """Define the schema for the filesystem representation.""" 2 | 3 | from __future__ import annotations 4 | 5 | import os 6 | from dataclasses import dataclass, field 7 | from enum import Enum, auto 8 | from typing import TYPE_CHECKING 9 | 10 | from gitingest.utils.compat_func import readlink 11 | from gitingest.utils.file_utils import _decodes, _get_preferred_encodings, _read_chunk 12 | from gitingest.utils.notebook import process_notebook 13 | 14 | if TYPE_CHECKING: 15 | from pathlib import Path 16 | 17 | SEPARATOR = "=" * 48 # Tiktoken, the tokenizer openai uses, counts 2 tokens if we have more than 48 18 | 19 | 20 | 
class FileSystemNodeType(Enum):
    """Enumerate the kinds of entries a file system tree can contain."""

    DIRECTORY = auto()
    FILE = auto()
    SYMLINK = auto()


@dataclass
class FileSystemStats:
    """Accumulate totals while the file system is traversed.

    Attributes
    ----------
    total_files : int
        Running file counter (starts at ``0``).
    total_size : int
        Running size counter (starts at ``0``).

    """

    total_files: int = 0
    total_size: int = 0


@dataclass
class FileSystemNode:  # pylint: disable=too-many-instance-attributes
    """Represent one entry (file, directory or symlink) of the ingested tree.

    Attributes
    ----------
    name : str
        Entry name; ``sort_children`` orders siblings by its lower-cased form.
    type : FileSystemNodeType
        Kind of entry.
    path_str : str
        Path printed in the rendered header (OS separators are normalised to ``/``).
    path : Path
        Location used to read the content from disk.
    size : int
        Size counter (default: ``0``).
    file_count : int
        File counter (default: ``0``).
    dir_count : int
        Directory counter (default: ``0``).
    depth : int
        Depth counter (default: ``0``).
    children : list[FileSystemNode]
        Child nodes; a fresh list is created for every instance.

    """

    name: str
    type: FileSystemNodeType
    path_str: str
    path: Path
    size: int = 0
    file_count: int = 0
    dir_count: int = 0
    depth: int = 0
    children: list[FileSystemNode] = field(default_factory=list)

    def sort_children(self) -> None:
        """Sort the children of a directory node in place.

        Order of sorting:
          1. README files (named ``readme`` or ``readme.*``, case-insensitive)
          2. Regular files (not starting with dot)
          3. Hidden files (starting with dot)
          4. Regular non-file nodes, i.e. directories and symlinks (not starting with dot)
          5. Hidden non-file nodes (starting with dot)

        Within each group, nodes are ordered by their lower-cased name.

        Raises
        ------
        ValueError
            If the node is not a directory.

        """
        if self.type != FileSystemNodeType.DIRECTORY:
            msg = "Cannot sort children of a non-directory node"
            raise ValueError(msg)

        def _sort_key(child: FileSystemNode) -> tuple[int, str]:
            # (group, name): lower group numbers come first, ties are broken by name.
            name = child.name.lower()
            hidden = name.startswith(".")
            if child.type == FileSystemNodeType.FILE:
                if name == "readme" or name.startswith("readme."):
                    return (0, name)
                return (2 if hidden else 1, name)
            return (4 if hidden else 3, name)

        self.children.sort(key=_sort_key)

    @property
    def content_string(self) -> str:
        """Return the node rendered as a header block followed by its content.

        The header consists of the node type and its ``/``-separated path, framed by two
        ``SEPARATOR`` lines. For symlinks the name of the link target is appended after ``->``.

        Returns
        -------
        str
            The header, the node content and two trailing newlines.

        """
        header = f"{self.type.name}: {str(self.path_str).replace(os.sep, '/')}"
        if self.type == FileSystemNodeType.SYMLINK:
            header += f" -> {readlink(self.path).name}"

        parts = [SEPARATOR, header, SEPARATOR, f"{self.content}"]
        return "\n".join(parts) + "\n\n"

    @property
    def content(self) -> str:  # pylint: disable=too-many-return-statements
        """Return file content (if text / notebook) or an explanatory placeholder.

        A small leading chunk of the file is read first: an unreadable file, an empty file and
        a chunk that is not valid UTF-8 each yield a placeholder string. Notebooks (``.ipynb``)
        are converted with ``process_notebook``.

        Returns
        -------
        str
            The content of the file, or a placeholder / error message if it could not be read.

        Raises
        ------
        ValueError
            If the node is a directory.

        """
        if self.type == FileSystemNodeType.DIRECTORY:
            msg = "Cannot read content of a directory node"
            raise ValueError(msg)

        if self.type == FileSystemNodeType.SYMLINK:
            return ""  # TODO: are we including the empty content of symlinks?

        if self.path.suffix == ".ipynb":  # Notebook
            try:
                return process_notebook(self.path)
            except Exception as exc:  # best effort: a broken notebook must not abort the ingestion
                return f"Error processing notebook: {exc}"

        chunk = _read_chunk(self.path)

        if chunk is None:
            return "Error reading file"

        if chunk == b"":
            return "[Empty file]"

        if not _decodes(chunk, "utf-8"):
            return "[Binary file]"

        # The sample is known to be valid UTF-8 at this point, so UTF-8 is tried before the
        # platform default. Otherwise a permissive locale codec (e.g. cp1252) listed first by
        # ``_get_preferred_encodings`` would be selected and garble multi-byte characters.
        candidates = ["utf-8", *_get_preferred_encodings()]
        good_enc: str | None = next(
            (enc for enc in candidates if _decodes(chunk, encoding=enc)),
            None,
        )

        if good_enc is None:
            return "Error: Unable to decode file with available encodings"

        try:
            with self.path.open(encoding=good_enc) as fp:
                return fp.read()
        except (OSError, UnicodeDecodeError) as exc:
            return f"Error reading file with {good_enc!r}: {exc}"
@dataclass
class CloneConfig:  # pylint: disable=too-many-instance-attributes
    """Bundle the parameters needed to clone a Git repository.

    Only ``url`` and ``local_path`` are mandatory; every other field has a default.

    Attributes
    ----------
    url : str
        The URL of the Git repository to clone.
    local_path : str
        The local directory where the repository will be cloned.
    commit : str | None
        The specific commit hash to check out after cloning (default: ``None``).
    branch : str | None
        The branch to clone (default: ``None``).
    tag : str | None
        The tag to clone (default: ``None``).
    subpath : str
        The subpath to clone from the repository (default: ``"/"``).
    blob : bool
        Whether the repository is a blob (default: ``False``).
    include_submodules : bool
        Whether to clone submodules (default: ``False``).

    """

    # Required
    url: str
    local_path: str

    # Optional reference selectors
    commit: str | None = None
    branch: str | None = None
    tag: str | None = None

    # Optional scope / behaviour switches
    subpath: str = "/"
    blob: bool = False
    include_submodules: bool = False
class IngestionQuery(BaseModel):  # pylint: disable=too-many-instance-attributes
    """Pydantic model holding the parsed details of a repository or local path.

    Attributes
    ----------
    user_name : str | None
        The username or owner of the repository.
    repo_name : str | None
        The name of the repository.
    local_path : Path
        The local path to the repository or file.
    url : str | None
        The URL of the repository.
    slug : str
        The slug of the repository.
    id : str
        The ID of the repository.
    subpath : str
        The subpath to the repository or file (default: ``"/"``).
    type : str | None
        The type of the repository or file; ``"blob"`` marks a blob for ``extract_clone_config``.
    branch : str | None
        The branch of the repository.
    commit : str | None
        The commit of the repository.
    tag : str | None
        The tag of the repository.
    max_file_size : int
        The maximum file size to ingest (default: ``MAX_FILE_SIZE`` from ``gitingest.config``).
    ignore_patterns : set[str]
        The patterns to ignore (default: ``set()``).
    include_patterns : set[str] | None
        The patterns to include.
    include_submodules : bool
        Whether to include all Git submodules within the repository (default: ``False``).

    """

    user_name: str | None = None
    repo_name: str | None = None
    local_path: Path
    url: str | None = None
    slug: str
    id: str
    subpath: str = "/"
    type: str | None = None
    branch: str | None = None
    commit: str | None = None
    tag: str | None = None
    max_file_size: int = Field(default=MAX_FILE_SIZE)
    ignore_patterns: set[str] = set()  # TODO: ignore_patterns and include_patterns have the same type
    include_patterns: set[str] | None = None
    include_submodules: bool = False

    def extract_clone_config(self) -> CloneConfig:
        """Build the ``CloneConfig`` that corresponds to this query.

        Returns
        -------
        CloneConfig
            A CloneConfig populated from the URL, local path, reference and scope fields.

        Raises
        ------
        ValueError
            If the ``url`` parameter is not provided.

        """
        repo_url = self.url
        if repo_url:
            return CloneConfig(
                url=repo_url,
                local_path=str(self.local_path),
                commit=self.commit,
                branch=self.branch,
                tag=self.tag,
                subpath=self.subpath,
                blob=self.type == "blob",
                include_submodules=self.include_submodules,
            )

        msg = "The 'url' parameter is required."
        raise ValueError(msg)

    def ensure_url(self) -> None:
        """Check that the parsed query carries a URL.

        Raises
        ------
        ValueError
            If the parsed query has no URL (invalid user input).

        """
        if self.url:
            return

        msg = "The 'url' parameter is required."
        raise ValueError(msg)
def resolve_token(token: str | None) -> str | None:
    """Resolve the token to use for the query.

    An explicitly passed token wins; otherwise the ``GITHUB_TOKEN`` environment variable is
    consulted. A non-empty result is passed to ``validate_github_token`` before it is returned.

    Parameters
    ----------
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    Returns
    -------
    str | None
        The resolved token, or a falsy value when neither source provides one.

    """
    token = token or os.getenv("GITHUB_TOKEN")
    if token:
        validate_github_token(token)
    return token


def readlink(path: Path) -> Path:
    """Read the target of a symlink.

    Compatible with Python 3.8 (wraps ``os.readlink``).

    Parameters
    ----------
    path : Path
        Path to the symlink.

    Returns
    -------
    Path
        The target of the symlink.

    """
    return Path(os.readlink(path))


def removesuffix(s: str, suffix: str) -> str:
    """Remove a suffix from a string.

    Compatible with Python 3.8; mirrors ``str.removesuffix``.

    Parameters
    ----------
    s : str
        String to remove suffix from.
    suffix : str
        Suffix to remove.

    Returns
    -------
    str
        ``s`` without ``suffix`` if it ends with it, otherwise ``s`` unchanged.

    """
    # An empty suffix must be a no-op: every string "ends with" "", and ``s[:-0]`` is ``s[:0]``,
    # which would otherwise wipe the whole string.
    if suffix and s.endswith(suffix):
        return s[: -len(suffix)]
    return s
class InvalidPatternError(ValueError):
    """Exception raised when a pattern contains invalid characters.

    This exception is used to signal that a pattern provided for some operation
    contains characters that are not allowed. The valid characters for the pattern
    include alphanumeric characters, dash (-), underscore (_), dot (.), forward slash (/),
    plus (+), asterisk (*), and at sign (@).

    Parameters
    ----------
    pattern : str
        The invalid pattern that caused the error.

    """

    def __init__(self, pattern: str) -> None:
        # The character list mirrors what ``_is_valid_pattern`` accepts, including '@'.
        super().__init__(
            f"Pattern '{pattern}' contains invalid characters. Only alphanumeric characters, dash (-), "
            "underscore (_), dot (.), forward slash (/), plus (+), asterisk (*), and at sign (@) are allowed.",
        )


class AsyncTimeoutError(Exception):
    """Exception raised when an async operation exceeds its timeout limit.

    This exception is used by the ``async_timeout`` decorator to signal that the wrapped
    asynchronous function has exceeded the specified time limit for execution.
    """


class InvalidNotebookError(Exception):
    """Exception raised when a Jupyter notebook is invalid or cannot be processed.

    Parameters
    ----------
    message : str
        Human-readable description of the problem.

    """

    def __init__(self, message: str) -> None:
        super().__init__(message)


class InvalidGitHubTokenError(ValueError):
    """Exception raised when a GitHub Personal Access Token is malformed.

    The message is fixed and points to the GitHub page for generating a new token.
    """

    def __init__(self) -> None:
        msg = (
            "Invalid GitHub token format. To generate a token, go to "
            "https://github.com/settings/tokens/new?description=gitingest&scopes=repo."
        )
        super().__init__(msg)
28 | 29 | """ 30 | encodings = [locale.getpreferredencoding(), "utf-8", "utf-16", "utf-16le", "utf-8-sig", "latin"] 31 | if platform.system() == "Windows": 32 | encodings += ["cp1252", "iso-8859-1"] 33 | return list(dict.fromkeys(encodings)) 34 | 35 | 36 | def _read_chunk(path: Path) -> bytes | None: 37 | """Attempt to read the first *size* bytes of *path* in binary mode. 38 | 39 | Parameters 40 | ---------- 41 | path : Path 42 | The path to the file to read. 43 | 44 | Returns 45 | ------- 46 | bytes | None 47 | The first ``_CHUNK_SIZE`` bytes of ``path``, or ``None`` on any ``OSError``. 48 | 49 | """ 50 | try: 51 | with path.open("rb") as fp: 52 | return fp.read(_CHUNK_SIZE) 53 | except OSError: 54 | return None 55 | 56 | 57 | def _decodes(chunk: bytes, encoding: str) -> bool: 58 | """Return ``True`` if ``chunk`` decodes cleanly with ``encoding``. 59 | 60 | Parameters 61 | ---------- 62 | chunk : bytes 63 | The chunk of bytes to decode. 64 | encoding : str 65 | The encoding to use to decode the chunk. 66 | 67 | Returns 68 | ------- 69 | bool 70 | ``True`` if the chunk decodes cleanly with the encoding, ``False`` otherwise. 
# Patterns ignored by default, grouped by ecosystem. Because this is a set, an entry that
# fits several groups is listed once under a combined heading (e.g. "Java / Rust").
DEFAULT_IGNORE_PATTERNS: set[str] = {
    # Python
    "*.pyc",
    "*.pyo",
    "*.pyd",
    "__pycache__",
    ".pytest_cache",
    ".coverage",
    ".tox",
    ".nox",
    ".mypy_cache",
    ".ruff_cache",
    ".hypothesis",
    "poetry.lock",
    "Pipfile.lock",
    # JavaScript/Node
    "node_modules",
    "bower_components",
    "package-lock.json",
    "yarn.lock",
    ".npm",
    ".yarn",
    ".pnpm-store",
    "bun.lock",
    "bun.lockb",
    # Java
    "*.class",
    "*.jar",
    "*.war",
    "*.ear",
    "*.nar",
    ".gradle/",
    "build/",
    ".settings/",
    ".classpath",
    "gradle-app.setting",
    "*.gradle",
    # IDEs and editors / Java
    ".project",
    # C/C++
    "*.o",
    "*.obj",
    "*.dll",
    "*.dylib",
    "*.exe",
    "*.lib",
    "*.out",
    "*.a",
    "*.pdb",
    # Binary
    "*.bin",
    # Swift/Xcode
    ".build/",
    "*.xcodeproj/",
    "*.xcworkspace/",
    "*.pbxuser",
    "*.mode1v3",
    "*.mode2v3",
    "*.perspectivev3",
    "*.xcuserstate",
    "xcuserdata/",
    ".swiftpm/",
    # Ruby
    "*.gem",
    ".bundle/",
    "vendor/bundle",
    "Gemfile.lock",
    ".ruby-version",
    ".ruby-gemset",
    ".rvmrc",
    # Rust
    "Cargo.lock",
    "**/*.rs.bk",
    # Java / Rust
    "target/",
    # Go
    "pkg/",
    # .NET/C#
    "obj/",
    "*.suo",
    "*.user",
    "*.userosscache",
    "*.sln.docstates",
    "*.nupkg",
    # Go / .NET / C#
    "bin/",
    # Version control
    ".git",
    ".svn",
    ".hg",
    ".gitignore",
    ".gitattributes",
    ".gitmodules",
    # Images and media
    "*.svg",
    "*.png",
    "*.jpg",
    "*.jpeg",
    "*.gif",
    "*.ico",
    "*.pdf",
    "*.mov",
    "*.mp4",
    "*.mp3",
    "*.wav",
    # Virtual environments
    "venv",
    ".venv",
    "env",
    ".env",
    "virtualenv",
    # IDEs and editors
    ".idea",
    ".vscode",
    ".vs",
    "*.swo",
    "*.swn",
    ".settings",
    "*.sublime-*",
    # Temporary and cache files
    "*.log",
    "*.bak",
    "*.swp",
    "*.tmp",
    "*.temp",
    ".cache",
    ".sass-cache",
    ".eslintcache",
    ".DS_Store",
    "Thumbs.db",
    "desktop.ini",
    # Build directories and artifacts
    "build",
    "dist",
    "target",
    "out",
    "*.egg-info",
    "*.egg",
    "*.whl",
    "*.so",
    # Documentation
    # NOTE(review): "site-packages" looks misfiled under this heading - confirm the intended group.
    "site-packages",
    ".docusaurus",
    ".next",
    ".nuxt",
    # Database
    "*.db",
    "*.sqlite",
    "*.sqlite3",
    # Other common patterns
    ## Minified files
    "*.min.js",
    "*.min.css",
    ## Source maps
    "*.map",
    ## Terraform
    "*.tfstate*",
    ## Dependencies in various languages
    "vendor/",
    # Gitingest
    "digest.txt",
}
185 | 186 | Returns 187 | ------- 188 | set[str] 189 | A set of ignore patterns extracted from the ``filename`` file found under the ``root`` directory. 190 | 191 | """ 192 | patterns: set[str] = set() 193 | 194 | for ignore_file in root.rglob(filename): 195 | if ignore_file.is_file(): 196 | patterns.update(_parse_ignore_file(ignore_file, root)) 197 | return patterns 198 | 199 | 200 | def _parse_ignore_file(ignore_file: Path, root: Path) -> set[str]: 201 | """Parse an ignore file and return a set of ignore patterns. 202 | 203 | Parameters 204 | ---------- 205 | ignore_file : Path 206 | The path to the ignore file. 207 | root : Path 208 | The root directory of the repository. 209 | 210 | Returns 211 | ------- 212 | set[str] 213 | A set of ignore patterns. 214 | 215 | """ 216 | patterns: set[str] = set() 217 | 218 | # Path of the ignore file relative to the repository root 219 | rel_dir = ignore_file.parent.relative_to(root) 220 | base_dir = Path() if rel_dir == Path() else rel_dir 221 | 222 | with ignore_file.open(encoding="utf-8") as fh: 223 | for raw in fh: 224 | line = raw.strip() 225 | if not line or line.startswith("#"): # comments / blank lines 226 | continue 227 | 228 | # Handle negation ("!foobar") 229 | negated = line.startswith("!") 230 | if negated: 231 | line = line[1:] 232 | 233 | # Handle leading slash ("/foobar") 234 | if line.startswith("/"): 235 | line = line.lstrip("/") 236 | 237 | pattern_body = (base_dir / line).as_posix() 238 | patterns.add(f"!{pattern_body}" if negated else pattern_body) 239 | 240 | return patterns 241 | -------------------------------------------------------------------------------- /src/gitingest/utils/ingestion_utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for the ingestion process.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | from pathspec import PathSpec 8 | 9 | if TYPE_CHECKING: 10 | from pathlib import 
Path 11 | 12 | 13 | def _should_include(path: Path, base_path: Path, include_patterns: set[str]) -> bool: 14 | """Return ``True`` if ``path`` matches any of ``include_patterns``. 15 | 16 | Parameters 17 | ---------- 18 | path : Path 19 | The absolute path of the file or directory to check. 20 | 21 | base_path : Path 22 | The base directory from which the relative path is calculated. 23 | 24 | include_patterns : set[str] 25 | A set of patterns to check against the relative path. 26 | 27 | Returns 28 | ------- 29 | bool 30 | ``True`` if the path matches any of the include patterns, ``False`` otherwise. 31 | 32 | """ 33 | rel_path = _relative_or_none(path, base_path) 34 | if rel_path is None: # outside repo → do *not* include 35 | return False 36 | if path.is_dir(): # keep directories so children are visited 37 | return True 38 | 39 | spec = PathSpec.from_lines("gitwildmatch", include_patterns) 40 | return spec.match_file(str(rel_path)) 41 | 42 | 43 | def _should_exclude(path: Path, base_path: Path, ignore_patterns: set[str]) -> bool: 44 | """Return ``True`` if ``path`` matches any of ``ignore_patterns``. 45 | 46 | Parameters 47 | ---------- 48 | path : Path 49 | The absolute path of the file or directory to check. 50 | base_path : Path 51 | The base directory from which the relative path is calculated. 52 | ignore_patterns : set[str] 53 | A set of patterns to check against the relative path. 54 | 55 | Returns 56 | ------- 57 | bool 58 | ``True`` if the path matches any of the ignore patterns, ``False`` otherwise. 59 | 60 | """ 61 | rel_path = _relative_or_none(path, base_path) 62 | if rel_path is None: # outside repo → already “excluded” 63 | return True 64 | 65 | spec = PathSpec.from_lines("gitwildmatch", ignore_patterns) 66 | return spec.match_file(str(rel_path)) 67 | 68 | 69 | def _relative_or_none(path: Path, base: Path) -> Path | None: 70 | """Return *path* relative to *base* or ``None`` if *path* is outside *base*. 
71 | 72 | Parameters 73 | ---------- 74 | path : Path 75 | The absolute path of the file or directory to check. 76 | base : Path 77 | The base directory from which the relative path is calculated. 78 | 79 | Returns 80 | ------- 81 | Path | None 82 | The relative path of ``path`` to ``base``, or ``None`` if ``path`` is outside ``base``. 83 | 84 | """ 85 | try: 86 | return path.relative_to(base) 87 | except ValueError: # path is not a sub-path of base 88 | return None 89 | -------------------------------------------------------------------------------- /src/gitingest/utils/notebook.py: -------------------------------------------------------------------------------- 1 | """Utilities for processing Jupyter notebooks.""" 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | import warnings 7 | from itertools import chain 8 | from typing import TYPE_CHECKING, Any 9 | 10 | from gitingest.utils.exceptions import InvalidNotebookError 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | 16 | def process_notebook(file: Path, *, include_output: bool = True) -> str: 17 | """Process a Jupyter notebook file and return an executable Python script as a string. 18 | 19 | Parameters 20 | ---------- 21 | file : Path 22 | The path to the Jupyter notebook file. 23 | include_output : bool 24 | Whether to include cell outputs in the generated script (default: ``True``). 25 | 26 | Returns 27 | ------- 28 | str 29 | The executable Python script as a string. 30 | 31 | Raises 32 | ------ 33 | InvalidNotebookError 34 | If the notebook file is invalid or cannot be processed. 
35 | 36 | """ 37 | try: 38 | with file.open(encoding="utf-8") as f: 39 | notebook: dict[str, Any] = json.load(f) 40 | except json.JSONDecodeError as exc: 41 | msg = f"Invalid JSON in notebook: {file}" 42 | raise InvalidNotebookError(msg) from exc 43 | 44 | # Check if the notebook contains worksheets 45 | worksheets = notebook.get("worksheets") 46 | if worksheets: 47 | warnings.warn( 48 | "Worksheets are deprecated as of IPEP-17. Consider updating the notebook. " 49 | "(See: https://github.com/jupyter/nbformat and " 50 | "https://github.com/ipython/ipython/wiki/IPEP-17:-Notebook-Format-4#remove-multiple-worksheets " 51 | "for more information.)", 52 | DeprecationWarning, 53 | stacklevel=2, 54 | ) 55 | 56 | if len(worksheets) > 1: 57 | warnings.warn( 58 | "Multiple worksheets detected. Combining all worksheets into a single script.", 59 | UserWarning, 60 | stacklevel=2, 61 | ) 62 | 63 | cells = list(chain.from_iterable(ws["cells"] for ws in worksheets)) 64 | 65 | else: 66 | cells = notebook["cells"] 67 | 68 | result = ["# Jupyter notebook converted to Python script."] 69 | 70 | for cell in cells: 71 | cell_str = _process_cell(cell, include_output=include_output) 72 | if cell_str: 73 | result.append(cell_str) 74 | 75 | return "\n\n".join(result) + "\n" 76 | 77 | 78 | def _process_cell(cell: dict[str, Any], *, include_output: bool) -> str | None: 79 | """Process a Jupyter notebook cell and return the cell content as a string. 80 | 81 | Parameters 82 | ---------- 83 | cell : dict[str, Any] 84 | The cell dictionary from a Jupyter notebook. 85 | include_output : bool 86 | Whether to include cell outputs in the generated script. 87 | 88 | Returns 89 | ------- 90 | str | None 91 | The cell content as a string, or ``None`` if the cell is empty. 92 | 93 | Raises 94 | ------ 95 | ValueError 96 | If an unexpected cell type is encountered. 
97 | 98 | """ 99 | cell_type = cell["cell_type"] 100 | 101 | # Validate cell type and handle unexpected types 102 | if cell_type not in ("markdown", "code", "raw"): 103 | msg = f"Unknown cell type: {cell_type}" 104 | raise ValueError(msg) 105 | 106 | cell_str = "".join(cell["source"]) 107 | 108 | # Skip empty cells 109 | if not cell_str: 110 | return None 111 | 112 | # Convert Markdown and raw cells to multi-line comments 113 | if cell_type in ("markdown", "raw"): 114 | return f'"""\n{cell_str}\n"""' 115 | 116 | # Add cell output as comments 117 | outputs = cell.get("outputs") 118 | if include_output and outputs: 119 | # Include cell outputs as comments 120 | raw_lines: list[str] = [] 121 | for output in outputs: 122 | raw_lines += _extract_output(output) 123 | 124 | cell_str += "\n# Output:\n# " + "\n# ".join(raw_lines) 125 | 126 | return cell_str 127 | 128 | 129 | def _extract_output(output: dict[str, Any]) -> list[str]: 130 | """Extract the output from a Jupyter notebook cell. 131 | 132 | Parameters 133 | ---------- 134 | output : dict[str, Any] 135 | The output dictionary from a Jupyter notebook cell. 136 | 137 | Returns 138 | ------- 139 | list[str] 140 | The output as a list of strings. 141 | 142 | Raises 143 | ------ 144 | ValueError 145 | If an unknown output type is encountered. 
146 | 147 | """ 148 | output_type = output["output_type"] 149 | 150 | if output_type == "stream": 151 | return output["text"] 152 | 153 | if output_type in ("execute_result", "display_data"): 154 | return output["data"]["text/plain"] 155 | 156 | if output_type == "error": 157 | return [f"Error: {output['ename']}: {output['evalue']}"] 158 | 159 | msg = f"Unknown output type: {output_type}" 160 | raise ValueError(msg) 161 | -------------------------------------------------------------------------------- /src/gitingest/utils/os_utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for working with the operating system.""" 2 | 3 | from pathlib import Path 4 | 5 | 6 | async def ensure_directory(path: Path) -> None: 7 | """Ensure the directory exists, creating it if necessary. 8 | 9 | Parameters 10 | ---------- 11 | path : Path 12 | The path to ensure exists. 13 | 14 | Raises 15 | ------ 16 | OSError 17 | If the directory cannot be created. 18 | 19 | """ 20 | try: 21 | path.mkdir(parents=True, exist_ok=True) 22 | except OSError as exc: 23 | msg = f"Failed to create directory {path}: {exc}" 24 | raise OSError(msg) from exc 25 | -------------------------------------------------------------------------------- /src/gitingest/utils/path_utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for working with file paths.""" 2 | 3 | import platform 4 | from pathlib import Path 5 | 6 | 7 | def _is_safe_symlink(symlink_path: Path, base_path: Path) -> bool: 8 | """Return ``True`` if ``symlink_path`` resolves inside ``base_path``. 9 | 10 | Parameters 11 | ---------- 12 | symlink_path : Path 13 | Symlink whose target should be validated. 14 | base_path : Path 15 | Directory that the symlink target must remain within. 16 | 17 | Returns 18 | ------- 19 | bool 20 | Whether the symlink is “safe” (i.e., does not escape ``base_path``). 
21 | 22 | """ 23 | # On Windows a non-symlink is immediately unsafe 24 | if platform.system() == "Windows" and not symlink_path.is_symlink(): 25 | return False 26 | 27 | try: 28 | target_path = symlink_path.resolve() 29 | base_resolved = base_path.resolve() 30 | except (OSError, ValueError): 31 | # Any resolution error → treat as unsafe 32 | return False 33 | 34 | return base_resolved in target_path.parents or target_path == base_resolved 35 | -------------------------------------------------------------------------------- /src/gitingest/utils/query_parser_utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for parsing and validating query parameters.""" 2 | 3 | from __future__ import annotations 4 | 5 | import string 6 | 7 | HEX_DIGITS: set[str] = set(string.hexdigits) 8 | 9 | 10 | KNOWN_GIT_HOSTS: list[str] = [ 11 | "github.com", 12 | "gitlab.com", 13 | "bitbucket.org", 14 | "gitea.com", 15 | "codeberg.org", 16 | "gist.github.com", 17 | ] 18 | 19 | 20 | def _is_valid_git_commit_hash(commit: str) -> bool: 21 | """Validate if the provided string is a valid Git commit hash. 22 | 23 | This function checks if the commit hash is a 40-character string consisting only 24 | of hexadecimal digits, which is the standard format for Git commit hashes. 25 | 26 | Parameters 27 | ---------- 28 | commit : str 29 | The string to validate as a Git commit hash. 30 | 31 | Returns 32 | ------- 33 | bool 34 | ``True`` if the string is a valid 40-character Git commit hash, otherwise ``False``. 35 | 36 | """ 37 | sha_hex_length = 40 38 | return len(commit) == sha_hex_length and all(c in HEX_DIGITS for c in commit) 39 | 40 | 41 | def _is_valid_pattern(pattern: str) -> bool: 42 | """Validate if the given pattern contains only valid characters. 
43 | 44 | This function checks if the pattern contains only alphanumeric characters or one 45 | of the following allowed characters: dash ('-'), underscore ('_'), dot ('.'), 46 | forward slash ('/'), plus ('+'), asterisk ('*'), or the at sign ('@'). 47 | 48 | Parameters 49 | ---------- 50 | pattern : str 51 | The pattern to validate. 52 | 53 | Returns 54 | ------- 55 | bool 56 | ``True`` if the pattern is valid, otherwise ``False``. 57 | 58 | """ 59 | return all(c.isalnum() or c in "-_./+*@" for c in pattern) 60 | 61 | 62 | def _validate_host(host: str) -> None: 63 | """Validate a hostname. 64 | 65 | The host is accepted if it is either present in the hard-coded ``KNOWN_GIT_HOSTS`` list or if it satisfies the 66 | simple heuristics in ``_looks_like_git_host``, which try to recognise common self-hosted Git services (e.g. GitLab 67 | instances on sub-domains such as 'gitlab.example.com' or 'git.example.com'). 68 | 69 | Parameters 70 | ---------- 71 | host : str 72 | Hostname (case-insensitive). 73 | 74 | Raises 75 | ------ 76 | ValueError 77 | If the host cannot be recognised as a probable Git hosting domain. 78 | 79 | """ 80 | host = host.lower() 81 | if host not in KNOWN_GIT_HOSTS and not _looks_like_git_host(host): 82 | msg = f"Unknown domain '{host}' in URL" 83 | raise ValueError(msg) 84 | 85 | 86 | def _looks_like_git_host(host: str) -> bool: 87 | """Check if the given host looks like a Git host. 88 | 89 | The current heuristic returns ``True`` when the host starts with ``git.`` (e.g. 'git.example.com'), starts with 90 | 'gitlab.' (e.g. 'gitlab.company.com'), or starts with 'github.' (e.g. 'github.company.com' for GitHub Enterprise). 91 | 92 | Parameters 93 | ---------- 94 | host : str 95 | Hostname (case-insensitive). 96 | 97 | Returns 98 | ------- 99 | bool 100 | ``True`` if the host looks like a Git host, otherwise ``False``. 
101 | 102 | """ 103 | host = host.lower() 104 | return host.startswith(("git.", "gitlab.", "github.")) 105 | 106 | 107 | def _validate_url_scheme(scheme: str) -> None: 108 | """Validate the given scheme against the known schemes. 109 | 110 | Parameters 111 | ---------- 112 | scheme : str 113 | The scheme to validate. 114 | 115 | Raises 116 | ------ 117 | ValueError 118 | If the scheme is not 'http' or 'https'. 119 | 120 | """ 121 | scheme = scheme.lower() 122 | if scheme not in ("https", "http"): 123 | msg = f"Invalid URL scheme '{scheme}' in URL" 124 | raise ValueError(msg) 125 | 126 | 127 | def _get_user_and_repo_from_path(path: str) -> tuple[str, str]: 128 | """Extract the user and repository names from a given path. 129 | 130 | Parameters 131 | ---------- 132 | path : str 133 | The path to extract the user and repository names from. 134 | 135 | Returns 136 | ------- 137 | tuple[str, str] 138 | A tuple containing the user and repository names. 139 | 140 | Raises 141 | ------ 142 | ValueError 143 | If the path does not contain at least two parts. 144 | 145 | """ 146 | min_path_parts = 2 147 | path_parts = path.lower().strip("/").split("/") 148 | if len(path_parts) < min_path_parts: 149 | msg = f"Invalid repository URL '{path}'" 150 | raise ValueError(msg) 151 | return path_parts[0], path_parts[1] 152 | -------------------------------------------------------------------------------- /src/gitingest/utils/timeout_wrapper.py: -------------------------------------------------------------------------------- 1 | """Utility functions for the Gitingest package.""" 2 | 3 | import asyncio 4 | import functools 5 | from typing import Awaitable, Callable, TypeVar 6 | 7 | from gitingest.utils.compat_typing import ParamSpec 8 | from gitingest.utils.exceptions import AsyncTimeoutError 9 | 10 | T = TypeVar("T") 11 | P = ParamSpec("P") 12 | 13 | 14 | def async_timeout(seconds: int) -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]: 15 | """Async Timeout decorator. 
16 | 17 | This decorator wraps an asynchronous function and ensures it does not run for 18 | longer than the specified number of seconds. If the function execution exceeds 19 | this limit, it raises an ``AsyncTimeoutError``. 20 | 21 | Parameters 22 | ---------- 23 | seconds : int 24 | The maximum allowed time (in seconds) for the asynchronous function to complete. 25 | 26 | Returns 27 | ------- 28 | Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]] 29 | A decorator that, when applied to an async function, ensures the function 30 | completes within the specified time limit. If the function takes too long, 31 | an ``AsyncTimeoutError`` is raised. 32 | 33 | """ 34 | 35 | def decorator(func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]: 36 | @functools.wraps(func) 37 | async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: 38 | try: 39 | return await asyncio.wait_for(func(*args, **kwargs), timeout=seconds) 40 | except asyncio.TimeoutError as exc: 41 | msg = f"Operation timed out after {seconds} seconds" 42 | raise AsyncTimeoutError(msg) from exc 43 | 44 | return wrapper 45 | 46 | return decorator 47 | -------------------------------------------------------------------------------- /src/server/__init__.py: -------------------------------------------------------------------------------- 1 | """Server module.""" 2 | -------------------------------------------------------------------------------- /src/server/form_types.py: -------------------------------------------------------------------------------- 1 | """Reusable form type aliases for FastAPI form parameters.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Optional 6 | 7 | from fastapi import Form 8 | 9 | from gitingest.utils.compat_typing import Annotated 10 | 11 | if TYPE_CHECKING: 12 | from gitingest.utils.compat_typing import TypeAlias 13 | 14 | StrForm: TypeAlias = Annotated[str, Form(...)] 15 | IntForm: TypeAlias = Annotated[int, Form(...)] 16 
| OptStrForm: TypeAlias = Annotated[Optional[str], Form()] 17 | -------------------------------------------------------------------------------- /src/server/main.py: -------------------------------------------------------------------------------- 1 | """Main module for the FastAPI application.""" 2 | 3 | from __future__ import annotations 4 | 5 | import os 6 | import threading 7 | from pathlib import Path 8 | 9 | import sentry_sdk 10 | from dotenv import load_dotenv 11 | from fastapi import FastAPI, Request 12 | from fastapi.responses import FileResponse, HTMLResponse, JSONResponse 13 | from fastapi.staticfiles import StaticFiles 14 | from slowapi.errors import RateLimitExceeded 15 | from starlette.middleware.trustedhost import TrustedHostMiddleware 16 | 17 | from server.metrics_server import start_metrics_server 18 | from server.routers import dynamic, index, ingest 19 | from server.server_config import templates 20 | from server.server_utils import lifespan, limiter, rate_limit_exception_handler 21 | 22 | # Load environment variables from .env file 23 | load_dotenv() 24 | 25 | # Initialize Sentry SDK if enabled 26 | if os.getenv("GITINGEST_SENTRY_ENABLED") is not None: 27 | sentry_dsn = os.getenv("GITINGEST_SENTRY_DSN") 28 | 29 | # Only initialize Sentry if DSN is provided 30 | if sentry_dsn: 31 | # Configure Sentry options from environment variables 32 | traces_sample_rate = float(os.getenv("GITINGEST_SENTRY_TRACES_SAMPLE_RATE", "1.0")) 33 | profile_session_sample_rate = float(os.getenv("GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE", "1.0")) 34 | profile_lifecycle = os.getenv("GITINGEST_SENTRY_PROFILE_LIFECYCLE", "trace") 35 | send_default_pii = os.getenv("GITINGEST_SENTRY_SEND_DEFAULT_PII", "true").lower() == "true" 36 | sentry_environment = os.getenv("GITINGEST_SENTRY_ENVIRONMENT", "") 37 | 38 | sentry_sdk.init( 39 | dsn=sentry_dsn, 40 | # Add data like request headers and IP for users 41 | send_default_pii=send_default_pii, 42 | # Set traces_sample_rate to 
capture transactions for tracing 43 | traces_sample_rate=traces_sample_rate, 44 | # Set profile_session_sample_rate to profile sessions 45 | profile_session_sample_rate=profile_session_sample_rate, 46 | # Set profile_lifecycle to automatically run the profiler 47 | profile_lifecycle=profile_lifecycle, 48 | # Set environment name 49 | environment=sentry_environment, 50 | ) 51 | 52 | # Initialize the FastAPI application with lifespan 53 | app = FastAPI(lifespan=lifespan, docs_url=None, redoc_url=None) 54 | app.state.limiter = limiter 55 | 56 | # Register the custom exception handler for rate limits 57 | app.add_exception_handler(RateLimitExceeded, rate_limit_exception_handler) 58 | 59 | # Start metrics server in a separate thread if enabled 60 | if os.getenv("GITINGEST_METRICS_ENABLED") is not None: 61 | metrics_host = os.getenv("GITINGEST_METRICS_HOST", "127.0.0.1") 62 | metrics_port = int(os.getenv("GITINGEST_METRICS_PORT", "9090")) 63 | metrics_thread = threading.Thread( 64 | target=start_metrics_server, 65 | args=(metrics_host, metrics_port), 66 | daemon=True, 67 | ) 68 | metrics_thread.start() 69 | 70 | 71 | # Mount static files dynamically to serve CSS, JS, and other static assets 72 | static_dir = Path(__file__).parent.parent / "static" 73 | app.mount("/static", StaticFiles(directory=static_dir), name="static") 74 | 75 | 76 | # Fetch allowed hosts from the environment or use the default values 77 | allowed_hosts = os.getenv("ALLOWED_HOSTS") 78 | if allowed_hosts: 79 | allowed_hosts = allowed_hosts.split(",") 80 | else: 81 | # Define the default allowed hosts for the application 82 | default_allowed_hosts = ["gitingest.com", "*.gitingest.com", "localhost", "127.0.0.1"] 83 | allowed_hosts = default_allowed_hosts 84 | 85 | # Add middleware to enforce allowed hosts 86 | app.add_middleware(TrustedHostMiddleware, allowed_hosts=allowed_hosts) 87 | 88 | 89 | @app.get("/health") 90 | async def health_check() -> dict[str, str]: 91 | """Health check endpoint to verify 
that the server is running. 92 | 93 | **Returns** 94 | 95 | - **dict[str, str]**: A JSON object with a "status" key indicating the server's health status. 96 | 97 | """ 98 | return {"status": "healthy"} 99 | 100 | 101 | @app.head("/", include_in_schema=False) 102 | async def head_root() -> HTMLResponse: 103 | """Respond to HTTP HEAD requests for the root URL. 104 | 105 | **This endpoint mirrors the headers and status code of the index page** 106 | for HTTP HEAD requests, providing a lightweight way to check if the server 107 | is responding without downloading the full page content. 108 | 109 | **Returns** 110 | 111 | - **HTMLResponse**: An empty HTML response with appropriate headers 112 | 113 | """ 114 | return HTMLResponse(content=None, headers={"content-type": "text/html; charset=utf-8"}) 115 | 116 | 117 | @app.get("/robots.txt", include_in_schema=False) 118 | async def robots() -> FileResponse: 119 | """Serve the robots.txt file to guide search engine crawlers. 120 | 121 | **This endpoint serves the ``robots.txt`` file located in the static directory** 122 | to provide instructions to search engine crawlers about which parts of the site 123 | they should or should not index. 124 | 125 | **Returns** 126 | 127 | - **FileResponse**: The ``robots.txt`` file located in the static directory 128 | 129 | """ 130 | return FileResponse("static/robots.txt") 131 | 132 | 133 | @app.get("/llms.txt") 134 | async def llm_txt() -> FileResponse: 135 | """Serve the llms.txt file to provide information about the site to LLMs. 136 | 137 | **This endpoint serves the ``llms.txt`` file located in the static directory** 138 | to provide information about the site to Large Language Models (LLMs) 139 | and other AI systems that may be crawling the site. 
140 | 141 | **Returns** 142 | 143 | - **FileResponse**: The ``llms.txt`` file located in the static directory 144 | 145 | """ 146 | return FileResponse("static/llms.txt") 147 | 148 | 149 | @app.get("/docs", response_class=HTMLResponse, include_in_schema=False) 150 | async def custom_swagger_ui(request: Request) -> HTMLResponse: 151 | """Serve custom Swagger UI documentation. 152 | 153 | **This endpoint serves a custom Swagger UI interface** 154 | for the API documentation, providing an interactive way to explore 155 | and test the available endpoints. 156 | 157 | **Parameters** 158 | 159 | - **request** (`Request`): The incoming HTTP request 160 | 161 | **Returns** 162 | 163 | - **HTMLResponse**: Custom Swagger UI documentation page 164 | 165 | """ 166 | return templates.TemplateResponse("swagger_ui.jinja", {"request": request}) 167 | 168 | 169 | @app.get("/api", include_in_schema=True) 170 | def openapi_json_get() -> JSONResponse: 171 | """Return the OpenAPI schema. 172 | 173 | **This endpoint returns the OpenAPI schema (openapi.json)** 174 | that describes the API structure, endpoints, and data models 175 | for documentation and client generation purposes. 176 | 177 | **Returns** 178 | 179 | - **JSONResponse**: The OpenAPI schema as JSON 180 | 181 | """ 182 | return JSONResponse(app.openapi()) 183 | 184 | 185 | @app.api_route("/api", methods=["POST", "PUT", "DELETE", "OPTIONS", "HEAD"], include_in_schema=False) 186 | @app.api_route("/api/", methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD"], include_in_schema=False) 187 | def openapi_json() -> JSONResponse: 188 | """Return the OpenAPI schema for various HTTP methods. 189 | 190 | **This endpoint returns the OpenAPI schema (openapi.json)** 191 | for multiple HTTP methods, providing API documentation 192 | for clients that may use different request methods. 
193 | 194 | **Returns** 195 | 196 | - **JSONResponse**: The OpenAPI schema as JSON 197 | 198 | """ 199 | return JSONResponse(app.openapi()) 200 | 201 | 202 | # Include routers for modular endpoints 203 | app.include_router(index) 204 | app.include_router(ingest) 205 | app.include_router(dynamic) 206 | -------------------------------------------------------------------------------- /src/server/metrics_server.py: -------------------------------------------------------------------------------- 1 | """Prometheus metrics server running on a separate port.""" 2 | 3 | import logging 4 | 5 | import uvicorn 6 | from fastapi import FastAPI 7 | from fastapi.responses import HTMLResponse 8 | from prometheus_client import REGISTRY, generate_latest 9 | 10 | # Create a logger for this module 11 | logger = logging.getLogger(__name__) 12 | 13 | # Create a separate FastAPI app for metrics 14 | metrics_app = FastAPI( 15 | title="Gitingest Metrics", 16 | description="Prometheus metrics for Gitingest", 17 | docs_url=None, 18 | redoc_url=None, 19 | ) 20 | 21 | 22 | @metrics_app.get("/metrics") 23 | async def metrics() -> HTMLResponse: 24 | """Serve Prometheus metrics without authentication. 25 | 26 | This endpoint is only accessible from the local network. 27 | 28 | Returns 29 | ------- 30 | HTMLResponse 31 | Prometheus metrics in text format 32 | 33 | """ 34 | return HTMLResponse( 35 | content=generate_latest(REGISTRY), 36 | status_code=200, 37 | media_type="text/plain", 38 | ) 39 | 40 | 41 | def start_metrics_server(host: str = "127.0.0.1", port: int = 9090) -> None: 42 | """Start the metrics server on a separate port. 
43 | 44 | Parameters 45 | ---------- 46 | host : str 47 | The host to bind to (default: 127.0.0.1, the loopback interface, so only the local machine can connect) 48 | port : int 49 | The port to bind to (default: 9090) 50 | 51 | Returns 52 | ------- 53 | None 54 | 55 | """ 56 | logger.info("Starting metrics server on %s:%s", host, port) 57 | uvicorn.run(metrics_app, host=host, port=port) 58 | -------------------------------------------------------------------------------- /src/server/models.py: -------------------------------------------------------------------------------- 1 | """Pydantic models for the query form.""" 2 | 3 | from __future__ import annotations 4 | 5 | from enum import Enum 6 | from typing import Union 7 | 8 | from pydantic import BaseModel, Field, field_validator 9 | 10 | # needed for type checking (pydantic) 11 | from server.form_types import IntForm, OptStrForm, StrForm # noqa: TC001 (typing-only-first-party-import) 12 | 13 | 14 | class PatternType(str, Enum): 15 | """Enumeration for pattern types used in file filtering.""" 16 | 17 | INCLUDE = "include" 18 | EXCLUDE = "exclude" 19 | 20 | 21 | class IngestRequest(BaseModel): 22 | """Request model for the /api/ingest endpoint. 23 | 24 | Attributes 25 | ---------- 26 | input_text : str 27 | The Git repository URL or slug to ingest. 28 | max_file_size : int 29 | Maximum file size slider position (0-500) for filtering files. 30 | pattern_type : PatternType 31 | Type of pattern to use for file filtering (include or exclude). 32 | pattern : str 33 | Glob/regex pattern string for file filtering. 34 | token : str | None 35 | GitHub personal access token (PAT) for accessing private repositories. 
36 | 37 | """ 38 | 39 | input_text: str = Field(..., description="Git repository URL or slug to ingest") 40 | max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)") 41 | pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering") 42 | pattern: str = Field(default="", description="Glob/regex pattern for file filtering") 43 | token: str | None = Field(default=None, description="GitHub PAT for private repositories") 44 | 45 | @field_validator("input_text") 46 | @classmethod 47 | def validate_input_text(cls, v: str) -> str: 48 | """Validate that input_text is not empty.""" 49 | if not v.strip(): 50 | err = "input_text cannot be empty" 51 | raise ValueError(err) 52 | return v.strip() 53 | 54 | @field_validator("pattern") 55 | @classmethod 56 | def validate_pattern(cls, v: str) -> str: 57 | """Validate pattern field.""" 58 | return v.strip() 59 | 60 | 61 | class IngestSuccessResponse(BaseModel): 62 | """Success response model for the /api/ingest endpoint. 63 | 64 | Attributes 65 | ---------- 66 | repo_url : str 67 | The original repository URL that was processed. 68 | short_repo_url : str 69 | Short form of repository URL (user/repo). 70 | summary : str 71 | Summary of the ingestion process including token estimates. 72 | ingest_id : str 73 | Ingestion id used to download full context. 74 | tree : str 75 | File tree structure of the repository. 76 | content : str 77 | Processed content from the repository files. 78 | default_max_file_size : int 79 | The file size slider position used. 80 | pattern_type : str 81 | The pattern type used for filtering. 82 | pattern : str 83 | The pattern used for filtering. 
84 | 85 | """ 86 | 87 | repo_url: str = Field(..., description="Original repository URL") 88 | short_repo_url: str = Field(..., description="Short repository URL (user/repo)") 89 | summary: str = Field(..., description="Ingestion summary with token estimates") 90 | ingest_id: str = Field(..., description="Ingestion id used to download full context") 91 | tree: str = Field(..., description="File tree structure") 92 | content: str = Field(..., description="Processed file content") 93 | default_max_file_size: int = Field(..., description="File size slider position used") 94 | pattern_type: str = Field(..., description="Pattern type used") 95 | pattern: str = Field(..., description="Pattern used") 96 | 97 | 98 | class IngestErrorResponse(BaseModel): 99 | """Error response model for the /api/ingest endpoint. 100 | 101 | Attributes 102 | ---------- 103 | error : str 104 | Error message describing what went wrong. 105 | 106 | """ 107 | 108 | error: str = Field(..., description="Error message") 109 | 110 | 111 | # Union type for API responses 112 | IngestResponse = Union[IngestSuccessResponse, IngestErrorResponse] 113 | 114 | 115 | class QueryForm(BaseModel): 116 | """Form data for the query. 117 | 118 | Attributes 119 | ---------- 120 | input_text : str 121 | Text or URL supplied in the form. 122 | max_file_size : int 123 | The maximum allowed file size for the input, specified by the user. 124 | pattern_type : str 125 | The type of pattern used for the query (``include`` or ``exclude``). 126 | pattern : str 127 | Glob/regex pattern string. 128 | token : str | None 129 | GitHub personal access token (PAT) for accessing private repositories. 
130 | 131 | """ 132 | 133 | input_text: str 134 | max_file_size: int 135 | pattern_type: str 136 | pattern: str 137 | token: str | None = None 138 | 139 | @classmethod 140 | def as_form( 141 | cls, 142 | input_text: StrForm, 143 | max_file_size: IntForm, 144 | pattern_type: StrForm, 145 | pattern: StrForm, 146 | token: OptStrForm, 147 | ) -> QueryForm: 148 | """Create a QueryForm from FastAPI form parameters. 149 | 150 | Parameters 151 | ---------- 152 | input_text : StrForm 153 | The input text provided by the user. 154 | max_file_size : IntForm 155 | The maximum allowed file size for the input. 156 | pattern_type : StrForm 157 | The type of pattern used for the query (``include`` or ``exclude``). 158 | pattern : StrForm 159 | Glob/regex pattern string. 160 | token : OptStrForm 161 | GitHub personal access token (PAT) for accessing private repositories. 162 | 163 | Returns 164 | ------- 165 | QueryForm 166 | The QueryForm instance. 167 | 168 | """ 169 | return cls( 170 | input_text=input_text, 171 | max_file_size=max_file_size, 172 | pattern_type=pattern_type, 173 | pattern=pattern, 174 | token=token, 175 | ) 176 | -------------------------------------------------------------------------------- /src/server/query_processor.py: -------------------------------------------------------------------------------- 1 | """Process a query by parsing input, cloning a repository, and generating a summary.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import cast 7 | 8 | from gitingest.clone import clone_repo 9 | from gitingest.ingestion import ingest_query 10 | from gitingest.query_parser import IngestionQuery, parse_query 11 | from gitingest.utils.git_utils import validate_github_token 12 | from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse 13 | from server.server_config import MAX_DISPLAY_SIZE 14 | from server.server_utils import Colors, log_slider_to_size 15 | 16 | 17 | async def 
process_query( 18 | input_text: str, 19 | slider_position: int, 20 | pattern_type: str = "exclude", 21 | pattern: str = "", 22 | token: str | None = None, 23 | ) -> IngestResponse: 24 | """Process a query by parsing input, cloning a repository, and generating a summary. 25 | 26 | Handle user input, process Git repository data, and prepare 27 | a response for rendering a template with the processed results or an error message. 28 | 29 | Parameters 30 | ---------- 31 | input_text : str 32 | Input text provided by the user, typically a Git repository URL or slug. 33 | slider_position : int 34 | Position of the slider, representing the maximum file size in the query. 35 | pattern_type : str 36 | Type of pattern to use (either "include" or "exclude") (default: ``"exclude"``). 37 | pattern : str 38 | Pattern to include or exclude in the query, depending on the pattern type. 39 | token : str | None 40 | GitHub personal access token (PAT) for accessing private repositories. 41 | 42 | Returns 43 | ------- 44 | IngestResponse 45 | A union type, corresponding to IngestErrorResponse or IngestSuccessResponse 46 | 47 | Raises 48 | ------ 49 | ValueError 50 | If an invalid pattern type is provided. 
51 | 52 | """ 53 | if pattern_type == "include": 54 | include_patterns = pattern 55 | exclude_patterns = None 56 | elif pattern_type == "exclude": 57 | exclude_patterns = pattern 58 | include_patterns = None 59 | else: 60 | msg = f"Invalid pattern type: {pattern_type}" 61 | raise ValueError(msg) 62 | 63 | if token: 64 | validate_github_token(token) 65 | 66 | max_file_size = log_slider_to_size(slider_position) 67 | 68 | query: IngestionQuery | None = None 69 | short_repo_url = "" 70 | 71 | try: 72 | query = await parse_query( 73 | source=input_text, 74 | max_file_size=max_file_size, 75 | from_web=True, 76 | include_patterns=include_patterns, 77 | ignore_patterns=exclude_patterns, 78 | token=token, 79 | ) 80 | query.ensure_url() 81 | 82 | # Sets the "<user>/<repo>" for the page title 83 | short_repo_url = f"{query.user_name}/{query.repo_name}" 84 | 85 | clone_config = query.extract_clone_config() 86 | await clone_repo(clone_config, token=token) 87 | 88 | summary, tree, content = ingest_query(query) 89 | 90 | local_txt_file = Path(clone_config.local_path).with_suffix(".txt") 91 | 92 | with local_txt_file.open("w", encoding="utf-8") as f: 93 | f.write(tree + "\n" + content) 94 | 95 | except Exception as exc: 96 | if query and query.url: 97 | _print_error(query.url, exc, max_file_size, pattern_type, pattern) 98 | else: 99 | print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="") 100 | print(f"{Colors.RED}{exc}{Colors.END}") 101 | 102 | return IngestErrorResponse(error=str(exc)) 103 | 104 | if len(content) > MAX_DISPLAY_SIZE: 105 | content = ( 106 | f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, " 107 | "download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE] 108 | ) 109 | 110 | query.ensure_url() 111 | query.url = cast("str", query.url) 112 | 113 | _print_success( 114 | url=query.url, 115 | max_file_size=max_file_size, 116 | pattern_type=pattern_type, 117 | pattern=pattern, 118 | summary=summary, 119 | ) 120 | 
121 | return IngestSuccessResponse( 122 | repo_url=input_text, 123 | short_repo_url=short_repo_url, 124 | summary=summary, 125 | ingest_id=query.id, 126 | tree=tree, 127 | content=content, 128 | default_max_file_size=slider_position, 129 | pattern_type=pattern_type, 130 | pattern=pattern, 131 | ) 132 | 133 | 134 | def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None: 135 | """Print a formatted summary of the query details for debugging. 136 | 137 | Parameters 138 | ---------- 139 | url : str 140 | The URL associated with the query. 141 | max_file_size : int 142 | The maximum file size allowed for the query, in bytes. 143 | pattern_type : str 144 | Specifies the type of pattern to use, either "include" or "exclude". 145 | pattern : str 146 | The actual pattern string to include or exclude in the query. 147 | 148 | """ 149 | default_max_file_kb = 50 150 | print(f"{Colors.WHITE}{url:<20}{Colors.END}", end="") 151 | if int(max_file_size / 1024) != default_max_file_kb: 152 | print( 153 | f" | {Colors.YELLOW}Size: {int(max_file_size / 1024)}kB{Colors.END}", 154 | end="", 155 | ) 156 | if pattern_type == "include" and pattern != "": 157 | print(f" | {Colors.YELLOW}Include {pattern}{Colors.END}", end="") 158 | elif pattern_type == "exclude" and pattern != "": 159 | print(f" | {Colors.YELLOW}Exclude {pattern}{Colors.END}", end="") 160 | 161 | 162 | def _print_error(url: str, exc: Exception, max_file_size: int, pattern_type: str, pattern: str) -> None: 163 | """Print a formatted error message for debugging. 164 | 165 | Parameters 166 | ---------- 167 | url : str 168 | The URL associated with the query that caused the error. 169 | exc : Exception 170 | The exception raised during the query or process. 171 | max_file_size : int 172 | The maximum file size allowed for the query, in bytes. 173 | pattern_type : str 174 | Specifies the type of pattern to use, either "include" or "exclude". 
175 | pattern : str 176 | The actual pattern string to include or exclude in the query. 177 | 178 | """ 179 | print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="") 180 | _print_query(url, max_file_size, pattern_type, pattern) 181 | print(f" | {Colors.RED}{exc}{Colors.END}") 182 | 183 | 184 | def _print_success(url: str, max_file_size: int, pattern_type: str, pattern: str, summary: str) -> None: 185 | """Print a formatted success message for debugging. 186 | 187 | Parameters 188 | ---------- 189 | url : str 190 | The URL associated with the successful query. 191 | max_file_size : int 192 | The maximum file size allowed for the query, in bytes. 193 | pattern_type : str 194 | Specifies the type of pattern to use, either "include" or "exclude". 195 | pattern : str 196 | The actual pattern string to include or exclude in the query. 197 | summary : str 198 | A summary of the query result, including details like estimated tokens. 199 | 200 | """ 201 | estimated_tokens = summary[summary.index("Estimated tokens:") + len("Estimated ") :] 202 | print(f"{Colors.GREEN}INFO{Colors.END}: {Colors.GREEN}<- {Colors.END}", end="") 203 | _print_query(url, max_file_size, pattern_type, pattern) 204 | print(f" | {Colors.PURPLE}{estimated_tokens}{Colors.END}") 205 | -------------------------------------------------------------------------------- /src/server/routers/__init__.py: -------------------------------------------------------------------------------- 1 | """Module containing the routers for the FastAPI application.""" 2 | 3 | from server.routers.dynamic import router as dynamic 4 | from server.routers.index import router as index 5 | from server.routers.ingest import router as ingest 6 | 7 | __all__ = ["dynamic", "index", "ingest"] 8 | -------------------------------------------------------------------------------- /src/server/routers/dynamic.py: -------------------------------------------------------------------------------- 1 | """The dynamic router module 
defines handlers for dynamic path requests.""" 2 | 3 | from fastapi import APIRouter, Request 4 | from fastapi.responses import HTMLResponse 5 | 6 | from server.server_config import templates 7 | 8 | router = APIRouter() 9 | 10 | 11 | @router.get("/{full_path:path}", include_in_schema=False) 12 | async def catch_all(request: Request, full_path: str) -> HTMLResponse: 13 | """Render a page with a Git URL based on the provided path. 14 | 15 | This endpoint catches all GET requests with a dynamic path, constructs a Git URL 16 | using the ``full_path`` parameter, and renders the ``git.jinja`` template with that URL. 17 | 18 | Parameters 19 | ---------- 20 | request : Request 21 | The incoming request object, which provides context for rendering the response. 22 | full_path : str 23 | The full path extracted from the URL, which is used to build the Git URL. 24 | 25 | Returns 26 | ------- 27 | HTMLResponse 28 | An HTML response containing the rendered template, with the Git URL 29 | and other default parameters such as file size. 30 | 31 | """ 32 | return templates.TemplateResponse( 33 | "git.jinja", 34 | { 35 | "request": request, 36 | "repo_url": full_path, 37 | "default_max_file_size": 243, 38 | }, 39 | ) 40 | -------------------------------------------------------------------------------- /src/server/routers/index.py: -------------------------------------------------------------------------------- 1 | """Module defining the FastAPI router for the home page of the application.""" 2 | 3 | from fastapi import APIRouter, Request 4 | from fastapi.responses import HTMLResponse 5 | 6 | from server.server_config import EXAMPLE_REPOS, templates 7 | 8 | router = APIRouter() 9 | 10 | 11 | @router.get("/", response_class=HTMLResponse, include_in_schema=False) 12 | async def home(request: Request) -> HTMLResponse: 13 | """Render the home page with example repositories and default parameters. 
@router.post("/api/ingest", responses=COMMON_INGEST_RESPONSES)
@limiter.limit("10/minute")
async def api_ingest(
    request: Request,  # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument
    ingest_request: IngestRequest,
) -> JSONResponse:
    """Ingest a Git repository and return processed content.

    **This endpoint processes a Git repository by cloning it, analyzing its structure,**
    and returning a summary with the repository's content. The response includes
    file tree structure, processed content, and metadata about the ingestion.

    **Parameters**

    - **ingest_request** (`IngestRequest`): Pydantic model containing ingestion parameters

    **Returns**

    - **JSONResponse**: Success response with ingestion results or error response with appropriate HTTP status code

    """
    # NOTE: the docstring above is rendered in the generated API docs, so it is part of the public output.
    # The metric label is capped at 255 characters so that very long inputs stay bounded.
    url_label = ingest_request.input_text[:255]

    response = await _perform_ingestion(
        input_text=ingest_request.input_text,
        max_file_size=ingest_request.max_file_size,
        pattern_type=ingest_request.pattern_type,
        pattern=ingest_request.pattern,
        token=ingest_request.token,
    )

    # Count the request under its final HTTP status.
    counter = ingest_counter.labels(status=response.status_code, url=url_label)
    counter.inc()
    return response
@router.get("/api/download/file/{ingest_id}", response_class=FileResponse)
async def download_ingest(ingest_id: str) -> FileResponse:
    """Download the first text file produced for an ingest ID.

    **This endpoint retrieves the first ``*.txt`` file produced during the ingestion process**
    and returns it as a downloadable file. The file is streamed with media type ``text/plain``
    and prompts the browser to download it.

    **Parameters**

    - **ingest_id** (`str`): Identifier that the ingest step emitted

    **Returns**

    - **FileResponse**: Streamed response with media type ``text/plain``

    **Raises**

    - **HTTPException**: **404** - ``ingest_id`` is not a plain directory name, or the digest directory is
      missing or contains no ``*.txt`` file
    - **HTTPException**: **403** - the process lacks permission to read the directory or file

    """
    # ``ingest_id`` is taken directly from the URL.  Only a single, plain path component is accepted so
    # that values such as ``..`` cannot move the lookup outside ``TMP_BASE_PATH``.
    if ingest_id in {"", ".", ".."} or "/" in ingest_id or "\\" in ingest_id:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Digest {ingest_id!r} not found")

    directory = TMP_BASE_PATH / ingest_id

    if not directory.is_dir():
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Digest {ingest_id!r} not found")

    # Serve whichever ``*.txt`` file the directory listing yields first.
    try:
        first_txt_file = next(directory.glob("*.txt"))
    except StopIteration as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No .txt file found for digest {ingest_id!r}",
        ) from exc

    try:
        return FileResponse(path=first_txt_file, media_type="text/plain", filename=first_txt_file.name)
    except PermissionError as exc:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"Permission denied for {first_txt_file}",
        ) from exc
async def _perform_ingestion(
    input_text: str,
    max_file_size: int,
    pattern_type: str,
    pattern: str,
    token: str | None,
) -> JSONResponse:
    """Call ``process_query`` and convert its outcome into a ``JSONResponse``.

    Shared by the ``POST`` and ``GET`` ingest endpoints so both map results and failures to HTTP status
    codes the same way:

    - an ``IngestErrorResponse`` result -> 400
    - any other result -> 200
    - ``ValueError`` -> 400 with a "Validation error" message
    - any other exception -> 500 with an "Internal server error" message
    """
    try:
        result = await process_query(
            input_text=input_text,
            slider_position=max_file_size,
            pattern_type=pattern_type,
            pattern=pattern,
            token=token,
        )

        # A structured error result is reported as a bad request; everything else is a success.
        if isinstance(result, IngestErrorResponse):
            status_code = status.HTTP_400_BAD_REQUEST
        else:
            status_code = status.HTTP_200_OK

        return JSONResponse(status_code=status_code, content=result.model_dump())

    except ValueError as ve:
        # Validation problems are the caller's fault -> 400
        payload = IngestErrorResponse(error=f"Validation error: {ve!s}").model_dump()
        return JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content=payload)

    except Exception as exc:
        # Anything unexpected -> 500
        payload = IngestErrorResponse(error=f"Internal server error: {exc!s}").model_dump()
        return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=payload)
async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response:
    """Turn a rate-limit violation into an HTTP response.

    Parameters
    ----------
    request : Request
        The incoming HTTP request.
    exc : Exception
        The raised exception; expected to be a ``RateLimitExceeded``.

    Returns
    -------
    Response
        Whatever slowapi's default rate-limit handler returns for this request.

    Raises
    ------
    exc
        Re-raised unchanged when it is not a ``RateLimitExceeded``.

    """
    if not isinstance(exc, RateLimitExceeded):
        # Not a rate-limit error: let it propagate
        raise exc

    # Delegate to slowapi's default handler
    return _rate_limit_exceeded_handler(request, exc)
async def _process_folder(folder: Path) -> None:
    """Append the repo URL (if discoverable) to ``history.txt`` and delete ``folder``.

    The URL is derived from the first ``*.txt`` file in ``folder``, whose stem is expected to look like
    ``owner-repo``.  A directory without such a file is still deleted; only the history entry is skipped.
    Entries that are not directories are left untouched.  Filesystem errors are printed, never raised.

    Parameters
    ----------
    folder : Path
        The path to the folder to be processed.

    """
    if not folder.is_dir():
        # Only directories are cleaned up; stray files are ignored silently.
        return

    history_file = Path("history.txt")
    loop = asyncio.get_running_loop()

    # ``None`` when the directory holds no digest file (e.g. the ingest never produced one).
    first_txt_file = next(folder.glob("*.txt"), None)

    # Append owner/repo to history.txt
    if first_txt_file is not None:
        try:
            filename = first_txt_file.stem  # "owner-repo"
            if "-" in filename:
                owner, repo = filename.split("-", 1)
                repo_url = f"{owner}/{repo}"
                # File I/O is blocking, so run it in the default executor.
                await loop.run_in_executor(None, _append_line, history_file, repo_url)
        except OSError as exc:  # PermissionError is a subclass of OSError
            print(f"Error logging repository URL for {folder}: {exc}")

    # Delete the cloned repo
    try:
        await loop.run_in_executor(None, shutil.rmtree, folder)
    except PermissionError as exc:
        print(f"No permission to delete {folder}: {exc}")
    except OSError as exc:
        print(f"Could not delete {folder}: {exc}")
def log_slider_to_size(position: int) -> int:
    """Map a slider position onto a file size in bytes along a logarithmic curve.

    Parameters
    ----------
    position : int
        Slider position; ``MAX_SLIDER_POSITION`` corresponds to the largest size.

    Returns
    -------
    int
        File size in bytes for the given position (a whole number of KB times 1024).

    """
    # Fraction of the slider travelled, bent by the 1.5 exponent.
    fraction = (position / MAX_SLIDER_POSITION) ** 1.5
    # Interpolate on the log scale between 1 KB and MAX_FILE_SIZE_KB, then round to whole KB.
    size_kb = round(math.exp(math.log(MAX_FILE_SIZE_KB) * fraction))
    return size_kb * 1024
rel="icon" type="image/svg+xml" href="/static/favicons/favicon.svg"> 9 | <link rel="icon" 10 | type="image/png" 11 | href="/static/favicons/favicon-64.png" 12 | sizes="64x64"> 13 | <link rel="apple-touch-icon" 14 | type="image/png" 15 | href="/static/favicons/apple-touch-icon.png" 16 | sizes="180x180"> 17 | {# Search Engine Meta Tags #} 18 | <meta name="title" content="Gitingest"> 19 | <meta name="description" 20 | content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text."> 21 | <meta name="keywords" 22 | content="Gitingest, AI tools, LLM integration, Ingest, Digest, Context, Prompt, Git workflow, codebase extraction, Git repository, Git automation, Summarize, prompt-friendly"> 23 | <meta name="robots" content="index, follow"> 24 | {# Open Graph Meta Tags #} 25 | <meta property="og:title" content="Gitingest"> 26 | <meta property="og:description" 27 | content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text."> 28 | <meta property="og:type" content="website"> 29 | <meta property="og:url" content="{{ request.url }}"> 30 | <meta property="og:image" content="/static/og-image.png"> 31 | {# Web App Meta #} 32 | <meta name="apple-mobile-web-app-title" content="Gitingest"> 33 | <meta name="application-name" content="Gitingest"> 34 | <meta name="theme-color" content="#FCA847"> 35 | <meta name="mobile-web-app-capable" content="yes"> 36 | <meta name="apple-mobile-web-app-status-bar-style" content="default"> 37 | {# Twitter card #} 38 | <meta name="twitter:card" content="summary_large_image"> 39 | <meta name="twitter:title" content="Gitingest"> 40 | <meta name="twitter:description" 41 | content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text."> 42 | <meta name="twitter:image" content="/static/og-image.png"> 43 | {# Title #} 44 | <title> 45 | {% block title %} 46 | {% if short_repo_url %} 47 | Gitingest - {{ short_repo_url }} 48 | {% else %} 49 | Gitingest 50 | {% endif %} 51 | {% endblock %} 52 | 
</title> 53 | <script src="https://cdn.tailwindcss.com"></script> 54 | {% include 'components/tailwind_components.html' %} 55 | </head> 56 | <body class="bg-[#FFFDF8] min-h-screen flex flex-col"> 57 | {% include 'components/navbar.jinja' %} 58 | {# Main content wrapper #} 59 | <main class="flex-1 w-full"> 60 | <div class="max-w-4xl mx-auto px-4 py-8"> 61 | {% block content %}{% endblock %} 62 | </div> 63 | </main> 64 | {# Footer #} 65 | {% include 'components/footer.jinja' %} 66 | {# Scripts #} 67 | <script defer src="/static/js/index.js"></script> 68 | <script defer src="/static/js/utils.js"></script> 69 | <script defer src="/static/js/posthog.js"></script> 70 | </body> 71 | </html> 72 | -------------------------------------------------------------------------------- /src/server/templates/components/_macros.jinja: -------------------------------------------------------------------------------- 1 | {# Icon link #} 2 | {% macro footer_icon_link(href, icon, label) -%} 3 | <a href="{{ href }}" 4 | target="_blank" 5 | rel="noopener noreferrer" 6 | class="hover:underline flex items-center"> 7 | <img src="/static/{{ icon }}" alt="{{ label }} logo" class="w-4 h-4 mr-1"> 8 | {{ label }} 9 | </a> 10 | {%- endmacro %} 11 | -------------------------------------------------------------------------------- /src/server/templates/components/footer.jinja: -------------------------------------------------------------------------------- 1 | {% from 'components/_macros.jinja' import footer_icon_link %} 2 | <footer class="w-full border-t-[3px] border-gray-900 mt-auto"> 3 | <div class="max-w-4xl mx-auto px-4 py-4"> 4 | <div class="grid grid-cols-2 items-center text-gray-900 text-sm"> 5 | {# Left column — Chrome + PyPI #} 6 | <div class="flex items-center space-x-4"> 7 | {{ footer_icon_link('https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood', 8 | 'icons/chrome.svg', 9 | 'Chrome Extension') }} 10 | {{ footer_icon_link('https://pypi.org/project/gitingest', 11 | 
'icons/python.svg', 12 | 'Python Package') }} 13 | </div> 14 | {# Right column - Discord #} 15 | <div class="flex justify-end"> 16 | {{ footer_icon_link('https://discord.gg/zerRaGK9EC', 17 | 'icons/discord.svg', 18 | 'Discord') }} 19 | </div> 20 | </div> 21 | </div> 22 | </footer> 23 | -------------------------------------------------------------------------------- /src/server/templates/components/navbar.jinja: -------------------------------------------------------------------------------- 1 | <header class="sticky top-0 bg-[#FFFDF8] border-b-[3px] border-gray-900 z-50"> 2 | <div class="max-w-4xl mx-auto px-4"> 3 | <div class="flex justify-between items-center h-16"> 4 | {# Logo #} 5 | <div class="flex items-center gap-4"> 6 | <h1 class="text-2xl font-bold tracking-tight"> 7 | <a href="/" class="hover:opacity-80 transition-opacity"> 8 | <span class="text-gray-900">Git</span><span class="text-[#FE4A60]">ingest</span> 9 | </a> 10 | </h1> 11 | </div> 12 | {# Navigation with updated styling #} 13 | <nav class="flex items-center space-x-6"> 14 | <a href="/llms.txt" class="link-bounce flex items-center text-gray-900"> 15 | <span class="badge-new">NEW</span> 16 | /llms.txt 17 | </a> 18 | {# GitHub link #} 19 | <div class="flex items-center gap-2"> 20 | <a href="https://github.com/coderamp-labs/gitingest" 21 | target="_blank" 22 | rel="noopener noreferrer" 23 | class="link-bounce flex items-center gap-1.5 text-gray-900"> 24 | <img src="/static/icons/github.svg" class="w-4 h-4" alt="GitHub logo"> 25 | GitHub 26 | </a> 27 | {# Star counter #} 28 | <div class="no-drag flex items-center text-sm text-gray-600"> 29 | <img src="/static/svg/github-star.svg" 30 | class="w-4 h-4 mr-1" 31 | alt="GitHub star icon"> 32 | <span id="github-stars">0</span> 33 | </div> 34 | </div> 35 | </nav> 36 | </div> 37 | </div> 38 | </header> 39 | {# Load GitHub stars script #} 40 | <script defer src="/static/js/navbar.js"></script> 41 | 
-------------------------------------------------------------------------------- /src/server/templates/components/result.jinja: -------------------------------------------------------------------------------- 1 | <div class="mt-10"> 2 | <!-- Error Message (hidden by default) --> 3 | <div id="results-error" style="display:none"></div> 4 | <!-- Loading Spinner (hidden by default) --> 5 | <div id="results-loading" style="display:none"> 6 | <div class="relative mt-10"> 7 | <div class="w-full h-full absolute inset-0 bg-black rounded-xl translate-y-2 translate-x-2"></div> 8 | <div class="bg-[#fafafa] rounded-xl border-[3px] border-gray-900 p-6 relative z-20 flex flex-col items-center space-y-4"> 9 | <div class="loader border-8 border-[#fff4da] border-t-8 border-t-[#ffc480] rounded-full w-16 h-16 animate-spin"></div> 10 | <p class="text-lg font-bold text-gray-900">Loading...</p> 11 | </div> 12 | </div> 13 | </div> 14 | <!-- Results Section (hidden by default) --> 15 | <div id="results-section" style="display:none"> 16 | <div class="relative"> 17 | <div class="w-full h-full absolute inset-0 bg-gray-900 rounded-xl translate-y-2 translate-x-2"></div> 18 | <div class="bg-[#fafafa] rounded-xl border-[3px] border-gray-900 p-6 relative z-20 space-y-6"> 19 | <div class="grid grid-cols-1 md:grid-cols-12 gap-6"> 20 | <div class="md:col-span-5"> 21 | <div class="flex justify-between items-center mb-4 py-2"> 22 | <h3 class="text-lg font-bold text-gray-900">Summary</h3> 23 | </div> 24 | <div class="relative"> 25 | <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div> 26 | <textarea id="result-summary" 27 | class="w-full h-[160px] p-4 bg-[#fff4da] border-[3px] border-gray-900 rounded font-mono text-sm resize-none focus:outline-none relative z-10" 28 | readonly></textarea> 29 | </div> 30 | <div class="relative mt-4 inline-block group ml-4"> 31 | <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div> 
32 | <button onclick="copyFullDigest()" 33 | class="inline-flex items-center px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10"> 34 | <svg class="w-4 h-4 mr-2" 35 | fill="none" 36 | stroke="currentColor" 37 | viewBox="0 0 24 24"> 38 | <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3" /> 39 | </svg> 40 | Copy all 41 | </button> 42 | </div> 43 | <div class="relative mt-4 inline-block group ml-4"> 44 | <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div> 45 | <button onclick="downloadFullDigest()" 46 | class="inline-flex items-center px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10"> 47 | <svg class="w-4 h-4 mr-2" 48 | fill="none" 49 | stroke="currentColor" 50 | viewBox="0 0 24 24"> 51 | <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 10v6m0 0l-3-3m3 3l3-3m2 8H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" /> 52 | </svg> 53 | Download 54 | </button> 55 | </div> 56 | </div> 57 | <div class="md:col-span-7"> 58 | <div class="flex justify-between items-center mb-4"> 59 | <h3 class="text-lg font-bold text-gray-900">Directory Structure</h3> 60 | <div class="relative group"> 61 | <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div> 62 | <button onclick="copyText('directory-structure')" 63 | class="px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10 flex items-center gap-2"> 64 | <svg 
class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"> 65 | <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3" /> 66 | </svg> 67 | Copy 68 | </button> 69 | </div> 70 | </div> 71 | <div class="relative"> 72 | <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div> 73 | <div class="directory-structure w-full p-4 bg-[#fff4da] border-[3px] border-gray-900 rounded font-mono text-sm resize-y focus:outline-none relative z-10 h-[215px] overflow-auto" 74 | id="directory-structure-container" 75 | readonly> 76 | <input type="hidden" id="directory-structure-content" value="" /> 77 | <pre id="directory-structure-pre"></pre> 78 | </div> 79 | </div> 80 | </div> 81 | </div> 82 | <div> 83 | <div class="flex justify-between items-center mb-4"> 84 | <h3 class="text-lg font-bold text-gray-900">Files Content</h3> 85 | <div class="relative group"> 86 | <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div> 87 | <button onclick="copyText('result-text')" 88 | class="px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10 flex items-center gap-2"> 89 | <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"> 90 | <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3" /> 91 | </svg> 92 | Copy 93 | </button> 94 | </div> 95 | </div> 96 | <div class="relative"> 97 | <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div> 98 | <textarea id="result-content" 99 | 
class="result-text w-full p-4 bg-[#fff4da] border-[3px] border-gray-900 rounded font-mono text-sm resize-y focus:outline-none relative z-10" 100 | style="min-height: 600px" 101 | readonly></textarea> 102 | </div> 103 | </div> 104 | </div> 105 | </div> 106 | </div> 107 | </div> 108 | -------------------------------------------------------------------------------- /src/server/templates/components/tailwind_components.html: -------------------------------------------------------------------------------- 1 | <style type="text/tailwindcss"> 2 | @layer components { 3 | .badge-new { 4 | @apply inline-block -rotate-6 -translate-y-1 mx-1 px-1 bg-[#FE4A60] border border-gray-900 text-white text-[10px] font-bold shadow-[2px_2px_0_0_rgba(0,0,0,1)]; 5 | } 6 | .landing-page-title { 7 | @apply inline-block w-full relative text-center text-4xl sm:text-5xl md:text-6xl lg:text-7xl sm:pt-20 lg:pt-5 font-bold tracking-tighter; 8 | } 9 | .intro-text { 10 | @apply text-center text-gray-600 text-lg max-w-2xl mx-auto; 11 | } 12 | .sparkle-red { 13 | @apply absolute flex-shrink-0 h-auto w-14 sm:w-20 md:w-24 p-2 left-0 lg:ml-32 -translate-x-2 md:translate-x-10 lg:-translate-x-full -translate-y-4 sm:-translate-y-8 md:-translate-y-0 lg:-translate-y-10; 14 | } 15 | .sparkle-green { 16 | @apply absolute flex-shrink-0 right-0 bottom-0 w-10 sm:w-16 lg:w-20 -translate-x-10 lg:-translate-x-12 translate-y-4 sm:translate-y-10 md:translate-y-2 lg:translate-y-4; 17 | } 18 | .pattern-select { 19 | @apply min-w-max appearance-none pr-6 pl-2 py-2 bg-[#e6e8eb] border-r-[3px] border-gray-900 cursor-pointer focus:outline-none; 20 | } 21 | } 22 | 23 | @layer utilities { 24 | .no-drag { 25 | @apply pointer-events-none select-none; 26 | -webkit-user-drag: none; 27 | } 28 | .link-bounce { 29 | @apply transition-transform hover:-translate-y-0.5; 30 | } 31 | } 32 | </style> 33 | -------------------------------------------------------------------------------- /src/server/templates/git.jinja: 
-------------------------------------------------------------------------------- 1 | {% extends "base.jinja" %} 2 | {% block content %} 3 | {% if error_message %} 4 | <div class="mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700" 5 | id="error-message" 6 | data-message="{{ error_message }}">{{ error_message }}</div> 7 | {% endif %} 8 | {% with show_examples=false %} 9 | {% include 'components/git_form.jinja' %} 10 | {% endwith %} 11 | {% include 'components/result.jinja' %} 12 | {% endblock content %} 13 | -------------------------------------------------------------------------------- /src/server/templates/index.jinja: -------------------------------------------------------------------------------- 1 | {% extends "base.jinja" %} 2 | {% block content %} 3 | <div class="mb-8"> 4 | <div class="relative w-full flex sm:flex-row flex-col justify-center sm:items-center"> 5 | {# Title & Sparkles #} 6 | <h1 class="landing-page-title"> 7 | Prompt-friendly 8 | <br> 9 | codebase 10 | </h1> 11 | <img src="/static/svg/sparkle-red.svg" class="sparkle-red no-drag"> 12 | <img src="/static/svg/sparkle-green.svg" class="sparkle-green no-drag"> 13 | </div> 14 | <p class="intro-text mt-8">Turn any Git repository into a simple text digest of its codebase.</p> 15 | <p class="intro-text mt-0">This is useful for feeding a codebase into any LLM.</p> 16 | </div> 17 | {% if error_message %} 18 | <div class="mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700" 19 | id="error-message" 20 | data-message="{{ error_message }}">{{ error_message }}</div> 21 | {% endif %} 22 | {% with show_examples=true %} 23 | {% include 'components/git_form.jinja' %} 24 | {% endwith %} 25 | <p class="text-gray-600 text-sm max-w-2xl mx-auto text-center mt-4"> 26 | You can also replace 'hub' with 'ingest' in any GitHub URL. 
27 | </p> 28 | {% include 'components/result.jinja' %} 29 | {% endblock %} 30 | -------------------------------------------------------------------------------- /src/server/templates/swagger_ui.jinja: -------------------------------------------------------------------------------- 1 | {% extends "base.jinja" %} 2 | {% block title %}GitIngest API{% endblock %} 3 | {% block content %} 4 | <div class="mb-8"> 5 | <div class="relative w-full flex sm:flex-row flex-col justify-center sm:items-center"> 6 | {# Title & Sparkles #} 7 | <h1 class="landing-page-title"> 8 | GitIngest 9 | <br> 10 | API 11 | </h1> 12 | <img src="/static/svg/sparkle-red.svg" class="sparkle-red no-drag"> 13 | <img src="/static/svg/sparkle-green.svg" class="sparkle-green no-drag"> 14 | </div> 15 | <p class="intro-text mt-8">Turn any Git repository into a simple text digest of its codebase.</p> 16 | <p class="intro-text mt-0">This is useful for feeding a codebase into any LLM.</p> 17 | </div> 18 | <div class="bg-[#fff4da] rounded-xl border-[3px] border-gray-900 p-4 md:p-8 relative z-20"> 19 | <div id="swagger-ui"></div> 20 | </div> 21 | <link rel="stylesheet" 22 | href="https://unpkg.com/swagger-ui-dist@5/swagger-ui.css"> 23 | <script src="https://unpkg.com/swagger-ui-dist@5/swagger-ui-bundle.js"></script> 24 | <script> 25 | window.onload = function() { 26 | SwaggerUIBundle({ 27 | url: "/openapi.json", 28 | dom_id: '#swagger-ui', 29 | presets: [ 30 | SwaggerUIBundle.presets.apis, 31 | SwaggerUIBundle.SwaggerUIStandalonePreset 32 | ], 33 | layout: "BaseLayout", 34 | deepLinking: true, 35 | }); 36 | } 37 | </script> 38 | {% endblock %} 39 | -------------------------------------------------------------------------------- /src/static/favicons/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/src/static/favicons/apple-touch-icon.png 
-------------------------------------------------------------------------------- /src/static/favicons/favicon-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/src/static/favicons/favicon-64.png -------------------------------------------------------------------------------- /src/static/favicons/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/src/static/favicons/favicon.ico -------------------------------------------------------------------------------- /src/static/favicons/favicon.svg: -------------------------------------------------------------------------------- 1 | <svg xmlns="http://www.w3.org/2000/svg" 2 | viewBox="0 0 64 64" 3 | width="512px" 4 | height="512px"> 5 | <defs><style>.cls-1{fill:#d4e9ff;}.cls-2{fill:#f1f8ff;}.cls-3{fill:#b7daff;}</style></defs> 6 | <title>1</title> 7 | <g id="Layer_70" 8 | data-name="Layer 70"> 9 | <path class="cls-1" d="M52,35V16L41,4H14.42A2.42,2.42,0,0,0,12,6.42V35H8V49h4v8.58A2.42,2.42,0,0,0,14.42,60H49.58A2.42,2.42,0,0,0,52,57.58V49h4V35Z"/> 10 | <polygon class="cls-2" points="52 16 41 16 41 4 52 16"/> 11 | <rect class="cls-3" x="12" y="49" width="40" height="4"/> 12 | <path d="M19.76,46.06a.91.91,0,0,1-.63-.23.83.83,0,0,1-.26-.66.85.85,0,0,1,.25-.62.83.83,0,0,1,.62-.26.87.87,0,0,1,.63.26.83.83,0,0,1,.26.62.84.84,0,0,1-.26.65A.88.88,0,0,1,19.76,46.06Z"/> 13 | <path d="M24.27,38.24v6.25H27.8a.93.93,0,0,1,.65.21.67.67,0,0,1,.23.52.65.65,0,0,1-.22.51,1,1,0,0,1-.65.2H23.6a.91.91,0,0,1-1.07-1.07V38.24a1.15,1.15,0,0,1,.24-.79.8.8,0,0,1,.62-.26.83.83,0,0,1,.64.26A1.13,1.13,0,0,1,24.27,38.24Z"/> 14 | <path 
d="M31.52,38.24v6.25H35a.93.93,0,0,1,.65.21.67.67,0,0,1,.23.52.65.65,0,0,1-.22.51,1,1,0,0,1-.65.2h-4.2a.91.91,0,0,1-1.07-1.07V38.24a1.15,1.15,0,0,1,.24-.79.8.8,0,0,1,.62-.26.83.83,0,0,1,.64.26A1.13,1.13,0,0,1,31.52,38.24Z"/> 15 | <path d="M40,44.62l-1.38-5.47v5.93a1.08,1.08,0,0,1-.22.74.81.81,0,0,1-1.16,0,1.07,1.07,0,0,1-.22-.74v-6.8a.85.85,0,0,1,.29-.76,1.4,1.4,0,0,1,.79-.2h.54a2.06,2.06,0,0,1,.71.09.59.59,0,0,1,.33.32,4.91,4.91,0,0,1,.24.74l1.25,4.71,1.25-4.71a4.91,4.91,0,0,1,.24-.74.59.59,0,0,1,.33-.32,2.06,2.06,0,0,1,.71-.09h.54a1.4,1.4,0,0,1,.79.2.85.85,0,0,1,.29.76v6.8a1.08,1.08,0,0,1-.22.74.75.75,0,0,1-.59.25.73.73,0,0,1-.57-.25,1.07,1.07,0,0,1-.22-.74V39.15L42.3,44.62c-.09.36-.16.62-.22.78a1.08,1.08,0,0,1-.31.45.91.91,0,0,1-.63.21.92.92,0,0,1-.84-.47,1.92,1.92,0,0,1-.18-.45Z"/> 16 | <path d="M57,35a1,1,0,0,0-1-1H53V16s0,0,0-.06a1,1,0,0,0,0-.21s0-.05,0-.07l0,0a1,1,0,0,0-.18-.29l-11-12a1,1,0,0,0-.29-.21l0,0-.06,0A1,1,0,0,0,41.11,3H14.42A3.42,3.42,0,0,0,11,6.42V34H8a1,1,0,0,0-1,1V49s0,0,0,.07a1.08,1.08,0,0,0,.34.68l0,.05L11,52.5v5.08A3.42,3.42,0,0,0,14.42,61H49.58A3.42,3.42,0,0,0,53,57.58V52.5l3.6-2.7,0-.05a1.08,1.08,0,0,0,.34-.68s0,0,0-.07ZM21,34V5H40V16a1,1,0,0,0,1,1H51V34ZM42,6.57,49.73,15H42ZM13,6.42A1.42,1.42,0,0,1,14.42,5H19V34H13ZM9,36H55V48H9Zm4,21.58V50h6v9H14.42A1.42,1.42,0,0,1,13,57.58ZM49.58,59H21V50H51v7.58A1.42,1.42,0,0,1,49.58,59Z"/> 17 | </g> 18 | </svg> 19 | -------------------------------------------------------------------------------- /src/static/icons/chrome.svg: -------------------------------------------------------------------------------- 1 | <svg role="img" viewBox="0 0 24 24" 2 | xmlns="http://www.w3.org/2000/svg"> 3 | <title>Google Chrome</title> 4 | <path d="M12 0C8.21 0 4.831 1.757 2.632 4.501l3.953 6.848A5.454 5.454 0 0 1 12 6.545h10.691A12 12 0 0 0 12 0zM1.931 5.47A11.943 11.943 0 0 0 0 12c0 6.012 4.42 10.991 10.189 11.864l3.953-6.847a5.45 5.45 0 0 1-6.865-2.29zm13.342 2.166a5.446 5.446 0 0 1 1.45 7.09l.002.001h-.002l-5.344 
9.257c.206.01.413.016.621.016 6.627 0 12-5.373 12-12 0-1.54-.29-3.011-.818-4.364zM12 16.364a4.364 4.364 0 1 1 0-8.728 4.364 4.364 0 0 1 0 8.728Z"/> 5 | </svg> 6 | -------------------------------------------------------------------------------- /src/static/icons/discord.svg: -------------------------------------------------------------------------------- 1 | <svg role="img" viewBox="0 0 24 24" 2 | xmlns="http://www.w3.org/2000/svg"> 3 | <title>Discord</title> 4 | <path d="M20.317 4.3698a19.7913 19.7913 0 00-4.8851-1.5152.0741.0741 0 00-.0785.0371c-.211.3753-.4447.8648-.6083 1.2495-1.8447-.2762-3.68-.2762-5.4868 0-.1636-.3933-.4058-.8742-.6177-1.2495a.077.077 0 00-.0785-.037 19.7363 19.7363 0 00-4.8852 1.515.0699.0699 0 00-.0321.0277C.5334 9.0458-.319 13.5799.0992 18.0578a.0824.0824 0 00.0312.0561c2.0528 1.5076 4.0413 2.4228 5.9929 3.0294a.0777.0777 0 00.0842-.0276c.4616-.6304.8731-1.2952 1.226-1.9942a.076.076 0 00-.0416-.1057c-.6528-.2476-1.2743-.5495-1.8722-.8923a.077.077 0 01-.0076-.1277c.1258-.0943.2517-.1923.3718-.2914a.0743.0743 0 01.0776-.0105c3.9278 1.7933 8.18 1.7933 12.0614 0a.0739.0739 0 01.0785.0095c.1202.099.246.1981.3728.2924a.077.077 0 01-.0066.1276 12.2986 12.2986 0 01-1.873.8914.0766.0766 0 00-.0407.1067c.3604.698.7719 1.3628 1.225 1.9932a.076.076 0 00.0842.0286c1.961-.6067 3.9495-1.5219 6.0023-3.0294a.077.077 0 00.0313-.0552c.5004-5.177-.8382-9.6739-3.5485-13.6604a.061.061 0 00-.0312-.0286zM8.02 15.3312c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9555-2.4189 2.157-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.9555 2.4189-2.1569 2.4189zm7.9748 0c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9554-2.4189 2.1569-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.946 2.4189-2.1568 2.4189Z"/> 5 | </svg> 6 | -------------------------------------------------------------------------------- /src/static/icons/github.svg: -------------------------------------------------------------------------------- 1 | <svg role="img" viewBox="0 0 24 24" 2 | 
xmlns="http://www.w3.org/2000/svg" fill="#000"> 3 | <path fill-rule="evenodd" d="M12 2C6.477 2 2 6.484 2 12.017c0 4.425 2.865 8.18 6.839 9.504.5.092.682-.217.682-.483 0-.237-.008-.868-.013-1.703-2.782.605-3.369-1.343-3.369-1.343-.454-1.158-1.11-1.466-1.11-1.466-.908-.62.069-.608.069-.608 1.003.07 1.531 1.032 1.531 1.032.892 1.53 2.341 1.088 2.91.832.092-.647.35-1.088.636-1.338-2.22-.253-4.555-1.113-4.555-4.951 0-1.093.39-1.988 1.029-2.688-.103-.253-.446-1.272.098-2.65 0 0 .84-.27 2.75 1.026A9.564 9.564 0 0112 6.844c.85.004 1.705.115 2.504.337 1.909-1.296 2.747-1.027 2.747-1.027.546 1.379.202 2.398.1 2.651.64.7 1.028 1.595 1.028 2.688 0 3.848-2.339 4.695-4.566 4.943.359.309.678.92.678 1.855 0 1.338-.012 2.419-.012 2.747 0 .268.18.58.688.482A10.019 10.019 0 0022 12.017C22 6.484 17.522 2 12 2z" clip-rule="evenodd"/> 4 | </svg> 5 | -------------------------------------------------------------------------------- /src/static/icons/python-color.svg: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?> 2 | <svg version="1.0" id="svg2" sodipodi:version="0.32" inkscape:version="1.2.1 (9c6d41e410, 2022-07-14)" sodipodi:docname="python-logo-only.svg" width="83.371017pt" height="101.00108pt" inkscape:export-filename="python-logo-only.png" inkscape:export-xdpi="232.44" inkscape:export-ydpi="232.44" 3 | xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" 4 | xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" 5 | xmlns:xlink="http://www.w3.org/1999/xlink" 6 | xmlns="http://www.w3.org/2000/svg" 7 | xmlns:svg="http://www.w3.org/2000/svg" 8 | xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 9 | xmlns:cc="http://creativecommons.org/ns#" 10 | xmlns:dc="http://purl.org/dc/elements/1.1/"> 11 | <metadata id="metadata371"> 12 | <rdf:RDF> 13 | <cc:Work rdf:about=""> 14 | <dc:format>image/svg+xml</dc:format> 15 | <dc:type 
rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> 16 | </cc:Work> 17 | </rdf:RDF> 18 | </metadata> 19 | <sodipodi:namedview inkscape:window-height="2080" inkscape:window-width="1976" inkscape:pageshadow="2" inkscape:pageopacity="0.0" guidetolerance="10.0" gridtolerance="10.0" objecttolerance="10.0" borderopacity="1.0" bordercolor="#666666" pagecolor="#ffffff" id="base" inkscape:zoom="2.1461642" inkscape:cx="91.558698" inkscape:cy="47.9926" inkscape:window-x="1092" inkscape:window-y="72" inkscape:current-layer="svg2" width="210mm" height="40mm" units="mm" inkscape:showpageshadow="2" inkscape:pagecheckerboard="0" inkscape:deskcolor="#d1d1d1" inkscape:document-units="pt" showgrid="false" inkscape:window-maximized="0" /> 20 | <defs id="defs4"> 21 | <linearGradient id="linearGradient2795"> 22 | <stop style="stop-color:#b8b8b8;stop-opacity:0.49803922;" offset="0" id="stop2797" /> 23 | <stop style="stop-color:#7f7f7f;stop-opacity:0;" offset="1" id="stop2799" /> 24 | </linearGradient> 25 | <linearGradient id="linearGradient2787"> 26 | <stop style="stop-color:#7f7f7f;stop-opacity:0.5;" offset="0" id="stop2789" /> 27 | <stop style="stop-color:#7f7f7f;stop-opacity:0;" offset="1" id="stop2791" /> 28 | </linearGradient> 29 | <linearGradient id="linearGradient3676"> 30 | <stop style="stop-color:#b2b2b2;stop-opacity:0.5;" offset="0" id="stop3678" /> 31 | <stop style="stop-color:#b3b3b3;stop-opacity:0;" offset="1" id="stop3680" /> 32 | </linearGradient> 33 | <linearGradient id="linearGradient3236"> 34 | <stop style="stop-color:#f4f4f4;stop-opacity:1" offset="0" id="stop3244" /> 35 | <stop style="stop-color:white;stop-opacity:1" offset="1" id="stop3240" /> 36 | </linearGradient> 37 | <linearGradient id="linearGradient4671"> 38 | <stop style="stop-color:#ffd43b;stop-opacity:1;" offset="0" id="stop4673" /> 39 | <stop style="stop-color:#ffe873;stop-opacity:1" offset="1" id="stop4675" /> 40 | </linearGradient> 41 | <linearGradient id="linearGradient4689"> 42 | <stop 
style="stop-color:#5a9fd4;stop-opacity:1;" offset="0" id="stop4691" /> 43 | <stop style="stop-color:#306998;stop-opacity:1;" offset="1" id="stop4693" /> 44 | </linearGradient> 45 | <linearGradient x1="224.23996" y1="144.75717" x2="-65.308502" y2="144.75717" id="linearGradient2987" xlink:href="#linearGradient4671" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" /> 46 | <linearGradient x1="172.94208" y1="77.475983" x2="26.670298" y2="76.313133" id="linearGradient2990" xlink:href="#linearGradient4689" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" /> 47 | <linearGradient inkscape:collect="always" xlink:href="#linearGradient4689" id="linearGradient2587" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" x1="172.94208" y1="77.475983" x2="26.670298" y2="76.313133" /> 48 | <linearGradient inkscape:collect="always" xlink:href="#linearGradient4671" id="linearGradient2589" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" x1="224.23996" y1="144.75717" x2="-65.308502" y2="144.75717" /> 49 | <linearGradient inkscape:collect="always" xlink:href="#linearGradient4689" id="linearGradient2248" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" x1="172.94208" y1="77.475983" x2="26.670298" y2="76.313133" /> 50 | <linearGradient inkscape:collect="always" xlink:href="#linearGradient4671" id="linearGradient2250" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" x1="224.23996" y1="144.75717" x2="-65.308502" y2="144.75717" /> 51 | <linearGradient inkscape:collect="always" xlink:href="#linearGradient4671" id="linearGradient2255" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.562541,0,0,0.567972,-11.5974,-7.60954)" x1="224.23996" y1="144.75717" x2="-65.308502" y2="144.75717" /> 52 | <linearGradient inkscape:collect="always" xlink:href="#linearGradient4689" id="linearGradient2258" 
gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.562541,0,0,0.567972,-11.5974,-7.60954)" x1="172.94208" y1="76.176224" x2="26.670298" y2="76.313133" /> 53 | <radialGradient inkscape:collect="always" xlink:href="#linearGradient2795" id="radialGradient2801" cx="61.518883" cy="132.28575" fx="61.518883" fy="132.28575" r="29.036913" gradientTransform="matrix(1,0,0,0.177966,0,108.7434)" gradientUnits="userSpaceOnUse" /> 54 | <linearGradient inkscape:collect="always" xlink:href="#linearGradient4671" id="linearGradient1475" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.562541,0,0,0.567972,-14.99112,-11.702371)" x1="150.96111" y1="192.35176" x2="112.03144" y2="137.27299" /> 55 | <linearGradient inkscape:collect="always" xlink:href="#linearGradient4689" id="linearGradient1478" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.562541,0,0,0.567972,-14.99112,-11.702371)" x1="26.648937" y1="20.603781" x2="135.66525" y2="114.39767" /> 56 | <radialGradient inkscape:collect="always" xlink:href="#linearGradient2795" id="radialGradient1480" gradientUnits="userSpaceOnUse" gradientTransform="matrix(1.7490565e-8,-0.23994696,1.054668,3.7915457e-7,-83.7008,142.46201)" cx="61.518883" cy="132.28575" fx="61.518883" fy="132.28575" r="29.036913" /> 57 | </defs> 58 | <path style="fill:url(#linearGradient1478);fill-opacity:1" d="M 54.918785,9.1927421e-4 C 50.335132,0.02221727 45.957846,0.41313697 42.106285,1.0946693 30.760069,3.0991731 28.700036,7.2947714 28.700035,15.032169 v 10.21875 h 26.8125 v 3.40625 h -26.8125 -10.0625 c -7.792459,0 -14.6157588,4.683717 -16.7499998,13.59375 -2.46181998,10.212966 -2.57101508,16.586023 0,27.25 1.9059283,7.937852 6.4575432,13.593748 14.2499998,13.59375 h 9.21875 v -12.25 c 0,-8.849902 7.657144,-16.656248 16.75,-16.65625 h 26.78125 c 7.454951,0 13.406253,-6.138164 13.40625,-13.625 v -25.53125 c 0,-7.2663386 -6.12998,-12.7247771 -13.40625,-13.9374997 C 64.281548,0.32794397 59.502438,-0.02037903 54.918785,9.1927421e-4 Z m 
-14.5,8.21875012579 c 2.769547,0 5.03125,2.2986456 5.03125,5.1249996 -2e-6,2.816336 -2.261703,5.09375 -5.03125,5.09375 -2.779476,-1e-6 -5.03125,-2.277415 -5.03125,-5.09375 -10e-7,-2.826353 2.251774,-5.1249996 5.03125,-5.1249996 z" id="path1948" /> 59 | <path style="fill:url(#linearGradient1475);fill-opacity:1" d="m 85.637535,28.657169 v 11.90625 c 0,9.230755 -7.825895,16.999999 -16.75,17 h -26.78125 c -7.335833,0 -13.406249,6.278483 -13.40625,13.625 v 25.531247 c 0,7.266344 6.318588,11.540324 13.40625,13.625004 8.487331,2.49561 16.626237,2.94663 26.78125,0 6.750155,-1.95439 13.406253,-5.88761 13.40625,-13.625004 V 86.500919 h -26.78125 v -3.40625 h 26.78125 13.406254 c 7.792461,0 10.696251,-5.435408 13.406241,-13.59375 2.79933,-8.398886 2.68022,-16.475776 0,-27.25 -1.92578,-7.757441 -5.60387,-13.59375 -13.406241,-13.59375 z m -15.0625,64.65625 c 2.779478,3e-6 5.03125,2.277417 5.03125,5.093747 -2e-6,2.826354 -2.251775,5.125004 -5.03125,5.125004 -2.76955,0 -5.03125,-2.29865 -5.03125,-5.125004 2e-6,-2.81633 2.261697,-5.093747 5.03125,-5.093747 z" id="path1950" /> 60 | <ellipse style="opacity:0.44382;fill:url(#radialGradient1480);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:15.4174;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" id="path1894" cx="55.816761" cy="127.70079" rx="35.930977" ry="6.9673119" /> 61 | </svg> 62 | -------------------------------------------------------------------------------- /src/static/icons/python.svg: -------------------------------------------------------------------------------- 1 | <svg role="img" viewBox="0 0 24 24" 2 | xmlns="http://www.w3.org/2000/svg"> 3 | <title>Python</title> 4 | <path 
d="M14.25.18l.9.2.73.26.59.3.45.32.34.34.25.34.16.33.1.3.04.26.02.2-.01.13V8.5l-.05.63-.13.55-.21.46-.26.38-.3.31-.33.25-.35.19-.35.14-.33.1-.3.07-.26.04-.21.02H8.77l-.69.05-.59.14-.5.22-.41.27-.33.32-.27.35-.2.36-.15.37-.1.35-.07.32-.04.27-.02.21v3.06H3.17l-.21-.03-.28-.07-.32-.12-.35-.18-.36-.26-.36-.36-.35-.46-.32-.59-.28-.73-.21-.88-.14-1.05-.05-1.23.06-1.22.16-1.04.24-.87.32-.71.36-.57.4-.44.42-.33.42-.24.4-.16.36-.1.32-.05.24-.01h.16l.06.01h8.16v-.83H6.18l-.01-2.75-.02-.37.05-.34.11-.31.17-.28.25-.26.31-.23.38-.2.44-.18.51-.15.58-.12.64-.1.71-.06.77-.04.84-.02 1.27.05zm-6.3 1.98l-.23.33-.08.41.08.41.23.34.33.22.41.09.41-.09.33-.22.23-.34.08-.41-.08-.41-.23-.33-.33-.22-.41-.09-.41.09zm13.09 3.95l.28.06.32.12.35.18.36.27.36.35.35.47.32.59.28.73.21.88.14 1.04.05 1.23-.06 1.23-.16 1.04-.24.86-.32.71-.36.57-.4.45-.42.33-.42.24-.4.16-.36.09-.32.05-.24.02-.16-.01h-8.22v.82h5.84l.01 2.76.02.36-.05.34-.11.31-.17.29-.25.25-.31.24-.38.2-.44.17-.51.15-.58.13-.64.09-.71.07-.77.04-.84.01-1.27-.04-1.07-.14-.9-.2-.73-.25-.59-.3-.45-.33-.34-.34-.25-.34-.16-.33-.1-.3-.04-.25-.02-.2.01-.13v-5.34l.05-.64.13-.54.21-.46.26-.38.3-.32.33-.24.35-.2.35-.14.33-.1.3-.06.26-.04.21-.02.13-.01h5.84l.69-.05.59-.14.5-.21.41-.28.33-.32.27-.35.2-.36.15-.36.1-.35.07-.32.04-.28.02-.21V6.07h2.09l.14.01zm-6.47 14.25l-.23.33-.08.41.08.41.23.33.33.23.41.08.41-.08.33-.23.23-.33.08-.41-.08-.41-.23-.33-.33-.23-.41-.08-.41.08z"/> 5 | </svg> 6 | -------------------------------------------------------------------------------- /src/static/js/git.js: -------------------------------------------------------------------------------- 1 | function waitForStars() { 2 | return new Promise((resolve) => { 3 | const check = () => { 4 | const stars = document.getElementById('github-stars'); 5 | 6 | if (stars && stars.textContent !== '0') {resolve();} 7 | else {setTimeout(check, 10);} 8 | }; 9 | 10 | check(); 11 | }); 12 | } 13 | 14 | document.addEventListener('DOMContentLoaded', () => { 15 | const urlInput = 
document.getElementById('input_text'); 16 | const form = document.getElementById('ingestForm'); 17 | 18 | if (urlInput && urlInput.value.trim() && form) { 19 | // Wait for stars to be loaded before submitting 20 | waitForStars().then(() => { 21 | const submitEvent = new SubmitEvent('submit', { 22 | cancelable: true, 23 | bubbles: true 24 | }); 25 | 26 | Object.defineProperty(submitEvent, 'target', { 27 | value: form, 28 | enumerable: true 29 | }); 30 | handleSubmit(submitEvent, true); 31 | }); 32 | } 33 | }); 34 | -------------------------------------------------------------------------------- /src/static/js/git_form.js: -------------------------------------------------------------------------------- 1 | // Strike-through / un-strike file lines when the pattern-type menu flips. 2 | function changePattern() { 3 | const dirPre = document.getElementById('directory-structure-pre'); 4 | 5 | if (!dirPre) {return;} 6 | 7 | const treeLineElements = Array.from(dirPre.querySelectorAll('pre[name="tree-line"]')); 8 | 9 | // Skip the first two tree-line elements (presumably the header and root-directory lines - TODO confirm) 10 | treeLineElements.slice(2).forEach((element) => { 11 | element.classList.toggle('line-through'); 12 | element.classList.toggle('text-gray-500'); 13 | }); 14 | } 15 | 16 | // Show/hide the Personal-Access-Token section when the "Private repository" checkbox is toggled. 
17 | function toggleAccessSettings() { 18 | const container = document.getElementById('accessSettingsContainer'); 19 | const examples = document.getElementById('exampleRepositories'); 20 | const show = document.getElementById('showAccessSettings')?.checked; 21 | 22 | container?.classList.toggle('hidden', !show); 23 | examples?.classList.toggle('lg:mt-0', show); 24 | } 25 | 26 | 27 | 28 | document.addEventListener('DOMContentLoaded', () => { 29 | toggleAccessSettings(); 30 | changePattern(); 31 | }); 32 | 33 | 34 | // Make them available to existing inline attributes 35 | window.changePattern = changePattern; 36 | window.toggleAccessSettings = toggleAccessSettings; 37 | -------------------------------------------------------------------------------- /src/static/js/index.js: -------------------------------------------------------------------------------- 1 | function submitExample(repoName) { 2 | const input = document.getElementById('input_text'); 3 | 4 | if (input) { 5 | input.value = repoName; 6 | input.focus(); 7 | } 8 | } 9 | 10 | // Make it visible to inline onclick handlers 11 | window.submitExample = submitExample; 12 | -------------------------------------------------------------------------------- /src/static/js/navbar.js: -------------------------------------------------------------------------------- 1 | // Fetch GitHub stars 2 | function formatStarCount(count) { 3 | if (count >= 1000) {return `${ (count / 1000).toFixed(1) }k`;} 4 | 5 | return count.toString(); 6 | } 7 | 8 | async function fetchGitHubStars() { 9 | try { 10 | const res = await fetch('https://api.github.com/repos/coderamp-labs/gitingest'); 11 | 12 | if (!res.ok) {throw new Error(`${res.status} ${res.statusText}`);} 13 | const data = await res.json(); 14 | 15 | document.getElementById('github-stars').textContent = 16 | formatStarCount(data.stargazers_count); 17 | } catch (err) { 18 | console.error('Error fetching GitHub stars:', err); 19 | const el = 
document.getElementById('github-stars').parentElement; 20 | 21 | if (el) {el.style.display = 'none';} 22 | } 23 | } 24 | 25 | // auto-run when script loads 26 | fetchGitHubStars(); 27 | -------------------------------------------------------------------------------- /src/static/js/posthog.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | !function (t, e) { 3 | let o, n, p, r; 4 | if (e.__SV) {return;} // already loaded 5 | 6 | window.posthog = e; 7 | e._i = []; 8 | e.init = function (i, s, a) { 9 | function g(t, e) { 10 | const o = e.split("."); 11 | if (o.length === 2) { 12 | t = t[o[0]]; 13 | e = o[1]; 14 | } 15 | t[e] = function () { 16 | t.push([e].concat(Array.prototype.slice.call(arguments, 0))); 17 | }; 18 | } 19 | 20 | p = t.createElement("script"); 21 | p.type = "text/javascript"; 22 | p.crossOrigin = "anonymous"; 23 | p.async = true; 24 | p.src = `${ s.api_host.replace(".i.posthog.com", "-assets.i.posthog.com") }/static/array.js`; 25 | 26 | r = t.getElementsByTagName("script")[0]; 27 | r.parentNode.insertBefore(p, r); 28 | 29 | let u = e; 30 | if (a !== undefined) { 31 | u = e[a] = []; 32 | } else { 33 | a = "posthog"; 34 | } 35 | 36 | u.people = u.people || []; 37 | u.toString = function (t) { 38 | let e = "posthog"; 39 | if (a !== "posthog") {e += `.${ a }`;} 40 | if (!t) {e += " (stub)";} 41 | return e; 42 | }; 43 | u.people.toString = function () { 44 | return `${ u.toString(1) }.people (stub)`; 45 | }; 46 | 47 | 48 | o = [ 49 | "init", "capture", "register", "register_once", "register_for_session", "unregister", 50 | "unregister_for_session", "getFeatureFlag", "getFeatureFlagPayload", "isFeatureEnabled", 51 | "reloadFeatureFlags", "updateEarlyAccessFeatureEnrollment", "getEarlyAccessFeatures", 52 | "on", "onFeatureFlags", "onSessionId", "getSurveys", "getActiveMatchingSurveys", 53 | "renderSurvey", "canRenderSurvey", "getNextSurveyStep", "identify", "setPersonProperties", 54 | "group", 
"resetGroups", "setPersonPropertiesForFlags", "resetPersonPropertiesForFlags", 55 | "setGroupPropertiesForFlags", "resetGroupPropertiesForFlags", "reset", "get_distinct_id", 56 | "getGroups", "get_session_id", "get_session_replay_url", "alias", "set_config", 57 | "startSessionRecording", "stopSessionRecording", "sessionRecordingStarted", 58 | "captureException", "loadToolbar", "get_property", "getSessionProperty", 59 | "createPersonProfile", "opt_in_capturing", "opt_out_capturing", 60 | "has_opted_in_capturing", "has_opted_out_capturing", "clear_opt_in_out_capturing", 61 | "debug", "getPageViewId" 62 | ]; 63 | 64 | for (n = 0; n < o.length; n++) {g(u, o[n]);} 65 | e._i.push([i, s, a]); 66 | }; 67 | 68 | e.__SV = 1; 69 | }(document, window.posthog || []); 70 | 71 | /* Initialise PostHog */ 72 | posthog.init('phc_9aNpiIVH2zfTWeY84vdTWxvrJRCQQhP5kcVDXUvcdou', { 73 | api_host: 'https://eu.i.posthog.com', 74 | person_profiles: 'always', 75 | }); 76 | -------------------------------------------------------------------------------- /src/static/og-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/src/static/og-image.png -------------------------------------------------------------------------------- /src/static/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Allow: /api/ 4 | Allow: /coderamp-labs/gitingest/ 5 | -------------------------------------------------------------------------------- /src/static/svg/github-star.svg: -------------------------------------------------------------------------------- 1 | <svg role="img" viewBox="0 0 20 20" 2 | xmlns="http://www.w3.org/2000/svg" fill="#ffc480"> 3 | <path d="M9.049 2.927c.3-.921 1.603-.921 1.902 0l1.07 3.292a1 1 0 00.95.69h3.462c.969 0 1.371 1.24.588 1.81l-2.8 2.034a1 1 0 00-.364 1.118l1.07 3.292c.3.921-.755 
1.688-1.54 1.118l-2.8-2.034a1 1 0 00-1.175 0l-2.8 2.034c-.784.57-1.838-.197-1.539-1.118l1.07-3.292a1 1 0 00-.364-1.118L2.98 8.72c-.783-.57-.38-1.81.588-1.81h3.461a1 1 0 00.951-.69l1.07-3.292z"/> 4 | </svg> 5 | -------------------------------------------------------------------------------- /src/static/svg/sparkle-green.svg: -------------------------------------------------------------------------------- 1 | <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 90 80" fill="none"> 2 | <!-- Large diamond --> 3 | <path d="m35.213 16.953.595-5.261 2.644 4.587a35.056 35.056 0 0 0 26.432 17.33l5.261.594-4.587 2.644A35.056 35.056 0 0 0 48.23 63.28l-.595 5.26-2.644-4.587a35.056 35.056 0 0 0-26.432-17.328l-5.261-.595 4.587-2.644a35.056 35.056 0 0 0 17.329-26.433Z" 4 | fill="#5CF1A4" stroke="#000" stroke-width="2.868"/> 5 | <!-- Sparkle rays --> 6 | <path d="M75.062 40.108c1.07 5.255 1.072 16.52-7.472 19.54m7.422-19.682c1.836 2.965 7.643 8.14 16.187 5.121-8.544 3.02-8.207 15.23-6.971 20.957-1.97-3.343-8.044-9.274-16.588-6.254M12.054 28.012c1.34-5.22 6.126-15.4 14.554-14.369M12.035 28.162c-.274-3.487-2.93-10.719-11.358-11.75C9.104 17.443 14.013 6.262 15.414.542c.226 3.888 2.784 11.92 11.212 12.95" 7 | stroke="#000" stroke-width="2.319" stroke-linecap="round"/> 8 | </svg> 9 | -------------------------------------------------------------------------------- /src/static/svg/sparkle-red.svg: -------------------------------------------------------------------------------- 1 | 2 | <svg xmlns="http://www.w3.org/2000/svg" 3 | viewBox="0 0 90 100" 4 | fill="none"> 5 | <!-- Large diamond --> 6 | <path d="m35.878 14.162 1.333-5.369 1.933 5.183c4.47 11.982 14.036 21.085 25.828 24.467l5.42 1.555-5.209 2.16c-11.332 4.697-19.806 14.826-22.888 27.237l-1.333 5.369-1.933-5.183C34.56 57.599 24.993 48.496 13.201 45.114l-5.42-1.555 5.21-2.16c11.331-4.697 19.805-14.826 22.887-27.237Z" 7 | fill="#FE4A60" stroke="#000" stroke-width="3.445"/> 8 | <!-- Sparkle rays --> 9 | <path d="M79.653 5.729c-2.436 
5.323-9.515 15.25-18.341 12.374m9.197 16.336c2.6-5.851 10.008-16.834 18.842-13.956m-9.738-15.07c-.374 3.787 1.076 12.078 9.869 14.943M70.61 34.6c.503-4.21-.69-13.346-9.49-16.214M14.922 65.967c1.338 5.677 6.372 16.756 15.808 15.659M18.21 95.832c-1.392-6.226-6.54-18.404-15.984-17.305m12.85-12.892c-.41 3.771-3.576 11.588-12.968 12.681M18.025 96c.367-4.21 3.453-12.905 12.854-14" 10 | stroke="#000" stroke-width="2.548" stroke-linecap="round"/> 11 | </svg> 12 | -------------------------------------------------------------------------------- /tests/.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | init-hook= 3 | import sys 4 | sys.path.append('./src') 5 | 6 | [MESSAGES CONTROL] 7 | disable=missing-class-docstring,missing-function-docstring,protected-access,fixme 8 | 9 | [FORMAT] 10 | max-line-length=119 11 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the gitingest package.""" 2 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Fixtures for tests. 2 | 3 | This file provides shared fixtures for creating sample queries, a temporary directory structure, and a helper function 4 | to write ``.ipynb`` notebooks for testing notebook utilities. 
5 | """ 6 | 7 | from __future__ import annotations 8 | 9 | import json 10 | from pathlib import Path 11 | from typing import TYPE_CHECKING, Any, Callable, Dict 12 | from unittest.mock import AsyncMock 13 | 14 | import pytest 15 | 16 | from gitingest.query_parser import IngestionQuery 17 | 18 | if TYPE_CHECKING: 19 | from pytest_mock import MockerFixture 20 | 21 | WriteNotebookFunc = Callable[[str, Dict[str, Any]], Path] 22 | 23 | DEMO_URL = "https://github.com/user/repo" 24 | LOCAL_REPO_PATH = "/tmp/repo" 25 | 26 | 27 | @pytest.fixture 28 | def sample_query() -> IngestionQuery: 29 | """Provide a default ``IngestionQuery`` object for use in tests. 30 | 31 | This fixture returns a ``IngestionQuery`` pre-populated with typical fields and some default ignore patterns. 32 | 33 | Returns 34 | ------- 35 | IngestionQuery 36 | The sample ``IngestionQuery`` object. 37 | 38 | """ 39 | return IngestionQuery( 40 | user_name="test_user", 41 | repo_name="test_repo", 42 | local_path=Path("/tmp/test_repo").resolve(), 43 | slug="test_user/test_repo", 44 | id="id", 45 | branch="main", 46 | max_file_size=1_000_000, 47 | ignore_patterns={"*.pyc", "__pycache__", ".git"}, 48 | ) 49 | 50 | 51 | @pytest.fixture 52 | def temp_directory(tmp_path: Path) -> Path: 53 | """Create a temporary directory structure for testing repository scanning. 54 | 55 | The structure includes: 56 | test_repo/ 57 | ├── file1.txt 58 | ├── file2.py 59 | ├── src/ 60 | │ ├── subfile1.txt 61 | │ ├── subfile2.py 62 | │ └── subdir/ 63 | │ ├── file_subdir.txt 64 | │ └── file_subdir.py 65 | ├── dir1/ 66 | │ └── file_dir1.txt 67 | └── dir2/ 68 | └── file_dir2.txt 69 | 70 | Parameters 71 | ---------- 72 | tmp_path : Path 73 | The temporary directory path provided by the ``tmp_path`` fixture. 74 | 75 | Returns 76 | ------- 77 | Path 78 | The path to the created ``test_repo`` directory. 
79 | 80 | """ 81 | test_dir = tmp_path / "test_repo" 82 | test_dir.mkdir() 83 | 84 | # Root files 85 | (test_dir / "file1.txt").write_text("Hello World") 86 | (test_dir / "file2.py").write_text("print('Hello')") 87 | 88 | # src directory and its files 89 | src_dir = test_dir / "src" 90 | src_dir.mkdir() 91 | (src_dir / "subfile1.txt").write_text("Hello from src") 92 | (src_dir / "subfile2.py").write_text("print('Hello from src')") 93 | 94 | # src/subdir and its files 95 | subdir = src_dir / "subdir" 96 | subdir.mkdir() 97 | (subdir / "file_subdir.txt").write_text("Hello from subdir") 98 | (subdir / "file_subdir.py").write_text("print('Hello from subdir')") 99 | 100 | # dir1 and its file 101 | dir1 = test_dir / "dir1" 102 | dir1.mkdir() 103 | (dir1 / "file_dir1.txt").write_text("Hello from dir1") 104 | 105 | # dir2 and its file 106 | dir2 = test_dir / "dir2" 107 | dir2.mkdir() 108 | (dir2 / "file_dir2.txt").write_text("Hello from dir2") 109 | 110 | return test_dir 111 | 112 | 113 | @pytest.fixture 114 | def write_notebook(tmp_path: Path) -> WriteNotebookFunc: 115 | """Provide a helper function to write a ``.ipynb`` notebook file with the given content. 116 | 117 | Parameters 118 | ---------- 119 | tmp_path : Path 120 | The temporary directory path provided by the ``tmp_path`` fixture. 121 | 122 | Returns 123 | ------- 124 | WriteNotebookFunc 125 | A callable that accepts a filename and a dictionary (representing JSON notebook data), writes it to a 126 | ``.ipynb`` file, and returns the path to the file. 
127 | 128 | """ 129 | 130 | def _write_notebook(name: str, content: dict[str, Any]) -> Path: 131 | notebook_path = tmp_path / name 132 | with notebook_path.open(mode="w", encoding="utf-8") as f: 133 | json.dump(content, f) 134 | return notebook_path 135 | 136 | return _write_notebook 137 | 138 | 139 | @pytest.fixture 140 | def stub_branches(mocker: MockerFixture) -> Callable[[list[str]], None]: 141 | """Return a function that stubs git branch discovery to *branches*.""" 142 | 143 | def _factory(branches: list[str]) -> None: 144 | mocker.patch( 145 | "gitingest.utils.git_utils.run_command", 146 | new_callable=AsyncMock, 147 | return_value=("\n".join(f"refs/heads/{b}" for b in branches).encode() + b"\n", b""), 148 | ) 149 | mocker.patch( 150 | "gitingest.utils.git_utils.fetch_remote_branches_or_tags", 151 | new_callable=AsyncMock, 152 | return_value=branches, 153 | ) 154 | 155 | return _factory 156 | 157 | 158 | @pytest.fixture 159 | def repo_exists_true(mocker: MockerFixture) -> AsyncMock: 160 | """Patch ``gitingest.clone.check_repo_exists`` to always return ``True``.""" 161 | return mocker.patch("gitingest.clone.check_repo_exists", return_value=True) 162 | 163 | 164 | @pytest.fixture 165 | def run_command_mock(mocker: MockerFixture) -> AsyncMock: 166 | """Patch ``gitingest.clone.run_command`` with an ``AsyncMock``. 167 | 168 | The mocked function returns a dummy process whose ``communicate`` method yields generic 169 | ``stdout`` / ``stderr`` bytes. Tests can still access / tweak the mock via the fixture argument. 170 | """ 171 | mock_exec = mocker.patch("gitingest.clone.run_command", new_callable=AsyncMock) 172 | 173 | # Provide a default dummy process so most tests don't have to create one. 
174 | dummy_process = AsyncMock() 175 | dummy_process.communicate.return_value = (b"output", b"error") 176 | mock_exec.return_value = dummy_process 177 | 178 | return mock_exec 179 | -------------------------------------------------------------------------------- /tests/query_parser/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the query parser.""" 2 | -------------------------------------------------------------------------------- /tests/query_parser/test_git_host_agnostic.py: -------------------------------------------------------------------------------- 1 | """Tests to verify that the query parser is Git host agnostic. 2 | 3 | These tests confirm that ``parse_query`` correctly identifies user/repo pairs and canonical URLs for GitHub, GitLab, 4 | Bitbucket, Gitea, and Codeberg, even if the host is omitted. 5 | """ 6 | 7 | from __future__ import annotations 8 | 9 | import pytest 10 | 11 | from gitingest.query_parser import parse_query 12 | from gitingest.utils.query_parser_utils import KNOWN_GIT_HOSTS 13 | 14 | # Repository matrix: (host, user, repo) 15 | _REPOS: list[tuple[str, str, str]] = [ 16 | ("github.com", "tiangolo", "fastapi"), 17 | ("gitlab.com", "gitlab-org", "gitlab-runner"), 18 | ("bitbucket.org", "na-dna", "llm-knowledge-share"), 19 | ("gitea.com", "xorm", "xorm"), 20 | ("codeberg.org", "forgejo", "forgejo"), 21 | ("git.rwth-aachen.de", "medialab", "19squared"), 22 | ("gitlab.alpinelinux.org", "alpine", "apk-tools"), 23 | ] 24 | 25 | 26 | # Generate cartesian product of repository tuples with URL variants. 
27 | @pytest.mark.parametrize(("host", "user", "repo"), _REPOS, ids=[f"{h}:{u}/{r}" for h, u, r in _REPOS]) 28 | @pytest.mark.parametrize("variant", ["full", "noscheme", "slug"]) 29 | @pytest.mark.asyncio 30 | async def test_parse_query_without_host( 31 | host: str, 32 | user: str, 33 | repo: str, 34 | variant: str, 35 | ) -> None: 36 | """Verify that ``parse_query`` handles URLs, host-omitted URLs and raw slugs.""" 37 | # Build the input URL based on the selected variant 38 | if variant == "full": 39 | url = f"https://{host}/{user}/{repo}" 40 | elif variant == "noscheme": 41 | url = f"{host}/{user}/{repo}" 42 | else: # "slug" 43 | url = f"{user}/{repo}" 44 | 45 | expected_url = f"https://{host}/{user}/{repo}" 46 | 47 | # For slug form with a custom host (not in KNOWN_GIT_HOSTS) we expect a failure, 48 | # because the parser cannot guess which domain to use. 49 | if variant == "slug" and host not in KNOWN_GIT_HOSTS: 50 | with pytest.raises(ValueError, match="Could not find a valid repository host"): 51 | await parse_query(url, max_file_size=50, from_web=True) 52 | return 53 | 54 | query = await parse_query(url, max_file_size=50, from_web=True) 55 | 56 | # Compare against the canonical dict while ignoring unpredictable fields. 
57 | actual = query.model_dump(exclude={"id", "local_path", "ignore_patterns"}) 58 | 59 | expected = { 60 | "user_name": user, 61 | "repo_name": repo, 62 | "url": expected_url, 63 | "slug": f"{user}-{repo}", 64 | "subpath": "/", 65 | "type": None, 66 | "branch": None, 67 | "tag": None, 68 | "commit": None, 69 | "max_file_size": 50, 70 | "include_patterns": None, 71 | "include_submodules": False, 72 | } 73 | 74 | assert actual == expected 75 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | """Tests for the Gitingest CLI.""" 2 | 3 | from __future__ import annotations 4 | 5 | from inspect import signature 6 | from pathlib import Path 7 | 8 | import pytest 9 | from click.testing import CliRunner, Result 10 | 11 | from gitingest.__main__ import main 12 | from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME 13 | 14 | 15 | @pytest.mark.parametrize( 16 | ("cli_args", "expect_file"), 17 | [ 18 | pytest.param(["./"], True, id="default-options"), 19 | pytest.param( 20 | [ 21 | "./", 22 | "--output", 23 | str(OUTPUT_FILE_NAME), 24 | "--max-size", 25 | str(MAX_FILE_SIZE), 26 | "--exclude-pattern", 27 | "tests/", 28 | "--include-pattern", 29 | "src/", 30 | "--include-submodules", 31 | ], 32 | True, 33 | id="custom-options", 34 | ), 35 | ], 36 | ) 37 | def test_cli_writes_file( 38 | tmp_path: Path, 39 | monkeypatch: pytest.MonkeyPatch, 40 | *, 41 | cli_args: list[str], 42 | expect_file: bool, 43 | ) -> None: 44 | """Run the CLI and verify that the SARIF file is created (or not).""" 45 | expectes_exit_code = 0 46 | # Work inside an isolated temp directory 47 | monkeypatch.chdir(tmp_path) 48 | 49 | result = _invoke_isolated_cli_runner(cli_args) 50 | 51 | assert result.exit_code == expectes_exit_code, result.stderr 52 | 53 | # Summary line should be on STDOUT 54 | stdout_lines = result.stdout.splitlines() 55 | assert f"Analysis complete! 
Output written to: {OUTPUT_FILE_NAME}" in stdout_lines 56 | 57 | # File side-effect 58 | sarif_file = tmp_path / OUTPUT_FILE_NAME 59 | assert sarif_file.exists() is expect_file, f"{OUTPUT_FILE_NAME} existence did not match expectation" 60 | 61 | 62 | def test_cli_with_stdout_output() -> None: 63 | """Test CLI invocation with output directed to STDOUT.""" 64 | output_file = Path(OUTPUT_FILE_NAME) 65 | # Clean up any existing digest.txt file before test 66 | if output_file.exists(): 67 | output_file.unlink() 68 | 69 | try: 70 | result = _invoke_isolated_cli_runner(["./", "--output", "-", "--exclude-pattern", "tests/"]) 71 | 72 | # ─── core expectations (stdout) ────────────────────────────────────- 73 | assert result.exit_code == 0, f"CLI exited with code {result.exit_code}, stderr: {result.stderr}" 74 | assert "---" in result.stdout, "Expected file separator '---' not found in STDOUT" 75 | assert "src/gitingest/__main__.py" in result.stdout, ( 76 | "Expected content (e.g., src/gitingest/__main__.py) not found in STDOUT" 77 | ) 78 | assert not output_file.exists(), f"Output file {output_file} was unexpectedly created." 79 | 80 | # ─── the summary must *not* pollute STDOUT, must appear on STDERR ─── 81 | summary = "Analysis complete! Output sent to stdout." 
82 | stdout_lines = result.stdout.splitlines() 83 | stderr_lines = result.stderr.splitlines() 84 | assert summary not in stdout_lines, "Unexpected summary message found in STDOUT" 85 | assert summary in stderr_lines, "Expected summary message not found in STDERR" 86 | assert f"Output written to: {output_file.name}" not in stderr_lines 87 | finally: 88 | # Clean up any digest.txt file that might have been created during test 89 | if output_file.exists(): 90 | output_file.unlink() 91 | 92 | 93 | def _invoke_isolated_cli_runner(args: list[str]) -> Result: 94 | """Return a ``CliRunner`` that keeps ``stderr`` separate on Click 8.0-8.1.""" 95 | kwargs = {} 96 | if "mix_stderr" in signature(CliRunner.__init__).parameters: 97 | kwargs["mix_stderr"] = False # Click 8.0-8.1 98 | runner = CliRunner(**kwargs) 99 | return runner.invoke(main, args) 100 | -------------------------------------------------------------------------------- /tests/test_flow_integration.py: -------------------------------------------------------------------------------- 1 | """Integration tests covering core functionalities, edge cases, and concurrency handling.""" 2 | 3 | import shutil 4 | from concurrent.futures import ThreadPoolExecutor 5 | from pathlib import Path 6 | from typing import Generator 7 | 8 | import pytest 9 | from fastapi import status 10 | from fastapi.testclient import TestClient 11 | from pytest_mock import MockerFixture 12 | 13 | from src.server.main import app 14 | 15 | BASE_DIR = Path(__file__).resolve().parent.parent 16 | TEMPLATE_DIR = BASE_DIR / "src" / "templates" 17 | 18 | 19 | @pytest.fixture(scope="module") 20 | def test_client() -> Generator[TestClient, None, None]: 21 | """Create a test client fixture.""" 22 | with TestClient(app) as client_instance: 23 | client_instance.headers.update({"Host": "localhost"}) 24 | yield client_instance 25 | 26 | 27 | @pytest.fixture(autouse=True) 28 | def mock_static_files(mocker: MockerFixture) -> None: 29 | """Mock the static file mount 
to avoid directory errors.""" 30 | mock_static = mocker.patch("src.server.main.StaticFiles", autospec=True) 31 | mock_static.return_value = None 32 | return mock_static 33 | 34 | 35 | @pytest.fixture(scope="module", autouse=True) 36 | def cleanup_tmp_dir() -> Generator[None, None, None]: 37 | """Remove ``/tmp/gitingest`` after this test-module is done.""" 38 | yield # run tests 39 | temp_dir = Path("/tmp/gitingest") 40 | if temp_dir.exists(): 41 | try: 42 | shutil.rmtree(temp_dir) 43 | except PermissionError as exc: 44 | print(f"Error cleaning up {temp_dir}: {exc}") 45 | 46 | 47 | @pytest.mark.asyncio 48 | async def test_remote_repository_analysis(request: pytest.FixtureRequest) -> None: 49 | """Test the complete flow of analyzing a remote repository.""" 50 | client = request.getfixturevalue("test_client") 51 | form_data = { 52 | "input_text": "https://github.com/octocat/Hello-World", 53 | "max_file_size": "243", 54 | "pattern_type": "exclude", 55 | "pattern": "", 56 | "token": "", 57 | } 58 | 59 | response = client.post("/api/ingest", json=form_data) 60 | assert response.status_code == status.HTTP_200_OK, f"Form submission failed: {response.text}" 61 | 62 | # Check that response is JSON 63 | response_data = response.json() 64 | assert "content" in response_data 65 | assert response_data["content"] 66 | assert "repo_url" in response_data 67 | assert "summary" in response_data 68 | assert "tree" in response_data 69 | assert "content" in response_data 70 | 71 | 72 | @pytest.mark.asyncio 73 | async def test_invalid_repository_url(request: pytest.FixtureRequest) -> None: 74 | """Test handling of an invalid repository URL.""" 75 | client = request.getfixturevalue("test_client") 76 | form_data = { 77 | "input_text": "https://github.com/nonexistent/repo", 78 | "max_file_size": "243", 79 | "pattern_type": "exclude", 80 | "pattern": "", 81 | "token": "", 82 | } 83 | 84 | response = client.post("/api/ingest", json=form_data) 85 | # Should return 400 for invalid repository 86 
| assert response.status_code == status.HTTP_400_BAD_REQUEST, f"Request failed: {response.text}" 87 | 88 | # Check that response is JSON error 89 | response_data = response.json() 90 | assert "error" in response_data 91 | 92 | 93 | @pytest.mark.asyncio 94 | async def test_large_repository(request: pytest.FixtureRequest) -> None: 95 | """Simulate analysis of a large repository with nested folders.""" 96 | client = request.getfixturevalue("test_client") 97 | # TODO: ingesting a large repo take too much time (eg: godotengine/godot repository) 98 | form_data = { 99 | "input_text": "https://github.com/octocat/hello-world", 100 | "max_file_size": "10", 101 | "pattern_type": "exclude", 102 | "pattern": "", 103 | "token": "", 104 | } 105 | 106 | response = client.post("/api/ingest", json=form_data) 107 | assert response.status_code == status.HTTP_200_OK, f"Request failed: {response.text}" 108 | 109 | response_data = response.json() 110 | if response.status_code == status.HTTP_200_OK: 111 | assert "content" in response_data 112 | assert response_data["content"] 113 | else: 114 | assert "error" in response_data 115 | 116 | 117 | @pytest.mark.asyncio 118 | async def test_concurrent_requests(request: pytest.FixtureRequest) -> None: 119 | """Test handling of multiple concurrent requests.""" 120 | client = request.getfixturevalue("test_client") 121 | 122 | def make_request() -> None: 123 | form_data = { 124 | "input_text": "https://github.com/octocat/hello-world", 125 | "max_file_size": "243", 126 | "pattern_type": "exclude", 127 | "pattern": "", 128 | "token": "", 129 | } 130 | response = client.post("/api/ingest", json=form_data) 131 | assert response.status_code == status.HTTP_200_OK, f"Request failed: {response.text}" 132 | 133 | response_data = response.json() 134 | if response.status_code == status.HTTP_200_OK: 135 | assert "content" in response_data 136 | assert response_data["content"] 137 | else: 138 | assert "error" in response_data 139 | 140 | with 
ThreadPoolExecutor(max_workers=5) as executor: 141 | futures = [executor.submit(make_request) for _ in range(5)] 142 | for future in futures: 143 | future.result() 144 | 145 | 146 | @pytest.mark.asyncio 147 | async def test_large_file_handling(request: pytest.FixtureRequest) -> None: 148 | """Test handling of repositories with large files.""" 149 | client = request.getfixturevalue("test_client") 150 | form_data = { 151 | "input_text": "https://github.com/octocat/Hello-World", 152 | "max_file_size": "1", 153 | "pattern_type": "exclude", 154 | "pattern": "", 155 | "token": "", 156 | } 157 | 158 | response = client.post("/api/ingest", json=form_data) 159 | assert response.status_code == status.HTTP_200_OK, f"Request failed: {response.text}" 160 | 161 | response_data = response.json() 162 | if response.status_code == status.HTTP_200_OK: 163 | assert "content" in response_data 164 | assert response_data["content"] 165 | else: 166 | assert "error" in response_data 167 | 168 | 169 | @pytest.mark.asyncio 170 | async def test_repository_with_patterns(request: pytest.FixtureRequest) -> None: 171 | """Test repository analysis with include/exclude patterns.""" 172 | client = request.getfixturevalue("test_client") 173 | form_data = { 174 | "input_text": "https://github.com/octocat/Hello-World", 175 | "max_file_size": "243", 176 | "pattern_type": "include", 177 | "pattern": "*.md", 178 | "token": "", 179 | } 180 | 181 | response = client.post("/api/ingest", json=form_data) 182 | assert response.status_code == status.HTTP_200_OK, f"Request failed: {response.text}" 183 | 184 | response_data = response.json() 185 | if response.status_code == status.HTTP_200_OK: 186 | assert "content" in response_data 187 | assert "pattern_type" in response_data 188 | assert response_data["pattern_type"] == "include" 189 | assert "pattern" in response_data 190 | assert response_data["pattern"] == "*.md" 191 | else: 192 | assert "error" in response_data 193 | 
# --------------------------------------------------------------------------------
# /tests/test_gitignore_feature.py:
# --------------------------------------------------------------------------------
"""Tests for the gitignore functionality in Gitingest."""

from pathlib import Path

import pytest

from gitingest.entrypoint import ingest_async
from gitingest.utils.ignore_patterns import load_ignore_patterns


@pytest.fixture(name="repo_path")
def repo_fixture(tmp_path: Path) -> Path:
    """Populate ``tmp_path`` with a tiny repository and return it.

    Files written:
    - ``.gitignore`` containing the single rule ``exclude.txt``
    - ``include.txt`` (expected in the digest)
    - ``exclude.txt`` (expected to be skipped while gitignore rules are honoured)
    """
    # Written in the same order as before: ignore file, then the kept file, then the ignored file.
    layout = (
        (".gitignore", "exclude.txt\n"),
        ("include.txt", "This file should be included."),
        ("exclude.txt", "This file should be excluded."),
    )
    for filename, text in layout:
        (tmp_path / filename).write_text(text)

    return tmp_path


def test_load_gitignore_patterns(tmp_path: Path) -> None:
    """Check that ``load_ignore_patterns()`` reads the rules of a ``.gitignore`` and drops comment lines."""
    # Two real rules followed by a comment line.
    (tmp_path / ".gitignore").write_text("exclude.txt\n*.log\n# a comment\n")

    loaded = load_ignore_patterns(tmp_path, filename=".gitignore")

    # Both rules must be present ...
    assert "exclude.txt" in loaded
    assert "*.log" in loaded
    # ... and nothing that looks like a comment may have slipped through.
    assert not any(entry.startswith("#") for entry in loaded)


@pytest.mark.asyncio
async def test_ingest_with_gitignore(repo_path: Path) -> None:
    """Integration test: ``ingest_async()`` honours ``.gitignore`` unless told otherwise.

    With the default settings the text of ``exclude.txt`` must be absent from the digest;
    with ``include_gitignored=True`` the text of both files must be present.
    """
    source = str(repo_path)

    # Default run: gitignore rules apply, so only 'include.txt' contributes content.
    _, _, digest_default = await ingest_async(source=source)
    assert "This file should be excluded." not in digest_default
    assert "This file should be included." in digest_default

    # Second run with gitignored files explicitly included: both files contribute content.
    _, _, digest_everything = await ingest_async(source=source, include_gitignored=True)
    assert "This file should be excluded." in digest_everything
    assert "This file should be included." in digest_everything
# --------------------------------------------------------------------------------