The response has been limited to 50k tokens of the smallest files in the repo. You can remove this limitation by removing the max tokens filter.
├── .codecov.yaml
├── .dockerignore
├── .env.example
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.yml
    │   └── feature_request.yml
    ├── dependabot.yml
    └── workflows
    │   ├── ci.yml
    │   ├── docker_image.yml
    │   ├── pr-title-check.yml
    │   ├── publish_to_pypi.yml
    │   └── scorecard.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .vscode
    └── launch.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── README.md
├── SECURITY.md
├── docs
    └── frontpage.png
├── eslint.config.cjs
├── pyproject.toml
├── requirements-dev.txt
├── requirements.txt
├── src
    ├── gitingest
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── clone.py
    │   ├── config.py
    │   ├── entrypoint.py
    │   ├── ingestion.py
    │   ├── output_formatter.py
    │   ├── query_parser.py
    │   ├── schemas
    │   │   ├── __init__.py
    │   │   ├── filesystem.py
    │   │   └── ingestion.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── auth.py
    │   │   ├── compat_func.py
    │   │   ├── compat_typing.py
    │   │   ├── exceptions.py
    │   │   ├── file_utils.py
    │   │   ├── git_utils.py
    │   │   ├── ignore_patterns.py
    │   │   ├── ingestion_utils.py
    │   │   ├── notebook.py
    │   │   ├── os_utils.py
    │   │   ├── path_utils.py
    │   │   ├── query_parser_utils.py
    │   │   └── timeout_wrapper.py
    ├── server
    │   ├── __init__.py
    │   ├── form_types.py
    │   ├── main.py
    │   ├── metrics_server.py
    │   ├── models.py
    │   ├── query_processor.py
    │   ├── routers
    │   │   ├── __init__.py
    │   │   ├── dynamic.py
    │   │   ├── index.py
    │   │   └── ingest.py
    │   ├── routers_utils.py
    │   ├── server_config.py
    │   ├── server_utils.py
    │   └── templates
    │   │   ├── base.jinja
    │   │   ├── components
    │   │       ├── _macros.jinja
    │   │       ├── footer.jinja
    │   │       ├── git_form.jinja
    │   │       ├── navbar.jinja
    │   │       ├── result.jinja
    │   │       └── tailwind_components.html
    │   │   ├── git.jinja
    │   │   ├── index.jinja
    │   │   └── swagger_ui.jinja
    └── static
    │   ├── favicons
    │       ├── apple-touch-icon.png
    │       ├── favicon-64.png
    │       ├── favicon.ico
    │       └── favicon.svg
    │   ├── icons
    │       ├── chrome.svg
    │       ├── discord.svg
    │       ├── github.svg
    │       ├── python-color.svg
    │       └── python.svg
    │   ├── js
    │       ├── git.js
    │       ├── git_form.js
    │       ├── index.js
    │       ├── navbar.js
    │       ├── posthog.js
    │       └── utils.js
    │   ├── llms.txt
    │   ├── og-image.png
    │   ├── robots.txt
    │   └── svg
    │       ├── github-star.svg
    │       ├── sparkle-green.svg
    │       └── sparkle-red.svg
└── tests
    ├── .pylintrc
    ├── __init__.py
    ├── conftest.py
    ├── query_parser
        ├── __init__.py
        ├── test_git_host_agnostic.py
        └── test_query_parser.py
    ├── test_cli.py
    ├── test_clone.py
    ├── test_flow_integration.py
    ├── test_git_utils.py
    ├── test_gitignore_feature.py
    ├── test_ingestion.py
    └── test_notebook_utils.py


/.codecov.yaml:
--------------------------------------------------------------------------------
1 | comment: false
2 | github_checks:
3 |   annotations: false
4 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # -------------------------------------------------
 2 | # Base: reuse patterns from .gitignore
 3 | # -------------------------------------------------
 4 | 
 5 | # Operating-system
 6 | .DS_Store
 7 | Thumbs.db
 8 | 
 9 | # Editor / IDE settings
10 | .vscode/
11 | !.vscode/launch.json
12 | .idea/
13 | *.swp
14 | 
15 | # Python virtual-envs & tooling
16 | .venv*/
17 | .python-version
18 | __pycache__/
19 | *.egg-info/
20 | *.egg
21 | .ruff_cache/
22 | 
23 | # Test artifacts & coverage
24 | .pytest_cache/
25 | .coverage
26 | coverage.xml
27 | htmlcov/
28 | 
29 | # Build, distribution & docs
30 | build/
31 | dist/
32 | *.wheel
33 | 
34 | # Logs & runtime output
35 | *.log
36 | logs/
37 | *.tmp
38 | tmp/
39 | 
40 | # Project-specific files
41 | history.txt
42 | digest.txt
43 | 
44 | 
45 | # -------------------------------------------------
46 | # Extra for Docker
47 | # -------------------------------------------------
48 | 
49 | # Git history
50 | .git/
51 | .gitignore
52 | 
53 | # Tests
54 | tests/
55 | 
56 | # Docs
57 | docs/
58 | *.md
59 | LICENSE
60 | 
61 | # Local overrides & secrets
62 | .env
63 | 
64 | # Docker files
65 | .dockerignore
66 | Dockerfile*
67 | 
68 | # -------------------------------------------------
69 | # Files required during build
70 | # -------------------------------------------------
71 | !pyproject.toml
72 | !src/
73 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
 1 | # Gitingest Environment Variables
 2 | 
 3 | # Host Configuration
 4 | # Comma-separated list of allowed hostnames
 5 | # Default: "gitingest.com, *.gitingest.com, localhost, 127.0.0.1"
 6 | ALLOWED_HOSTS=gitingest.com,*.gitingest.com,localhost,127.0.0.1
 7 | 
 8 | # GitHub Authentication
 9 | # Personal Access Token for accessing private repositories
10 | # Generate your token here: https://github.com/settings/tokens/new?description=gitingest&scopes=repo
11 | # GITHUB_TOKEN=your_github_token_here
12 | 
13 | # Metrics Configuration
14 | # Set to any value to enable the Prometheus metrics server
15 | # GITINGEST_METRICS_ENABLED=true
16 | # Host for the metrics server (default: "127.0.0.1")
17 | GITINGEST_METRICS_HOST=127.0.0.1
18 | # Port for the metrics server (default: "9090")
19 | GITINGEST_METRICS_PORT=9090
20 | 
21 | # Sentry Configuration
22 | # Set to any value to enable Sentry error tracking
23 | # GITINGEST_SENTRY_ENABLED=true
24 | # Sentry DSN (required if Sentry is enabled)
25 | # GITINGEST_SENTRY_DSN=your_sentry_dsn_here
26 | # Sampling rate for performance data (default: "1.0", range: 0.0-1.0)
27 | GITINGEST_SENTRY_TRACES_SAMPLE_RATE=1.0
28 | # Sampling rate for profile sessions (default: "1.0", range: 0.0-1.0)
29 | GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE=1.0
30 | # Profile lifecycle mode (default: "trace")
31 | GITINGEST_SENTRY_PROFILE_LIFECYCLE=trace
32 | # Send default personally identifiable information (default: "true")
33 | GITINGEST_SENTRY_SEND_DEFAULT_PII=true
34 | # Environment name for Sentry (default: "")
35 | GITINGEST_SENTRY_ENVIRONMENT=development
36 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
  1 | name: Bug report 🐞
  2 | description: Report a bug or internal server error when using Gitingest
  3 | title: "(bug): "
  4 | labels: ["bug"]
  5 | body:
  6 |   - type: markdown
  7 |     attributes:
  8 |       value: |
  9 |         Thanks for taking the time to report a bug! :lady_beetle:
 10 | 
 11 |         Please fill out the following details to help us reproduce and fix the issue. :point_down:
 12 | 
 13 |   - type: dropdown
 14 |     id: interface
 15 |     attributes:
 16 |       label: Which interface did you use?
 17 |       default: 0
 18 |       options:
 19 |         - "Select one..."
 20 |         - Web UI
 21 |         - CLI
 22 |         - PyPI package
 23 |     validations:
 24 |       required: true
 25 | 
 26 |   - type: input
 27 |     id: repo_url
 28 |     attributes:
 29 |       label: Repository URL (if public)
 30 |       placeholder: e.g., https://github.com/<username>/<repo>/blob_or_tree/commit_branch_or_tag/subdir
 31 | 
 32 |   - type: dropdown
 33 |     id: git_host
 34 |     attributes:
 35 |       label: Git host
 36 |       description: The Git host of the repository.
 37 |       default: 0
 38 |       options:
 39 |         - "Select one..."
 40 |         - GitHub (github.com)
 41 |         - GitLab (gitlab.com)
 42 |         - Bitbucket (bitbucket.org)
 43 |         - Gitea (gitea.com)
 44 |         - Codeberg (codeberg.org)
 45 |         - Gist (gist.github.com)
 46 |         - Kaggle (kaggle.com)
 47 |         - GitHub Enterprise (github.company.com)
 48 |         - Other (specify below)
 49 |     validations:
 50 |       required: true
 51 | 
 52 |   - type: input
 53 |     id: git_host_other
 54 |     attributes:
 55 |       label: Other Git host
 56 |       placeholder: If you selected "Other", please specify the Git host here.
 57 | 
 58 |   - type: dropdown
 59 |     id: repo_visibility
 60 |     attributes:
 61 |       label: Repository visibility
 62 |       default: 0
 63 |       options:
 64 |         - "Select one..."
 65 |         - public
 66 |         - private
 67 |     validations:
 68 |       required: true
 69 | 
 70 |   - type: dropdown
 71 |     id: revision
 72 |     attributes:
 73 |       label: Commit, branch, or tag
 74 |       default: 0
 75 |       options:
 76 |         - "Select one..."
 77 |         - default branch
 78 |         - commit
 79 |         - branch
 80 |         - tag
 81 |     validations:
 82 |       required: true
 83 | 
 84 |   - type: dropdown
 85 |     id: ingest_scope
 86 |     attributes:
 87 |       label: Did you ingest the full repository or a subdirectory?
 88 |       default: 0
 89 |       options:
 90 |         - "Select one..."
 91 |         - full repository
 92 |         - subdirectory
 93 |     validations:
 94 |       required: true
 95 | 
 96 |   - type: dropdown
 97 |     id: os
 98 |     attributes:
 99 |       label: Operating system
100 |       default: 0
101 |       options:
102 |         - "Select one..."
103 |         - Not relevant (Web UI)
104 |         - macOS
105 |         - Windows
106 |         - Linux
107 |     validations:
108 |       required: true
109 | 
110 |   - type: dropdown
111 |     id: browser
112 |     attributes:
113 |       label: Browser (Web UI only)
114 |       default: 0
115 |       options:
116 |         - "Select one..."
117 |         - Not relevant (CLI / PyPI)
118 |         - Chrome
119 |         - Firefox
120 |         - Safari
121 |         - Edge
122 |         - Other (specify below)
123 |     validations:
124 |       required: true
125 | 
126 |   - type: input
127 |     id: browser_other
128 |     attributes:
129 |       label: Other browser
130 |       placeholder: If you selected "Other", please specify the browser here.
131 | 
132 |   - type: input
133 |     id: gitingest_version
134 |     attributes:
135 |       label: Gitingest version
136 |       placeholder: e.g., v0.1.5
137 |       description: Not required if you used the Web UI.
138 | 
139 |   - type: input
140 |     id: python_version
141 |     attributes:
142 |       label: Python version
143 |       placeholder: e.g., 3.11.5
144 |       description: Not required if you used the Web UI.
145 | 
146 |   - type: textarea
147 |     id: bug_description
148 |     attributes:
149 |       label: Bug description
150 |       placeholder: Describe the bug here.
151 |       description: A detailed but concise description of the bug.
152 |     validations:
153 |       required: true
154 | 
155 | 
156 |   - type: textarea
157 |     id: steps_to_reproduce
158 |     attributes:
159 |       label: Steps to reproduce
160 |       placeholder: Include the exact commands or actions that led to the error.
161 |       description: Include the exact commands or actions that led to the error *(if relevant)*.
162 |       render: shell
163 | 
164 |   - type: textarea
165 |     id: expected_behavior
166 |     attributes:
167 |       label: Expected behavior
168 |       placeholder: Describe what you expected to happen.
169 |       description: Describe what you expected to happen *(if relevant)*.
170 | 
171 |   - type: textarea
172 |     id: actual_behavior
173 |     attributes:
174 |       label: Actual behavior
175 |       description: Paste the full error message or stack trace here.
176 | 
177 |   - type: textarea
178 |     id: additional_context
179 |     attributes:
180 |       label: Additional context, logs, or screenshots
181 |       placeholder: Add any other context, links, or screenshots about the issue here.
182 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
  1 | name: Feature request 💡
  2 | description: Suggest a new feature or improvement for Gitingest
  3 | title: "(feat): "
  4 | labels: ["enhancement"]
  5 | body:
  6 |   - type: markdown
  7 |     attributes:
  8 |       value: |
  9 |         Thanks for taking the time to help us improve **Gitingest**! :sparkles:
 10 | 
 11 |         Please fill in the sections below to describe your idea. The more detail you provide, the easier it is for us to evaluate and plan the work. :point_down:
 12 | 
 13 |   - type: input
 14 |     id: summary
 15 |     attributes:
 16 |       label: Feature summary
 17 |       placeholder: One-sentence description of the feature.
 18 |     validations:
 19 |       required: true
 20 | 
 21 |   - type: textarea
 22 |     id: problem
 23 |     attributes:
 24 |       label: Problem / motivation
 25 |       description: What problem does this feature solve? How does it affect your workflow?
 26 |       placeholder: Why is this feature important? Describe the pain point or limitation you're facing.
 27 |     validations:
 28 |       required: true
 29 | 
 30 |   - type: textarea
 31 |     id: proposal
 32 |     attributes:
 33 |       label: Proposed solution
 34 |       placeholder: Describe what you would like to see happen.
 35 |       description: Outline the feature as you imagine it. *(optional)*
 36 | 
 37 | 
 38 |   - type: textarea
 39 |     id: alternatives
 40 |     attributes:
 41 |       label: Alternatives considered
 42 |       placeholder: List other approaches you've considered or work-arounds you use today.
 43 |       description: Feel free to mention why those alternatives don't fully solve the problem.
 44 | 
 45 |   - type: dropdown
 46 |     id: interface
 47 |     attributes:
 48 |       label: Which interface would this affect?
 49 |       default: 0
 50 |       options:
 51 |         - "Select one..."
 52 |         - Web UI
 53 |         - CLI
 54 |         - PyPI package
 55 |         - CLI + PyPI package
 56 |         - All
 57 |     validations:
 58 |       required: true
 59 | 
 60 |   - type: dropdown
 61 |     id: priority
 62 |     attributes:
 63 |       label: How important is this to you?
 64 |       default: 0
 65 |       options:
 66 |         - "Select one..."
 67 |         - Nice to have
 68 |         - Important
 69 |         - Critical
 70 |     validations:
 71 |       required: true
 72 | 
 73 |   - type: dropdown
 74 |     id: willingness
 75 |     attributes:
 76 |       label: Would you like to work on this feature yourself?
 77 |       default: 0
 78 |       options:
 79 |         - "Select one..."
 80 |         - Yes, I'd like to implement it
 81 |         - Maybe, if I get some guidance
 82 |         - No, just requesting (absolutely fine!)
 83 |     validations:
 84 |       required: true
 85 | 
 86 |   - type: dropdown
 87 |     id: support_needed
 88 |     attributes:
 89 |       label: Would you need support from the maintainers (if you're implementing it yourself)?
 90 |       default: 0
 91 |       options:
 92 |         - "Select one..."
 93 |         - No, I can handle it solo
 94 |         - Yes, I'd need some guidance
 95 |         - Not sure yet
 96 |         - This is just a suggestion, I'm not planning to implement it myself (absolutely fine!)
 97 | 
 98 |   - type: textarea
 99 |     id: additional_context
100 |     attributes:
101 |       label: Additional context, screenshots, or examples
102 |       placeholder: Add links, sketches, or any other context that would help us understand and implement the feature.
103 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   # ─── Python (pip) ─────────────────────────────
 4 |   - package-ecosystem: "pip"
 5 |     directory: "/"
 6 |     schedule: { interval: "weekly" }
 7 |     labels: [ "dependencies", "pip" ]
 8 |     groups: # Group patches & minors from dev-only tools
 9 |       dev-py:
10 |         dependency-type: "development"
11 |         update-types: ["minor", "patch"]
12 | 
13 |   # ─── GitHub Actions ───────────────────────────
14 |   - package-ecosystem: "github-actions"
15 |     directory: "/"
16 |     schedule: { interval: "weekly" }
17 |     labels: [ "dependencies", "gh-actions" ]
18 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | permissions:
10 |   contents: read
11 | 
12 | jobs:
13 |   test:
14 |     runs-on: ${{ matrix.os }}
15 |     strategy:
16 |       fail-fast: true
17 |       matrix:
18 |         os: [ubuntu-latest, macos-latest, windows-latest]
19 |         python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
20 | 
21 |         include:
22 |           - os: ubuntu-latest
23 |             python-version: "3.13"
24 |             coverage: true
25 | 
26 |     steps:
27 |     - uses: actions/checkout@v4
28 | 
29 |     - name: Set up Python
30 |       uses: actions/setup-python@v5
31 |       with:
32 |         python-version: ${{ matrix.python-version }}
33 | 
34 |     - name: Locate pip cache
35 |       id: pip-cache
36 |       shell: bash
37 |       run: echo "dir=$(python -m pip cache dir)" >> "$GITHUB_OUTPUT"
38 | 
39 |     - name: Cache pip
40 |       uses: actions/cache@v4
41 |       with:
42 |         path: ${{ steps.pip-cache.outputs.dir }}
43 |         key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
44 |         restore-keys: ${{ runner.os }}-pip-
45 | 
46 |     - name: Install dependencies
47 |       run: |
48 |         python -m pip install --upgrade pip
49 |         python -m pip install ".[dev]"
50 | 
51 |     - name: Run tests
52 |       if: ${{ matrix.coverage != true }}
53 |       run: pytest
54 | 
55 |     - name: Run tests and collect coverage
56 |       if: ${{ matrix.coverage == true }}
57 |       run: |
58 |         pytest \
59 |         --cov=gitingest \
60 |         --cov=server \
61 |         --cov-branch \
62 |         --cov-report=xml \
63 |         --cov-report=term
64 | 
65 |     - name: Upload coverage to Codecov
66 |       if: ${{ matrix.coverage == true }}
67 |       uses: codecov/codecov-action@v5
68 |       with:
69 |         token: ${{ secrets.CODECOV_TOKEN }}
70 |         files: coverage.xml
71 |         flags: ${{ matrix.os }}-py${{ matrix.python-version }}
72 |         name: codecov-${{ matrix.os }}-${{ matrix.python-version }}
73 |         fail_ci_if_error: true
74 |         verbose: true
75 | 
76 |     - name: Run pre-commit hooks
77 |       uses: pre-commit/action@v3.0.1
78 |       if: ${{ matrix.python-version == '3.13' && matrix.os == 'ubuntu-latest' }}
79 | 


--------------------------------------------------------------------------------
/.github/workflows/docker_image.yml:
--------------------------------------------------------------------------------
 1 | name: Build & Push Container
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - 'main'
 6 |     tags:
 7 |       - '*'
 8 |   merge_group:
 9 |   pull_request:
10 |     types: [labeled, synchronize, reopened, ready_for_review, opened]
11 | 
12 | concurrency:
13 |   group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
14 |   cancel-in-progress: true
15 | 
16 | env:
17 |   REGISTRY: ghcr.io
18 |   IMAGE_NAME: ${{ github.repository }}
19 |   # Evaluates to 'true' only for pull requests labeled 'push-container', which allows pushing the container from that PR
20 |   PUSH_FROM_PR: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'push-container') }}
21 | 
22 | jobs:
23 |   docker-build:
24 |     runs-on: ubuntu-latest
25 |     permissions:
26 |       contents: read
27 |       packages: write
28 |       attestations: write
29 |       id-token: write
30 |     steps:
31 |       - uses: actions/checkout@v4
32 | 
33 |       - name: Set current timestamp
34 |         id: vars
35 |         run: |
36 |           echo "timestamp=$(date +%s)" >> $GITHUB_OUTPUT
37 |           echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
38 | 
39 |       - name: Log in to the Container registry
40 |         uses: docker/login-action@v3
41 |         with:
42 |           registry: ${{ env.REGISTRY }}
43 |           username: ${{ github.actor }}
44 |           password: ${{ secrets.GITHUB_TOKEN }}
45 | 
46 |       - name: Docker Meta
47 |         id: meta
48 |         uses: docker/metadata-action@v5
49 |         with:
50 |           images: |
51 |             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
52 |           flavor: |
53 |             latest=false
54 |           tags: |
55 |             type=ref,event=branch,branch=main
56 |             type=ref,event=branch,branch=main,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }}
57 |             type=pep440,pattern={{raw}}
58 |             type=ref,event=pr,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }}
59 | 
60 |       - name: Set up QEMU
61 |         uses: docker/setup-qemu-action@v3
62 | 
63 |       - name: Set up Docker Buildx
64 |         uses: docker/setup-buildx-action@v3
65 | 
66 |       - name: Build and push
67 |         uses: docker/build-push-action@v6
68 |         id: push
69 |         with:
70 |           context: .
71 |           platforms: linux/amd64, linux/arm64
72 |           push: ${{ github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' }}
73 |           tags: ${{ steps.meta.outputs.tags }}
74 |           labels: ${{ steps.meta.outputs.labels }}
75 |           cache-from: type=gha
76 |           cache-to: type=gha,mode=max
77 | 
78 |       - name: Generate artifact attestation
79 |         if: github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true'
80 |         uses: actions/attest-build-provenance@v2
81 |         with:
82 |           subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
83 |           subject-digest: ${{ steps.push.outputs.digest }}
84 |           push-to-registry: true
85 | 


--------------------------------------------------------------------------------
/.github/workflows/pr-title-check.yml:
--------------------------------------------------------------------------------
 1 | name: PR Conventional Commit Validation
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     types: [opened, synchronize, reopened, edited]
 6 | 
 7 | jobs:
 8 |   validate-pr-title:
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - name: PR Conventional Commit Validation
12 |         uses:  ytanikin/pr-conventional-commits@1.4.1
13 |         with:
14 |           task_types: '["feat","fix","docs","test","ci","refactor","perf","chore","revert"]'
15 |           add_label: 'false'
16 | 


--------------------------------------------------------------------------------
/.github/workflows/publish_to_pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [created] # Run when you click “Publish release”
 6 |   workflow_dispatch: # ... or run it manually from the Actions tab
 7 | 
 8 | permissions:
 9 |   contents: read
10 | 
11 | jobs:
12 |   release-build:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - uses: actions/checkout@v4
17 | 
18 |       - name: Set up Python 3.13
19 |         uses: actions/setup-python@v5
20 |         with:
21 |           python-version: "3.13"
22 |           cache: pip
23 |           cache-dependency-path: pyproject.toml
24 | 
25 |       - name: Build package
26 |         run: |
27 |           python -m pip install --upgrade pip
28 |           python -m pip install build twine
29 |           python -m build && twine check dist/*
30 |       - name: Upload dist artefact
31 |         uses: actions/upload-artifact@v4
32 |         with:
33 |           name: dist
34 |           path: dist/
35 | 
36 | # Publish to PyPI (only if the “release-build” job succeeded)
37 |   pypi-publish:
38 |     needs: release-build
39 |     runs-on: ubuntu-latest
40 |     environment: pypi
41 | 
42 |     permissions:
43 |       id-token: write # OIDC token for trusted publishing
44 | 
45 |     steps:
46 |       - uses: actions/download-artifact@v4
47 |         with:
48 |           name: dist
49 |           path: dist/
50 | 
51 |       - uses: pypa/gh-action-pypi-publish@release/v1
52 |         with:
53 |           verbose: true
54 | 


--------------------------------------------------------------------------------
/.github/workflows/scorecard.yml:
--------------------------------------------------------------------------------
 1 | name: OSSF Scorecard
 2 | on:
 3 |   branch_protection_rule:
 4 |   schedule:
 5 |     - cron: '33 11 * * 2'  # Every Tuesday at 11:33 AM UTC
 6 |   push:
 7 |     branches: [ main ]
 8 | 
 9 | permissions: read-all
10 | 
11 | concurrency: # avoid overlapping runs
12 |   group: scorecard-${{ github.ref }}
13 |   cancel-in-progress: true
14 | 
15 | jobs:
16 |   analysis:
17 |     name: Scorecard analysis
18 |     runs-on: ubuntu-latest
19 |     permissions:
20 |       security-events: write # upload SARIF to code-scanning
21 |       id-token: write # publish results for the badge
22 | 
23 |     steps:
24 |       - name: Checkout
25 |         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
26 |         with:
27 |           persist-credentials: false
28 | 
29 |       - name: Run Scorecard
30 |         uses: ossf/scorecard-action@05b42c624433fc40578a4040d5cf5e36ddca8cde
31 |         with:
32 |           results_file: results.sarif
33 |           results_format: sarif
34 |           publish_results: true  # enables the public badge
35 | 
36 |       - name: Upload to code-scanning
37 |         uses: github/codeql-action/upload-sarif@v3
38 |         with:
39 |           sarif_file: results.sarif
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Operating-system
 2 | .DS_Store
 3 | Thumbs.db
 4 | 
 5 | # Editor / IDE settings
 6 | .vscode/
 7 | !.vscode/launch.json
 8 | .idea/
 9 | *.swp
10 | 
11 | # Python virtual-envs & tooling
12 | .venv*/
13 | venv/
14 | .python-version
15 | __pycache__/
16 | *.egg-info/
17 | *.egg
18 | .ruff_cache/
19 | 
20 | # Test artifacts & coverage
21 | .pytest_cache/
22 | .coverage
23 | coverage.xml
24 | htmlcov/
25 | 
26 | # Build, distribution & docs
27 | build/
28 | dist/
29 | *.wheel
30 | 
31 | 
32 | 
33 | # Logs & runtime output
34 | *.log
35 | logs/
36 | *.tmp
37 | tmp/
38 | 
39 | # Project-specific files
40 | history.txt
41 | digest.txt
42 | 
43 | # Environment variables
44 | .env
45 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
  1 | repos:
  2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
  3 |     rev: v5.0.0
  4 |     hooks:
  5 |       - id: check-added-large-files
  6 |         description: 'Prevent large files from being committed.'
  7 |         args: ['--maxkb=10000']
  8 | 
  9 |       - id: check-case-conflict
 10 |         description: 'Check for files that would conflict in case-insensitive filesystems.'
 11 | 
 12 |       - id: fix-byte-order-marker
 13 |         description: 'Remove utf-8 byte order marker.'
 14 | 
 15 |       - id: mixed-line-ending
 16 |         description: 'Replace mixed line ending.'
 17 | 
 18 |       - id: destroyed-symlinks
 19 |         description: 'Detect symlinks which are changed to regular files with a content of a path which that symlink was pointing to.'
 20 | 
 21 |       - id: check-ast
 22 |         description: 'Check for parseable syntax.'
 23 | 
 24 |       - id: end-of-file-fixer
 25 |         description: 'Ensure that a file is either empty, or ends with one newline.'
 26 | 
 27 |       - id: trailing-whitespace
 28 |         description: 'Trim trailing whitespace.'
 29 | 
 30 |       - id: check-docstring-first
 31 |         description: 'Check a common error of defining a docstring after code.'
 32 | 
 33 |       - id: requirements-txt-fixer
 34 |         description: 'Sort entries in requirements.txt.'
 35 | 
 36 |   - repo: https://github.com/MarcoGorelli/absolufy-imports
 37 |     rev: v0.3.1
 38 |     hooks:
 39 |       - id: absolufy-imports
 40 |         description: 'Automatically convert relative imports to absolute. (Use `args: [--never]` to revert.)'
 41 | 
 42 |   - repo: https://github.com/asottile/pyupgrade
 43 |     rev: v3.20.0
 44 |     hooks:
 45 |       - id: pyupgrade
 46 |         description: 'Automatically upgrade syntax for newer versions.'
 47 |         args: [--py3-plus, --py36-plus]
 48 | 
 49 |   - repo: https://github.com/pre-commit/pygrep-hooks
 50 |     rev: v1.10.0
 51 |     hooks:
 52 |       - id: python-check-blanket-noqa
 53 |         description: 'Enforce that `# noqa` annotations always occur with specific codes.'
 54 | 
 55 |       - id: python-check-blanket-type-ignore
 56 |         description: 'Enforce that `# type: ignore` annotations always occur with specific codes.'
 57 | 
 58 |       - id: python-use-type-annotations
 59 |         description: 'Enforce that python3.6+ type annotations are used instead of type comments.'
 60 | 
 61 |   - repo: https://github.com/PyCQA/isort
 62 |     rev: 6.0.1
 63 |     hooks:
 64 |       - id: isort
 65 |         description: 'Sort imports alphabetically, and automatically separated into sections and by type.'
 66 | 
 67 |   - repo: https://github.com/pre-commit/mirrors-eslint
 68 |     rev: v9.30.1
 69 |     hooks:
 70 |       - id: eslint
 71 |         description: 'Lint javascript files.'
 72 |         files: \.js$
 73 |         args: [--max-warnings=0, --fix]
 74 |         additional_dependencies:
 75 |           [
 76 |             'eslint@9.30.1',
 77 |             '@eslint/js@9.30.1',
 78 |             'eslint-plugin-import@2.32.0',
 79 |             'globals@16.3.0',
 80 |           ]
 81 | 
 82 |   - repo: https://github.com/djlint/djLint
 83 |     rev: v1.36.4
 84 |     hooks:
 85 |       - id: djlint-reformat-jinja
 86 | 
 87 |   - repo: https://github.com/igorshubovych/markdownlint-cli
 88 |     rev: v0.45.0
 89 |     hooks:
 90 |       - id: markdownlint
 91 |         description: 'Lint markdown files.'
 92 |         args: ['--disable=line-length']
 93 | 
 94 |   - repo: https://github.com/astral-sh/ruff-pre-commit
 95 |     rev: v0.12.2
 96 |     hooks:
 97 |       - id: ruff-check
 98 |       - id: ruff-format
 99 | 
100 |   - repo: https://github.com/jsh9/pydoclint
101 |     rev: 0.6.7
102 |     hooks:
103 |       - id: pydoclint
104 |         name: pydoclint for source
105 |         args: [--style=numpy]
106 |         files: ^src/
107 | 
108 |   - repo: https://github.com/pycqa/pylint
109 |     rev: v3.3.7
110 |     hooks:
111 |       - id: pylint
112 |         name: pylint for source
113 |         files: ^src/
114 |         additional_dependencies:
115 |           [
116 |             click>=8.0.0,
117 |             'fastapi[standard]>=0.109.1',
118 |             httpx,
119 |             pathspec>=0.12.1,
120 |             prometheus-client,
121 |             pydantic,
122 |             pytest-asyncio,
123 |             pytest-mock,
124 |             python-dotenv,
125 |             slowapi,
126 |             starlette>=0.40.0,
127 |             tiktoken>=0.7.0,
128 |             uvicorn>=0.11.7,
129 |           ]
130 | 
131 |       - id: pylint
132 |         name: pylint for tests
133 |         files: ^tests/
134 |         args:
135 |           - --rcfile=tests/.pylintrc
136 |         additional_dependencies:
137 |           [
138 |             click>=8.0.0,
139 |             'fastapi[standard]>=0.109.1',
140 |             httpx,
141 |             pathspec>=0.12.1,
142 |             prometheus-client,
143 |             pydantic,
144 |             pytest-asyncio,
145 |             pytest-mock,
146 |             python-dotenv,
147 |             slowapi,
148 |             starlette>=0.40.0,
149 |             tiktoken>=0.7.0,
150 |             uvicorn>=0.11.7,
151 |           ]
152 | 
153 |   - repo: meta
154 |     hooks:
155 |       - id: check-hooks-apply
156 |       - id: check-useless-excludes
157 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "configurations": [
 3 |         {
 4 |             "name": "Python Debugger: Module",
 5 |             "type": "debugpy",
 6 |             "request": "launch",
 7 |             "module": "uvicorn",
 8 |             "args": ["server.main:app", "--host", "0.0.0.0", "--port", "8000"],
 9 |             "cwd": "${workspaceFolder}/src"
10 |         }
11 |     ]
12 | }
13 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity and expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, religion, or sexual identity
 10 | and orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 |   and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the
 26 |   overall community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or
 31 |   advances of any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email
 35 |   address, without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 |   professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards of
 42 | acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies when
 54 | an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail address,
 56 | posting via an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at
 63 | <romain@coderamp.io>.
 64 | All complaints will be reviewed and investigated promptly and fairly.
 65 | 
 66 | All community leaders are obligated to respect the privacy and security of the
 67 | reporter of any incident.
 68 | 
 69 | ## Enforcement Guidelines
 70 | 
 71 | Community leaders will follow these Community Impact Guidelines in determining
 72 | the consequences for any action they deem in violation of this Code of Conduct:
 73 | 
 74 | ### 1. Correction
 75 | 
 76 | **Community Impact**: Use of inappropriate language or other behavior deemed
 77 | unprofessional or unwelcome in the community.
 78 | 
 79 | **Consequence**: A private, written warning from community leaders, providing
 80 | clarity around the nature of the violation and an explanation of why the
 81 | behavior was inappropriate. A public apology may be requested.
 82 | 
 83 | ### 2. Warning
 84 | 
 85 | **Community Impact**: A violation through a single incident or series
 86 | of actions.
 87 | 
 88 | **Consequence**: A warning with consequences for continued behavior. No
 89 | interaction with the people involved, including unsolicited interaction with
 90 | those enforcing the Code of Conduct, for a specified period of time. This
 91 | includes avoiding interactions in community spaces as well as external channels
 92 | like social media. Violating these terms may lead to a temporary or
 93 | permanent ban.
 94 | 
 95 | ### 3. Temporary Ban
 96 | 
 97 | **Community Impact**: A serious violation of community standards, including
 98 | sustained inappropriate behavior.
 99 | 
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 | 
106 | ### 4. Permanent Ban
107 | 
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 | 
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 | 
115 | ## Attribution
116 | 
117 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org),
118 | version 2.0, available at
119 | <https://www.contributor-covenant.org/version/2/0/code_of_conduct.html>.
120 | 
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 | 
124 | For answers to common questions about this code of conduct, see the FAQ at
125 | <https://www.contributor-covenant.org/faq>. Translations are available at
126 | <https://www.contributor-covenant.org/translations>.
127 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Gitingest
 2 | 
 3 | Thanks for your interest in contributing to **Gitingest** 🚀 Our goal is to keep the codebase friendly to first-time contributors.
 4 | If you ever get stuck, reach out on [Discord](https://discord.com/invite/zerRaGK9EC).
 5 | 
 6 | ---
 7 | 
 8 | ## How to Contribute (non-technical)
 9 | 
10 | - **Create an Issue** – found a bug or have a feature idea?
11 |   [Open an issue](https://github.com/coderamp-labs/gitingest/issues/new).
12 | - **Spread the Word** – tweet, blog, or tell a friend.
13 | - **Use Gitingest** – real-world usage gives the best feedback. File issues or ping us on [Discord](https://discord.com/invite/zerRaGK9EC) with anything you notice.
14 | 
15 | ---
16 | 
17 | ## How to submit a Pull Request
18 | 
19 | > **Prerequisites**: The project uses **Python 3.9+** and `pre-commit` for development.
20 | 
21 | 1. **Fork** the repository.
22 | 
23 | 2. **Clone** your fork:
24 | 
25 |    ```bash
26 |    git clone https://github.com/<your-username>/gitingest.git
27 |    cd gitingest
28 |    ```
29 | 
30 | 3. **Set up the dev environment**:
31 | 
32 |    ```bash
33 |    python -m venv .venv
34 |    source .venv/bin/activate
35 |    pip install -e ".[dev]"
36 |    pre-commit install
37 |    ```
38 | 
39 | 4. **Create a branch** for your changes:
40 | 
41 |    ```bash
42 |    git checkout -b your-branch
43 |    ```
44 | 
45 | 5. **Make your changes** (and add tests when relevant).
46 | 
47 | 6. **Stage** the changes:
48 | 
49 |    ```bash
50 |    git add .
51 |    ```
52 | 
53 | 7. **Run the backend test suite**:
54 | 
55 |    ```bash
56 |    pytest
57 |    ```
58 | 
59 | 8. *(Optional)* **Run `pre-commit` on all files** to check hooks without committing:
60 | 
61 |    ```bash
62 |    pre-commit run --all-files
63 |    ```
64 | 
65 | 9. **Run the local server** to sanity-check:
66 | 
67 |     ```bash
68 |     cd src
69 |     uvicorn server.main:app
70 |     ```
71 | 
72 |     Open [http://localhost:8000](http://localhost:8000) to confirm everything works.
73 | 
74 | 10. **Commit** (signed):
75 | 
76 |     ```bash
77 |     git commit -S -m "Your commit message"
78 |     ```
79 | 
80 |     If *pre-commit* complains, fix the problems and repeat **5 – 9**.
81 | 
82 | 11. **Push** your branch:
83 | 
84 |     ```bash
85 |     git push origin your-branch
86 |     ```
87 | 
88 | 12. **Open a pull request** on GitHub with a clear description.
89 | 
90 | 13. **Iterate** on any review feedback—update your branch and repeat **6 – 11** as needed.
91 | 
92 | *(Optional) Invite a maintainer to your branch for easier collaboration.*
93 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Stage 1: Install Python dependencies
 2 | FROM python:3.13-slim AS python-builder
 3 | 
 4 | WORKDIR /build
 5 | 
 6 | RUN set -eux; \
 7 |     apt-get update; \
 8 |     apt-get install -y --no-install-recommends gcc python3-dev; \
 9 |     rm -rf /var/lib/apt/lists/*
10 | 
11 | COPY pyproject.toml .
12 | COPY src/ ./src/
13 | 
14 | RUN set -eux; \
15 |     pip install --no-cache-dir --upgrade pip; \
16 |     pip install --no-cache-dir --timeout 1000 .
17 | 
18 | # Stage 2: Runtime image
19 | FROM python:3.13-slim
20 | 
21 | ARG UID=1000
22 | ARG GID=1000
23 | 
24 | ENV PYTHONUNBUFFERED=1 \
25 |     PYTHONDONTWRITEBYTECODE=1
26 | 
27 | RUN set -eux; \
28 |     apt-get update; \
29 |     apt-get install -y --no-install-recommends git curl; \
30 |     apt-get clean; \
31 |     rm -rf /var/lib/apt/lists/*
32 | 
33 | WORKDIR /app
34 | RUN set -eux; \
35 |     groupadd -g "$GID" appuser; \
36 |     useradd -m -u "$UID" -g "$GID" appuser
37 | 
38 | COPY --from=python-builder --chown=$UID:$GID /usr/local/lib/python3.13/site-packages/ /usr/local/lib/python3.13/site-packages/
39 | COPY --chown=$UID:$GID src/ ./
40 | 
41 | RUN set -eux; \
42 |     chown -R appuser:appuser /app
43 | USER appuser
44 | 
45 | EXPOSE 8000
46 | EXPOSE 9090
47 | CMD ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"]
48 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Romain Courtois
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 | 
3 | ## Reporting a Vulnerability
4 | 
5 | If you have discovered a vulnerability inside the project, report it privately at <romain@coderamp.io>. This way the maintainer can work on a proper fix without disclosing the problem to the public before it has been solved.
6 | 


--------------------------------------------------------------------------------
/docs/frontpage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/docs/frontpage.png


--------------------------------------------------------------------------------
/eslint.config.cjs:
--------------------------------------------------------------------------------
 1 | const js = require('@eslint/js');
 2 | const globals = require('globals');
 3 | const importPlugin = require('eslint-plugin-import');
 4 | 
 5 | module.exports = [ // ESLint flat config: an array of config objects
 6 |   js.configs.recommended, // baseline rule set from @eslint/js
 7 | 
 8 |   {
 9 |     files: ['src/static/js/**/*.js'], // everything in this object applies only to files matching this glob
10 | 
11 |     languageOptions: {
12 |       parserOptions: { ecmaVersion: 2021, sourceType: 'module' },
13 |       globals: { // identifiers the linted scripts may use without declaring them; all are read-only
14 |         ...globals.browser,
15 |         changePattern: 'readonly',
16 |         copyFullDigest: 'readonly',
17 |         copyText: 'readonly',
18 |         downloadFullDigest: 'readonly',
19 |         handleSubmit: 'readonly',
20 |         posthog: 'readonly',
21 |         submitExample: 'readonly',
22 |         toggleAccessSettings: 'readonly',
23 |         toggleFile: 'readonly',
24 |       },
25 |     },
26 | 
27 |     plugins: { import: importPlugin }, // provides the import/* rules enabled below
28 | 
29 |     rules: {
30 |       // Import hygiene (eslint-plugin-import)
31 |       'import/no-extraneous-dependencies': 'error',
32 |       'import/no-unresolved': 'error',
33 |       'import/order': ['warn', { alphabetize: { order: 'asc' } }],
34 | 
35 |       // Safety & bug-catchers
36 |       'consistent-return': 'error',
37 |       'default-case': 'error',
38 |       'no-implicit-globals': 'error',
39 |       'no-shadow': 'error',
40 | 
41 |       // Maintainability / complexity (warnings only; thresholds are the numeric second element)
42 |       complexity: ['warn', 10],
43 |       'max-depth': ['warn', 4],
44 |       'max-lines': ['warn', 500],
45 |       'max-params': ['warn', 5],
46 | 
47 |       // Stylistic consistency (auto-fixable)
48 |       'arrow-parens': ['error', 'always'],
49 |       curly: ['error', 'all'],
50 |       indent: ['error', 4, { SwitchCase: 2 }], // NOTE(review): SwitchCase multiplies the indent size (2 x 4 = 8 spaces) - confirm intended
51 |       'newline-per-chained-call': ['warn', { ignoreChainWithDepth: 2 }],
52 |       'no-multi-spaces': 'error',
53 |       'object-shorthand': ['error', 'always'],
54 |       'padding-line-between-statements': [
55 |         'warn',
56 |         { blankLine: 'always', prev: '*', next: 'return' },
57 |         { blankLine: 'always', prev: ['const', 'let', 'var'], next: '*' },
58 |         { blankLine: 'any', prev: ['const', 'let', 'var'], next: ['const', 'let', 'var'] },
59 |       ],
60 |       'quote-props': ['error', 'consistent-as-needed'],
61 |       quotes: ['error', 'single', { avoidEscape: true }],
62 |       semi: 'error',
63 | 
64 |       // Modern / performance tips
65 |       'arrow-body-style': ['warn', 'as-needed'],
66 |       'prefer-arrow-callback': 'error',
67 |       'prefer-exponentiation-operator': 'error',
68 |       'prefer-numeric-literals': 'error',
69 |       'prefer-object-has-own': 'warn',
70 |       'prefer-object-spread': 'error',
71 |       'prefer-template': 'error',
72 |     },
73 |   },
74 | ];
75 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "gitingest"
  3 | version = "0.1.5"
  4 | description="CLI tool to analyze and create text dumps of codebases for LLMs"
  5 | readme = {file = "README.md", content-type = "text/markdown" }
  6 | requires-python = ">= 3.8"
  7 | dependencies = [
  8 |     "click>=8.0.0",
  9 |     "fastapi[standard]>=0.109.1",  # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38)
 10 |     "httpx",
 11 |     "pathspec>=0.12.1",
 12 |     "pydantic",
 13 |     "python-dotenv",
 14 |     "slowapi",
 15 |     "starlette>=0.40.0",  # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw)
 16 |     "tiktoken>=0.7.0",  # Support for o200k_base encoding
 17 |     "typing_extensions>= 4.0.0; python_version < '3.10'",
 18 |     "uvicorn>=0.11.7",  # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150)
 19 |     "prometheus-client",
 20 | ]
 21 | 
 22 | license = {file = "LICENSE"}
 23 | authors = [
 24 |     { name = "Romain Courtois", email = "romain@coderamp.io" },
 25 |     { name = "Filip Christiansen"},
 26 | ]
 27 | classifiers=[
 28 |     "Development Status :: 3 - Alpha",
 29 |     "Intended Audience :: Developers",
 30 |     "License :: OSI Approved :: MIT License",
 31 |     "Programming Language :: Python :: 3.8",
 32 |     "Programming Language :: Python :: 3.9",
 33 |     "Programming Language :: Python :: 3.10",
 34 |     "Programming Language :: Python :: 3.11",
 35 |     "Programming Language :: Python :: 3.12",
 36 |     "Programming Language :: Python :: 3.13",
 37 | ]
 38 | 
 39 | [project.optional-dependencies]
 40 | dev = [
 41 |     "eval-type-backport",
 42 |     "pre-commit",
 43 |     "pytest",
 44 |     "pytest-asyncio",
 45 |     "pytest-cov",
 46 |     "pytest-mock",
 47 | ]
 48 | 
 49 | [project.scripts]
 50 | gitingest = "gitingest.__main__:main"
 51 | 
 52 | [project.urls]
 53 | homepage = "https://gitingest.com"
 54 | github = "https://github.com/coderamp-labs/gitingest"
 55 | 
 56 | [build-system]
 57 | requires = ["setuptools>=61.0", "wheel"]
 58 | build-backend = "setuptools.build_meta"
 59 | 
 60 | [tool.setuptools]
 61 | packages = {find = {where = ["src"]}}
 62 | include-package-data = true
 63 | 
 64 | # Linting configuration
 65 | [tool.pylint.format]
 66 | max-line-length = 119
 67 | 
 68 | [tool.pylint.'MESSAGES CONTROL']
 69 | disable = [
 70 |     "too-many-arguments",
 71 |     "too-many-positional-arguments",
 72 |     "too-many-locals",
 73 |     "too-few-public-methods",
 74 |     "broad-exception-caught",
 75 |     "duplicate-code",
 76 |     "fixme",
 77 | ]
 78 | 
 79 | [tool.ruff]
 80 | line-length = 119
 81 | fix = true
 82 | 
 83 | [tool.ruff.lint]
 84 | select = ["ALL"]
 85 | ignore = [  # https://docs.astral.sh/ruff/rules/...
 86 |     "D107", # undocumented-public-init
 87 |     "FIX002", # line-contains-todo
 88 |     "TD002", # missing-todo-author
 89 |     "PLR0913", # too-many-arguments
 90 | 
 91 |     # TODO: fix the following issues:
 92 |     "TD003", # missing-todo-link, TODO: add issue links
 93 |     "T201", # print, TODO: replace with logging
 94 |     "S108", # hardcoded-temp-file, TODO: replace with tempfile
 95 |     "BLE001", # blind-except, TODO: replace with specific exceptions
 96 |     "FAST003", # fast-api-unused-path-parameter, TODO: fix
 97 | ]
 98 | per-file-ignores = { "tests/**/*.py" = ["S101"] } # Skip the "assert used" warning
 99 | 
100 | [tool.ruff.lint.pylint]
101 | max-returns = 10
102 | 
103 | [tool.ruff.lint.isort]
104 | order-by-type = true
105 | case-sensitive = true
106 | 
107 | [tool.pycln]
108 | all = true
109 | 
110 | # TODO: Remove this once we figure out how to use ruff-isort
111 | [tool.isort]
112 | profile = "black"
113 | line_length = 119
114 | remove_redundant_aliases = true
115 | float_to_top = true  # https://github.com/astral-sh/ruff/issues/6514
116 | order_by_type = true
117 | filter_files = true
118 | 
119 | # Test configuration
120 | [tool.pytest.ini_options]
121 | pythonpath = ["src"]
122 | testpaths = ["tests/"]
123 | python_files = "test_*.py"
124 | asyncio_mode = "auto"
125 | asyncio_default_fixture_loop_scope = "function"
126 | python_classes = "Test*"
127 | python_functions = "test_*"
128 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | eval-type-backport
3 | pre-commit
4 | pytest
5 | pytest-asyncio
6 | pytest-cov
7 | pytest-mock
8 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | click>=8.0.0
 2 | fastapi[standard]>=0.109.1  # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38)
 3 | httpx
 4 | pathspec>=0.12.1
 5 | prometheus-client
 6 | pydantic
 7 | python-dotenv
 8 | sentry-sdk[fastapi]
 9 | slowapi
10 | starlette>=0.40.0  # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw)
11 | tiktoken>=0.7.0  # Support for o200k_base encoding
12 | uvicorn>=0.11.7  # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150)
13 | 


--------------------------------------------------------------------------------
/src/gitingest/__init__.py:
--------------------------------------------------------------------------------
1 | """Gitingest: A package for ingesting data from Git repositories."""
2 | 
3 | from gitingest.clone import clone_repo
4 | from gitingest.entrypoint import ingest, ingest_async
5 | from gitingest.ingestion import ingest_query
6 | from gitingest.query_parser import parse_query
7 | 
8 | __all__ = ["clone_repo", "ingest", "ingest_async", "ingest_query", "parse_query"]
9 | 


--------------------------------------------------------------------------------
/src/gitingest/__main__.py:
--------------------------------------------------------------------------------
  1 | """Command-line interface (CLI) for Gitingest."""
  2 | 
  3 | # pylint: disable=no-value-for-parameter
  4 | from __future__ import annotations
  5 | 
  6 | import asyncio
  7 | from typing import TypedDict
  8 | 
  9 | import click
 10 | from typing_extensions import Unpack
 11 | 
 12 | from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME
 13 | from gitingest.entrypoint import ingest_async
 14 | 
 15 | 
 16 | class _CLIArgs(TypedDict):  # Keyword arguments that click passes to ``main``, one key per CLI parameter.
 17 |     source: str  # Directory path or Git repository URL (positional argument, defaults to ".").
 18 |     max_size: int  # Maximum file size to process, in bytes (``--max-size`` / ``-s``).
 19 |     exclude_pattern: tuple[str, ...]  # Shell-style patterns to exclude (``-e``, may be given several times).
 20 |     include_pattern: tuple[str, ...]  # Shell-style patterns to include (``-i``, may be given several times).
 21 |     branch: str | None  # Branch to clone and ingest (``-b``); ``None`` when the option is not given.
 22 |     include_gitignored: bool  # Include files matched by .gitignore and .gitingestignore.
 23 |     include_submodules: bool  # Include the repository's submodules in the analysis.
 24 |     token: str | None  # GitHub personal access token (``-t`` or the GITHUB_TOKEN environment variable).
 25 |     output: str | None  # Output file path, or "-" for stdout (``-o``); ``None`` when the option is not given.
 26 | 
 27 | 
 28 | @click.command()
 29 | @click.argument("source", type=str, default=".")
 30 | @click.option(
 31 |     "--max-size",
 32 |     "-s",
 33 |     default=MAX_FILE_SIZE,
 34 |     show_default=True,
 35 |     help="Maximum file size to process in bytes",
 36 | )
 37 | @click.option("--exclude-pattern", "-e", multiple=True, help="Shell-style patterns to exclude.")
 38 | @click.option(
 39 |     "--include-pattern",
 40 |     "-i",
 41 |     multiple=True,
 42 |     help="Shell-style patterns to include.",
 43 | )
 44 | @click.option("--branch", "-b", default=None, help="Branch to clone and ingest")
 45 | @click.option(
 46 |     "--include-gitignored",
 47 |     is_flag=True,
 48 |     default=False,
 49 |     help="Include files matched by .gitignore and .gitingestignore",
 50 | )
 51 | @click.option(
 52 |     "--include-submodules",
 53 |     is_flag=True,
 54 |     help="Include repository's submodules in the analysis",
 55 |     default=False,
 56 | )
 57 | @click.option(
 58 |     "--token",
 59 |     "-t",
 60 |     envvar="GITHUB_TOKEN",  # Fallback source for the value when ``--token`` is not passed.
 61 |     default=None,
 62 |     help=(
 63 |         "GitHub personal access token (PAT) for accessing private repositories. "
 64 |         "If omitted, the CLI will look for the GITHUB_TOKEN environment variable."
 65 |     ),
 66 | )
 67 | @click.option(
 68 |     "--output",
 69 |     "-o",
 70 |     default=None,
 71 |     help="Output file path (default: digest.txt in current directory). Use '-' for stdout.",
 72 | )
 73 | def main(**cli_kwargs: Unpack[_CLIArgs]) -> None:  # NOTE: click renders the docstring below as the ``--help`` text.
 74 |     """Run the CLI entry point to analyze a repo / directory and dump its contents.
 75 | 
 76 |     Parameters
 77 |     ----------
 78 |     **cli_kwargs : Unpack[_CLIArgs]
 79 |         A dictionary of keyword arguments forwarded to ``ingest_async``.
 80 | 
 81 |     Notes
 82 |     -----
 83 |     See ``ingest_async`` for a detailed description of each argument.
 84 | 
 85 |     Examples
 86 |     --------
 87 |     Basic usage:
 88 |         $ gitingest
 89 |         $ gitingest /path/to/repo
 90 |         $ gitingest https://github.com/user/repo
 91 | 
 92 |     Output to stdout:
 93 |         $ gitingest -o -
 94 |         $ gitingest https://github.com/user/repo --output -
 95 | 
 96 |     With filtering:
 97 |         $ gitingest -i "*.py" -e "*.log"
 98 |         $ gitingest --include-pattern "*.js" --exclude-pattern "node_modules/*"
 99 | 
100 |     Private repositories:
101 |         $ gitingest https://github.com/user/private-repo -t ghp_token
102 |         $ GITHUB_TOKEN=ghp_token gitingest https://github.com/user/private-repo
103 | 
104 |     Include submodules:
105 |         $ gitingest https://github.com/user/repo --include-submodules
106 | 
107 |     """
108 |     asyncio.run(_async_main(**cli_kwargs))  # Run the async implementation to completion from this sync entry point.
109 | 
110 | 
111 | async def _async_main(
112 |     source: str,
113 |     *,
114 |     max_size: int = MAX_FILE_SIZE,
115 |     exclude_pattern: tuple[str, ...] | None = None,
116 |     include_pattern: tuple[str, ...] | None = None,
117 |     branch: str | None = None,
118 |     include_gitignored: bool = False,
119 |     include_submodules: bool = False,
120 |     token: str | None = None,
121 |     output: str | None = None,
122 | ) -> None:
123 |     """Analyze a directory or repository and create a text dump of its contents.
124 | 
125 |     This command scans the specified ``source`` (a local directory or Git repo),
126 |     applies custom include and exclude patterns, and generates a text summary of
127 |     the analysis.  The summary is written to an output file or printed to ``stdout``.
128 | 
129 |     Parameters
130 |     ----------
131 |     source : str
132 |         A directory path or a Git repository URL.
133 |     max_size : int
134 |         Maximum file size in bytes to ingest (default: 10 MB).
135 |     exclude_pattern : tuple[str, ...] | None
136 |         Glob patterns for pruning the file set.
137 |     include_pattern : tuple[str, ...] | None
138 |         Glob patterns for including files in the output.
139 |     branch : str | None
140 |         Git branch to ingest. If ``None``, the repository's default branch is used.
141 |     include_gitignored : bool
142 |         If ``True``, also ingest files matched by ``.gitignore`` or ``.gitingestignore`` (default: ``False``).
143 |     include_submodules : bool
144 |         If ``True``, recursively include all Git submodules within the repository (default: ``False``).
145 |     token : str | None
146 |         GitHub personal access token (PAT) for accessing private repositories.
147 |         Can also be set via the ``GITHUB_TOKEN`` environment variable.
148 |     output : str | None
149 |         The path where the output file will be written (default: ``digest.txt`` in current directory).
150 |         Use ``"-"`` to write to ``stdout``.
151 | 
152 |     Raises
153 |     ------
154 |     click.Abort
155 |         Raised if an error occurs during execution and the command must be aborted.
156 | 
157 |     """
158 |     try:
159 |         # Normalise pattern containers (the ingest layer expects sets)
160 |         exclude_patterns = set(exclude_pattern) if exclude_pattern else set()
161 |         include_patterns = set(include_pattern) if include_pattern else set()
162 | 
163 |         output_target = output if output is not None else OUTPUT_FILE_NAME
164 | 
165 |         if output_target == "-":
166 |             click.echo("Analyzing source, preparing output for stdout...", err=True)
167 |         else:
168 |             click.echo(f"Analyzing source, output will be written to '{output_target}'...", err=True)
169 | 
170 |         summary, _, _ = await ingest_async(
171 |             source,
172 |             max_file_size=max_size,
173 |             include_patterns=include_patterns,
174 |             exclude_patterns=exclude_patterns,
175 |             branch=branch,
176 |             include_gitignored=include_gitignored,
177 |             include_submodules=include_submodules,
178 |             token=token,
179 |             output=output_target,
180 |         )
181 |     except Exception as exc:
182 |         # Convert any exception into Click.Abort so that exit status is non-zero
183 |         click.echo(f"Error: {exc}", err=True)
184 |         raise click.Abort from exc
185 | 
186 |     if output_target == "-":  # stdout
187 |         click.echo("\n--- Summary ---", err=True)
188 |         click.echo(summary, err=True)
189 |         click.echo("--- End Summary ---", err=True)
190 |         click.echo("Analysis complete! Output sent to stdout.", err=True)
191 |     else:  # file
192 |         click.echo(f"Analysis complete! Output written to: {output_target}")
193 |         click.echo("\nSummary:")
194 |         click.echo(summary)
195 | 
196 | 
197 | if __name__ == "__main__":  # Executed as a script (this file is the package's ``__main__`` module).
198 |     main()
199 | 


--------------------------------------------------------------------------------
/src/gitingest/clone.py:
--------------------------------------------------------------------------------
  1 | """Module containing functions for cloning a Git repository to a local path."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from pathlib import Path
  6 | from typing import TYPE_CHECKING
  7 | 
  8 | from gitingest.config import DEFAULT_TIMEOUT
  9 | from gitingest.utils.git_utils import (
 10 |     check_repo_exists,
 11 |     create_git_auth_header,
 12 |     create_git_command,
 13 |     ensure_git_installed,
 14 |     is_github_host,
 15 |     run_command,
 16 | )
 17 | from gitingest.utils.os_utils import ensure_directory
 18 | from gitingest.utils.timeout_wrapper import async_timeout
 19 | 
 20 | if TYPE_CHECKING:
 21 |     from gitingest.schemas import CloneConfig
 22 | 
 23 | 
 24 | @async_timeout(DEFAULT_TIMEOUT)
 25 | async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
 26 |     """Clone a repository to a local path based on the provided configuration.
 27 | 
 28 |     This function handles the process of cloning a Git repository to the local file system.
 29 |     It can clone a specific branch, tag, or commit if provided, and it raises exceptions if
 30 |     any errors occur during the cloning process.
 31 | 
 32 |     Parameters
 33 |     ----------
 34 |     config : CloneConfig
 35 |         The configuration for cloning the repository.
 36 |     token : str | None
 37 |         GitHub personal access token (PAT) for accessing private repositories.
 38 | 
 39 |     Raises
 40 |     ------
 41 |     ValueError
 42 |         If the repository is not found, if the provided URL is invalid, or if the token format is invalid.
 43 | 
 44 |     """
 45 |     # Extract and validate query parameters
 46 |     url: str = config.url
 47 |     local_path: str = config.local_path
 48 |     commit: str | None = config.commit
 49 |     branch: str | None = config.branch
 50 |     tag: str | None = config.tag
 51 |     partial_clone: bool = config.subpath != "/"
 52 | 
 53 |     # Create parent directory if it doesn't exist
 54 |     await ensure_directory(Path(local_path).parent)
 55 | 
 56 |     # Check if the repository exists
 57 |     if not await check_repo_exists(url, token=token):
 58 |         msg = "Repository not found. Make sure it is public or that you have provided a valid token."
 59 |         raise ValueError(msg)
 60 | 
 61 |     clone_cmd = ["git"]
 62 |     if token and is_github_host(url):
 63 |         clone_cmd += ["-c", create_git_auth_header(token, url=url)]
 64 | 
 65 |     clone_cmd += ["clone", "--single-branch"]
 66 | 
 67 |     if config.include_submodules:
 68 |         clone_cmd += ["--recurse-submodules"]
 69 | 
 70 |     if partial_clone:
 71 |         clone_cmd += ["--filter=blob:none", "--sparse"]
 72 | 
 73 |     # Shallow clone unless a specific commit is requested
 74 |     if not commit:
 75 |         clone_cmd += ["--depth=1"]
 76 | 
 77 |         # Prefer tag over branch when both are provided
 78 |         if tag:
 79 |             clone_cmd += ["--branch", tag]
 80 |         elif branch and branch.lower() not in ("main", "master"):
 81 |             clone_cmd += ["--branch", branch]
 82 | 
 83 |     clone_cmd += [url, local_path]
 84 | 
 85 |     # Clone the repository
 86 |     await ensure_git_installed()
 87 |     await run_command(*clone_cmd)
 88 | 
 89 |     # Checkout the subpath if it is a partial clone
 90 |     if partial_clone:
 91 |         await _checkout_partial_clone(config, token)
 92 | 
 93 |     # Checkout the commit if it is provided
 94 |     if commit:
 95 |         checkout_cmd = create_git_command(["git"], local_path, url, token)
 96 |         await run_command(*checkout_cmd, "checkout", commit)
 97 | 
 98 | 
 99 | async def _checkout_partial_clone(config: CloneConfig, token: str | None) -> None:
100 |     """Configure sparse-checkout for a partially cloned repository.
101 | 
102 |     Parameters
103 |     ----------
104 |     config : CloneConfig
105 |         The configuration for cloning the repository, including subpath and blob flag.
106 |     token : str | None
107 |         GitHub personal access token (PAT) for accessing private repositories.
108 | 
109 |     """
110 |     subpath = config.subpath.lstrip("/")
111 |     if config.blob:
112 |         # Remove the file name from the subpath when ingesting from a file url (e.g. blob/branch/path/file.txt)
113 |         subpath = str(Path(subpath).parent.as_posix())
114 |     checkout_cmd = create_git_command(["git"], config.local_path, config.url, token)
115 |     await run_command(*checkout_cmd, "sparse-checkout", "set", subpath)
116 | 


--------------------------------------------------------------------------------
/src/gitingest/config.py:
--------------------------------------------------------------------------------
 1 | """Configuration file for the project."""
 2 | 
 3 | import tempfile
 4 | from pathlib import Path
 5 | 
# Limits applied while ingesting a repository or directory.
MAX_FILE_SIZE = 10 * 1024 * 1024  # Maximum size of a single file to process (10 MB)
MAX_DIRECTORY_DEPTH = 20  # Maximum depth of directory traversal
MAX_FILES = 10_000  # Maximum number of files to process
MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024  # Maximum size of output file (500 MB)
DEFAULT_TIMEOUT = 60  # seconds

# Default name of the generated digest file.
OUTPUT_FILE_NAME = "digest.txt"

# "gitingest" directory under the system temporary directory.
TMP_BASE_PATH = Path(tempfile.gettempdir()) / "gitingest"
15 | 


--------------------------------------------------------------------------------
/src/gitingest/output_formatter.py:
--------------------------------------------------------------------------------
  1 | """Functions to ingest and analyze a codebase directory or single file."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from typing import TYPE_CHECKING
  6 | 
  7 | import tiktoken
  8 | 
  9 | from gitingest.schemas import FileSystemNode, FileSystemNodeType
 10 | from gitingest.utils.compat_func import readlink
 11 | 
 12 | if TYPE_CHECKING:
 13 |     from gitingest.query_parser import IngestionQuery
 14 | 
# (threshold, suffix) pairs used to abbreviate token counts.
# Keep the entries sorted from largest to smallest threshold: the first threshold that the count
# reaches is the one used for formatting.
_TOKEN_THRESHOLDS: list[tuple[int, str]] = [
    (1_000_000, "M"),
    (1_000, "k"),
]
 19 | 
 20 | 
 21 | def format_node(node: FileSystemNode, query: IngestionQuery) -> tuple[str, str, str]:
 22 |     """Generate a summary, directory structure, and file contents for a given file system node.
 23 | 
 24 |     If the node represents a directory, the function will recursively process its contents.
 25 | 
 26 |     Parameters
 27 |     ----------
 28 |     node : FileSystemNode
 29 |         The file system node to be summarized.
 30 |     query : IngestionQuery
 31 |         The parsed query object containing information about the repository and query parameters.
 32 | 
 33 |     Returns
 34 |     -------
 35 |     tuple[str, str, str]
 36 |         A tuple containing the summary, directory structure, and file contents.
 37 | 
 38 |     """
 39 |     is_single_file = node.type == FileSystemNodeType.FILE
 40 |     summary = _create_summary_prefix(query, single_file=is_single_file)
 41 | 
 42 |     if node.type == FileSystemNodeType.DIRECTORY:
 43 |         summary += f"Files analyzed: {node.file_count}\n"
 44 |     elif node.type == FileSystemNodeType.FILE:
 45 |         summary += f"File: {node.name}\n"
 46 |         summary += f"Lines: {len(node.content.splitlines()):,}\n"
 47 | 
 48 |     tree = "Directory structure:\n" + _create_tree_structure(query, node=node)
 49 | 
 50 |     content = _gather_file_contents(node)
 51 | 
 52 |     token_estimate = _format_token_count(tree + content)
 53 |     if token_estimate:
 54 |         summary += f"\nEstimated tokens: {token_estimate}"
 55 | 
 56 |     return summary, tree, content
 57 | 
 58 | 
 59 | def _create_summary_prefix(query: IngestionQuery, *, single_file: bool = False) -> str:
 60 |     """Create a prefix string for summarizing a repository or local directory.
 61 | 
 62 |     Includes repository name (if provided), commit/branch details, and subpath if relevant.
 63 | 
 64 |     Parameters
 65 |     ----------
 66 |     query : IngestionQuery
 67 |         The parsed query object containing information about the repository and query parameters.
 68 |     single_file : bool
 69 |         A flag indicating whether the summary is for a single file (default: ``False``).
 70 | 
 71 |     Returns
 72 |     -------
 73 |     str
 74 |         A summary prefix string containing repository, commit, branch, and subpath details.
 75 | 
 76 |     """
 77 |     parts = []
 78 | 
 79 |     if query.user_name:
 80 |         parts.append(f"Repository: {query.user_name}/{query.repo_name}")
 81 |     else:
 82 |         # Local scenario
 83 |         parts.append(f"Directory: {query.slug}")
 84 | 
 85 |     if query.commit:
 86 |         parts.append(f"Commit: {query.commit}")
 87 |     elif query.branch and query.branch not in ("main", "master"):
 88 |         parts.append(f"Branch: {query.branch}")
 89 | 
 90 |     if query.subpath != "/" and not single_file:
 91 |         parts.append(f"Subpath: {query.subpath}")
 92 | 
 93 |     return "\n".join(parts) + "\n"
 94 | 
 95 | 
 96 | def _gather_file_contents(node: FileSystemNode) -> str:
 97 |     """Recursively gather contents of all files under the given node.
 98 | 
 99 |     This function recursively processes a directory node and gathers the contents of all files
100 |     under that node. It returns the concatenated content of all files as a single string.
101 | 
102 |     Parameters
103 |     ----------
104 |     node : FileSystemNode
105 |         The current directory or file node being processed.
106 | 
107 |     Returns
108 |     -------
109 |     str
110 |         The concatenated content of all files under the given node.
111 | 
112 |     """
113 |     if node.type != FileSystemNodeType.DIRECTORY:
114 |         return node.content_string
115 | 
116 |     # Recursively gather contents of all files under the current directory
117 |     return "\n".join(_gather_file_contents(child) for child in node.children)
118 | 
119 | 
def _create_tree_structure(
    query: IngestionQuery,
    *,
    node: FileSystemNode,
    prefix: str = "",
    is_last: bool = True,
) -> str:
    """Render ``node`` and everything below it as an indented, tree-style listing.

    Directories are shown with a trailing slash and symlinks with `` -> <target name>``.
    A node without a name is given ``query.slug`` as its name (the node is modified in place).

    Parameters
    ----------
    query : IngestionQuery
        The parsed query object containing information about the repository and query parameters.
    node : FileSystemNode
        The current directory or file node being processed.
    prefix : str
        A string used for indentation and formatting of the tree structure (default: ``""``).
    is_last : bool
        A flag indicating whether the current node is the last in its directory (default: ``True``).

    Returns
    -------
    str
        A string representing the directory structure formatted as a tree.

    """
    if not node.name:
        # Unnamed (top-level) node: fall back to the slug
        node.name = query.slug

    connector = "└── " if is_last else "├── "
    is_directory = node.type == FileSystemNodeType.DIRECTORY

    label = node.name
    if is_directory:
        label += "/"
    elif node.type == FileSystemNodeType.SYMLINK:
        label += " -> " + readlink(node.path).name

    rendered = [f"{prefix}{connector}{label}\n"]

    if is_directory and node.children:
        # Children of a last entry get blank padding; otherwise the vertical guide continues
        child_prefix = prefix + ("    " if is_last else "│   ")
        last_index = len(node.children) - 1
        rendered.extend(
            _create_tree_structure(query, node=child, prefix=child_prefix, is_last=index == last_index)
            for index, child in enumerate(node.children)
        )

    return "".join(rendered)
170 | 
171 | 
def _format_token_count(text: str) -> str | None:
    """Return a human-readable token-count string (e.g. ``"1.2k"``, ``"1.2M"``).

    The text is tokenized with the ``o200k_base`` encoding; special-token markers occurring in the
    text are encoded as ordinary text (``disallowed_special=()``).

    Parameters
    ----------
    text : str
        The text string for which the token count is to be estimated.

    Returns
    -------
    str | None
        The formatted number of tokens as a string (e.g., ``"1.2k"``, ``"1.2M"``), or ``None`` if an error occurs.

    """
    try:
        token_ids = tiktoken.get_encoding("o200k_base").encode(text, disallowed_special=())  # gpt-4o, gpt-4o-mini
    except (ValueError, UnicodeEncodeError) as exc:
        print(exc)
        return None

    total = len(token_ids)

    # First threshold reached wins; counts below every threshold are shown unabbreviated
    abbreviated = next(
        (f"{total / limit:.1f}{unit}" for limit, unit in _TOKEN_THRESHOLDS if total >= limit),
        None,
    )
    return str(total) if abbreviated is None else abbreviated
198 | 


--------------------------------------------------------------------------------
/src/gitingest/schemas/__init__.py:
--------------------------------------------------------------------------------
1 | """Module containing the schemas for the Gitingest package."""
2 | 
3 | from gitingest.schemas.filesystem import FileSystemNode, FileSystemNodeType, FileSystemStats
4 | from gitingest.schemas.ingestion import CloneConfig, IngestionQuery
5 | 
6 | __all__ = ["CloneConfig", "FileSystemNode", "FileSystemNodeType", "FileSystemStats", "IngestionQuery"]
7 | 


--------------------------------------------------------------------------------
/src/gitingest/schemas/filesystem.py:
--------------------------------------------------------------------------------
  1 | """Define the schema for the filesystem representation."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import os
  6 | from dataclasses import dataclass, field
  7 | from enum import Enum, auto
  8 | from typing import TYPE_CHECKING
  9 | 
 10 | from gitingest.utils.compat_func import readlink
 11 | from gitingest.utils.file_utils import _decodes, _get_preferred_encodings, _read_chunk
 12 | from gitingest.utils.notebook import process_notebook
 13 | 
 14 | if TYPE_CHECKING:
 15 |     from pathlib import Path
 16 | 
 17 | SEPARATOR = "=" * 48  # Tiktoken, the tokenizer openai uses, counts 2 tokens if we have more than 48
 18 | 
 19 | 
class FileSystemNodeType(Enum):
    """Enum representing the type of a file system node (directory, file, or symlink)."""

    DIRECTORY = auto()
    FILE = auto()
    SYMLINK = auto()
 26 | 
 27 | 
@dataclass
class FileSystemStats:
    """Class for tracking statistics during file system traversal.

    Both counters start at zero.
    """

    total_files: int = 0  # number of files counted so far
    total_size: int = 0  # accumulated size; presumably in bytes - verify against the code that updates it
 34 | 
 35 | 
@dataclass
class FileSystemNode:  # pylint: disable=too-many-instance-attributes
    """Class representing a node in the file system (a file, directory, or symlink).

    Tracks properties of files/directories for comprehensive analysis.
    """

    name: str
    type: FileSystemNodeType
    path_str: str  # path shown in the content header; OS separators are replaced by "/" when rendered
    path: Path  # path used to actually read the file / resolve the symlink
    size: int = 0
    file_count: int = 0
    dir_count: int = 0
    depth: int = 0
    children: list[FileSystemNode] = field(default_factory=list)

    def sort_children(self) -> None:
        """Sort the children nodes of a directory according to a specific order.

        Order of sorting:
          1. README files (named ``readme`` or ``readme.*``, case-insensitive)
          2. Regular files (not starting with dot)
          3. Hidden files (starting with dot)
          4. Regular directories (not starting with dot)
          5. Hidden directories (starting with dot)

        All groups are sorted alphanumerically (by lower-cased name) within themselves.
        Children that are not of type ``FILE`` (directories, but also symlinks) fall into the
        directory groups.

        Raises
        ------
        ValueError
            If the node is not a directory.

        """
        if self.type != FileSystemNodeType.DIRECTORY:
            msg = "Cannot sort children of a non-directory node"
            raise ValueError(msg)

        def _sort_key(child: FileSystemNode) -> tuple[int, str]:
            # returns the priority order for the sort function, 0 is first
            # Groups: 0=README, 1=regular file, 2=hidden file, 3=regular dir, 4=hidden dir
            name = child.name.lower()
            if child.type == FileSystemNodeType.FILE:
                if name == "readme" or name.startswith("readme."):
                    return (0, name)
                return (1 if not name.startswith(".") else 2, name)
            return (3 if not name.startswith(".") else 4, name)

        self.children.sort(key=_sort_key)

    @property
    def content_string(self) -> str:
        """Return the content of the node as a string, including path and content.

        The result is a header (separator line, ``<TYPE>: <path>`` with an optional
        `` -> <target name>`` for symlinks, separator line) followed by the node's content and
        two trailing newlines.

        Returns
        -------
        str
            A string representation of the node's content.

        """
        parts = [
            SEPARATOR,
            f"{self.type.name}: {str(self.path_str).replace(os.sep, '/')}"
            + (f" -> {readlink(self.path).name}" if self.type == FileSystemNodeType.SYMLINK else ""),
            SEPARATOR,
            f"{self.content}",
        ]

        return "\n".join(parts) + "\n\n"

    @property
    def content(self) -> str:  # pylint: disable=too-many-return-statements
        """Return file content (if text / notebook) or an explanatory placeholder.

        Heuristically decides whether the file is text or binary by decoding a small chunk of the file
        and then picks the first preferred encoding that decodes that chunk to read the whole file.
        The file is read from disk every time this property is accessed.

        Returns
        -------
        str
            The content of the file, or an error message if the file could not be read.

        Raises
        ------
        ValueError
            If the node is a directory.

        """
        if self.type == FileSystemNodeType.DIRECTORY:
            msg = "Cannot read content of a directory node"
            raise ValueError(msg)

        if self.type == FileSystemNodeType.SYMLINK:
            return ""  # TODO: are we including the empty content of symlinks?

        if self.path.suffix == ".ipynb":  # Notebook
            try:
                return process_notebook(self.path)
            except Exception as exc:
                # Best effort: a broken notebook is reported inline instead of aborting the ingestion
                return f"Error processing notebook: {exc}"

        chunk = _read_chunk(self.path)

        if chunk is None:
            return "Error reading file"

        if chunk == b"":
            return "[Empty file]"

        # Anything whose leading chunk is not valid UTF-8 is reported as binary.
        # NOTE(review): a multi-byte UTF-8 sequence cut off at the chunk boundary would also fail
        # this check - confirm whether that case needs handling.
        if not _decodes(chunk, "utf-8"):
            return "[Binary file]"

        # Find the first encoding that decodes the sample
        good_enc: str | None = next(
            (enc for enc in _get_preferred_encodings() if _decodes(chunk, encoding=enc)),
            None,
        )

        if good_enc is None:
            return "Error: Unable to decode file with available encodings"

        try:
            with self.path.open(encoding=good_enc) as fp:
                return fp.read()
        except (OSError, UnicodeDecodeError) as exc:
            # The sample decoded, but the rest of the file may not (or the file became unreadable)
            return f"Error reading file with {good_enc!r}: {exc}"
162 | 


--------------------------------------------------------------------------------
/src/gitingest/schemas/ingestion.py:
--------------------------------------------------------------------------------
  1 | """Module containing the dataclasses for the ingestion process."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from dataclasses import dataclass
  6 | from pathlib import Path  # noqa: TC003 (typing-only-standard-library-import) needed for type checking (pydantic)
  7 | 
  8 | from pydantic import BaseModel, Field
  9 | 
 10 | from gitingest.config import MAX_FILE_SIZE
 11 | 
 12 | 
@dataclass
class CloneConfig:  # pylint: disable=too-many-instance-attributes
    """Configuration for cloning a Git repository.

    This class holds the necessary parameters for cloning a repository to a local path, including
    the repository's URL, the target local path, and optional parameters for a specific commit,
    branch, tag, or subpath.

    Attributes
    ----------
    url : str
        The URL of the Git repository to clone.
    local_path : str
        The local directory where the repository will be cloned.
    commit : str | None
        The specific commit hash to check out after cloning.
    branch : str | None
        The branch to clone.
    tag : str | None
        The tag to clone.
    subpath : str
        The subpath to clone from the repository (default: ``"/"``).
    blob : bool
        Whether the query type is ``"blob"``, i.e. the source points at a single file rather than
        a directory (default: ``False``).
    include_submodules : bool
        Whether to clone submodules (default: ``False``).

    """

    url: str
    local_path: str
    commit: str | None = None
    branch: str | None = None
    tag: str | None = None
    subpath: str = "/"
    blob: bool = False
    include_submodules: bool = False
 49 | 
 50 | 
 51 | class IngestionQuery(BaseModel):  # pylint: disable=too-many-instance-attributes
 52 |     """Pydantic model to store the parsed details of the repository or file path.
 53 | 
 54 |     Attributes
 55 |     ----------
 56 |     user_name : str | None
 57 |         The username or owner of the repository.
 58 |     repo_name : str | None
 59 |         The name of the repository.
 60 |     local_path : Path
 61 |         The local path to the repository or file.
 62 |     url : str | None
 63 |         The URL of the repository.
 64 |     slug : str
 65 |         The slug of the repository.
 66 |     id : str
 67 |         The ID of the repository.
 68 |     subpath : str
 69 |         The subpath to the repository or file (default: ``"/"``).
 70 |     type : str | None
 71 |         The type of the repository or file.
 72 |     branch : str | None
 73 |         The branch of the repository.
 74 |     commit : str | None
 75 |         The commit of the repository.
 76 |     tag: str | None
 77 |         The tag of the repository.
 78 |     max_file_size : int
 79 |         The maximum file size to ingest (default: 10 MB).
 80 |     ignore_patterns : set[str]
 81 |         The patterns to ignore (default: ``set()``).
 82 |     include_patterns : set[str] | None
 83 |         The patterns to include.
 84 |     include_submodules : bool
 85 |         Whether to include all Git submodules within the repository. (default: ``False``)
 86 | 
 87 |     """
 88 | 
 89 |     user_name: str | None = None
 90 |     repo_name: str | None = None
 91 |     local_path: Path
 92 |     url: str | None = None
 93 |     slug: str
 94 |     id: str
 95 |     subpath: str = "/"
 96 |     type: str | None = None
 97 |     branch: str | None = None
 98 |     commit: str | None = None
 99 |     tag: str | None = None
100 |     max_file_size: int = Field(default=MAX_FILE_SIZE)
101 |     ignore_patterns: set[str] = set()  # TODO: ignore_patterns and include_patterns have the same type
102 |     include_patterns: set[str] | None = None
103 |     include_submodules: bool = False
104 | 
105 |     def extract_clone_config(self) -> CloneConfig:
106 |         """Extract the relevant fields for the CloneConfig object.
107 | 
108 |         Returns
109 |         -------
110 |         CloneConfig
111 |             A CloneConfig object containing the relevant fields.
112 | 
113 |         Raises
114 |         ------
115 |         ValueError
116 |             If the ``url`` parameter is not provided.
117 | 
118 |         """
119 |         if not self.url:
120 |             msg = "The 'url' parameter is required."
121 |             raise ValueError(msg)
122 | 
123 |         return CloneConfig(
124 |             url=self.url,
125 |             local_path=str(self.local_path),
126 |             commit=self.commit,
127 |             branch=self.branch,
128 |             tag=self.tag,
129 |             subpath=self.subpath,
130 |             blob=self.type == "blob",
131 |             include_submodules=self.include_submodules,
132 |         )
133 | 
134 |     def ensure_url(self) -> None:
135 |         """Raise if the parsed query has no URL (invalid user input).
136 | 
137 |         Raises
138 |         ------
139 |         ValueError
140 |             If the parsed query has no URL (invalid user input).
141 | 
142 |         """
143 |         if not self.url:
144 |             msg = "The 'url' parameter is required."
145 |             raise ValueError(msg)
146 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """Utility functions for the gitingest package."""
2 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/auth.py:
--------------------------------------------------------------------------------
 1 | """Utilities for handling authentication."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import os
 6 | 
 7 | from gitingest.utils.git_utils import validate_github_token
 8 | 
 9 | 
def resolve_token(token: str | None) -> str | None:
    """Pick the token to use for the query and validate its format.

    An explicitly passed token wins; otherwise the ``GITHUB_TOKEN`` environment variable is used.

    Parameters
    ----------
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    Returns
    -------
    str | None
        The resolved token.

    """
    resolved = token if token else os.getenv("GITHUB_TOKEN")
    if not resolved:
        # Nothing to validate: no explicit token and no usable environment value
        return resolved

    validate_github_token(resolved)
    return resolved
28 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/compat_func.py:
--------------------------------------------------------------------------------
 1 | """Compatibility functions for Python 3.8."""
 2 | 
 3 | import os
 4 | from pathlib import Path
 5 | 
 6 | 
 7 | def readlink(path: Path) -> Path:
 8 |     """Read the target of a symlink.
 9 | 
10 |     Compatible with Python 3.8.
11 | 
12 |     Parameters
13 |     ----------
14 |     path : Path
15 |         Path to the symlink.
16 | 
17 |     Returns
18 |     -------
19 |     Path
20 |         The target of the symlink.
21 | 
22 |     """
23 |     return Path(os.readlink(path))
24 | 
25 | 
def removesuffix(s: str, suffix: str) -> str:
    """Remove a suffix from a string.

    Compatible with Python 3.8; mirrors ``str.removesuffix`` (Python 3.9+).

    Parameters
    ----------
    s : str
        String to remove suffix from.
    suffix : str
        Suffix to remove.

    Returns
    -------
    str
        ``s`` without the trailing ``suffix``; ``s`` unchanged if it does not end with ``suffix``
        or if ``suffix`` is empty.

    """
    # An empty suffix must be a no-op: every string ends with "", and ``s[:-0]`` is ``s[:0]``,
    # which would wrongly yield an empty string.
    if suffix and s.endswith(suffix):
        return s[: -len(suffix)]
    return s
45 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/compat_typing.py:
--------------------------------------------------------------------------------
 1 | """Compatibility layer for typing."""
 2 | 
 3 | try:
 4 |     from typing import ParamSpec, TypeAlias  # type: ignore[attr-defined]  # Py ≥ 3.10
 5 | except ImportError:
 6 |     from typing_extensions import ParamSpec, TypeAlias  # type: ignore[attr-defined]  # Py 3.8 / 3.9
 7 | 
 8 | try:
 9 |     from typing import Annotated  # type: ignore[attr-defined]  # Py ≥ 3.9
10 | except ImportError:
11 |     from typing_extensions import Annotated  # type: ignore[attr-defined]  # Py 3.8
12 | 
13 | __all__ = ["Annotated", "ParamSpec", "TypeAlias"]
14 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/exceptions.py:
--------------------------------------------------------------------------------
 1 | """Custom exceptions for the Gitingest package."""
 2 | 
 3 | 
 4 | class InvalidPatternError(ValueError):
 5 |     """Exception raised when a pattern contains invalid characters.
 6 | 
 7 |     This exception is used to signal that a pattern provided for some operation
 8 |     contains characters that are not allowed. The valid characters for the pattern
 9 |     include alphanumeric characters, dash (-), underscore (_), dot (.), forward slash (/),
10 |     plus (+), and asterisk (*).
11 | 
12 |     Parameters
13 |     ----------
14 |     pattern : str
15 |         The invalid pattern that caused the error.
16 | 
17 |     """
18 | 
19 |     def __init__(self, pattern: str) -> None:
20 |         super().__init__(
21 |             f"Pattern '{pattern}' contains invalid characters. Only alphanumeric characters, dash (-), "
22 |             "underscore (_), dot (.), forward slash (/), plus (+), and asterisk (*) are allowed.",
23 |         )
24 | 
25 | 
class AsyncTimeoutError(Exception):
    """Exception raised when an async operation exceeds its timeout limit.

    This exception is used by the ``async_timeout`` decorator to signal that the wrapped
    asynchronous function has exceeded the specified time limit for execution.

    It derives directly from ``Exception`` and adds no behaviour of its own.
    """
32 | 
33 | 
class InvalidNotebookError(Exception):
    """Exception raised when a Jupyter notebook is invalid or cannot be processed.

    Parameters
    ----------
    message : str
        Description of what is wrong with the notebook; becomes the exception message.

    """

    def __init__(self, message: str) -> None:
        super().__init__(message)
39 | 
40 | 
class InvalidGitHubTokenError(ValueError):
    """Exception raised when a GitHub Personal Access Token is malformed.

    Takes no arguments; the message is fixed and points the user to the token creation page.
    """

    def __init__(self) -> None:
        super().__init__(
            "Invalid GitHub token format. To generate a token, go to "
            "https://github.com/settings/tokens/new?description=gitingest&scopes=repo.",
        )
50 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/file_utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for working with files and directories."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import locale
 6 | import platform
 7 | from typing import TYPE_CHECKING
 8 | 
 9 | if TYPE_CHECKING:
10 |     from pathlib import Path
11 | 
# Initialise the process locale from the environment at import time, falling back to the "C" locale
# when the environment names a locale that is not available.
# NOTE(review): presumably done so that locale.getpreferredencoding() reflects the user's settings - confirm.
try:
    locale.setlocale(locale.LC_ALL, "")
except locale.Error:
    locale.setlocale(locale.LC_ALL, "C")

# Number of leading bytes read from a file when sampling it
_CHUNK_SIZE = 1024  # bytes
18 | 
19 | 
20 | def _get_preferred_encodings() -> list[str]:
21 |     """Get list of encodings to try, prioritized for the current platform.
22 | 
23 |     Returns
24 |     -------
25 |     list[str]
26 |         List of encoding names to try in priority order, starting with the
27 |         platform's default encoding followed by common fallback encodings.
28 | 
29 |     """
30 |     encodings = [locale.getpreferredencoding(), "utf-8", "utf-16", "utf-16le", "utf-8-sig", "latin"]
31 |     if platform.system() == "Windows":
32 |         encodings += ["cp1252", "iso-8859-1"]
33 |     return list(dict.fromkeys(encodings))
34 | 
35 | 
36 | def _read_chunk(path: Path) -> bytes | None:
37 |     """Attempt to read the first *size* bytes of *path* in binary mode.
38 | 
39 |     Parameters
40 |     ----------
41 |     path : Path
42 |         The path to the file to read.
43 | 
44 |     Returns
45 |     -------
46 |     bytes | None
47 |         The first ``_CHUNK_SIZE`` bytes of ``path``, or ``None`` on any ``OSError``.
48 | 
49 |     """
50 |     try:
51 |         with path.open("rb") as fp:
52 |             return fp.read(_CHUNK_SIZE)
53 |     except OSError:
54 |         return None
55 | 
56 | 
57 | def _decodes(chunk: bytes, encoding: str) -> bool:
58 |     """Return ``True`` if ``chunk`` decodes cleanly with ``encoding``.
59 | 
60 |     Parameters
61 |     ----------
62 |     chunk : bytes
63 |         The chunk of bytes to decode.
64 |     encoding : str
65 |         The encoding to use to decode the chunk.
66 | 
67 |     Returns
68 |     -------
69 |     bool
70 |         ``True`` if the chunk decodes cleanly with the encoding, ``False`` otherwise.
71 | 
72 |     """
73 |     try:
74 |         chunk.decode(encoding)
75 |     except UnicodeDecodeError:
76 |         return False
77 |     return True
78 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/ignore_patterns.py:
--------------------------------------------------------------------------------
  1 | """Default ignore patterns for Gitingest."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from pathlib import Path
  6 | 
  7 | DEFAULT_IGNORE_PATTERNS: set[str] = {
  8 |     # Python
  9 |     "*.pyc",
 10 |     "*.pyo",
 11 |     "*.pyd",
 12 |     "__pycache__",
 13 |     ".pytest_cache",
 14 |     ".coverage",
 15 |     ".tox",
 16 |     ".nox",
 17 |     ".mypy_cache",
 18 |     ".ruff_cache",
 19 |     ".hypothesis",
 20 |     "poetry.lock",
 21 |     "Pipfile.lock",
 22 |     # JavaScript/Node
 23 |     "node_modules",
 24 |     "bower_components",
 25 |     "package-lock.json",
 26 |     "yarn.lock",
 27 |     ".npm",
 28 |     ".yarn",
 29 |     ".pnpm-store",
 30 |     "bun.lock",
 31 |     "bun.lockb",
 32 |     # Java
 33 |     "*.class",
 34 |     "*.jar",
 35 |     "*.war",
 36 |     "*.ear",
 37 |     "*.nar",
 38 |     ".gradle/",
 39 |     "build/",
 40 |     ".settings/",
 41 |     ".classpath",
 42 |     "gradle-app.setting",
 43 |     "*.gradle",
 44 |     # IDEs and editors / Java
 45 |     ".project",
 46 |     # C/C++
 47 |     "*.o",
 48 |     "*.obj",
 49 |     "*.dll",
 50 |     "*.dylib",
 51 |     "*.exe",
 52 |     "*.lib",
 53 |     "*.out",
 54 |     "*.a",
 55 |     "*.pdb",
 56 |     # Binary
 57 |     "*.bin",
 58 |     # Swift/Xcode
 59 |     ".build/",
 60 |     "*.xcodeproj/",
 61 |     "*.xcworkspace/",
 62 |     "*.pbxuser",
 63 |     "*.mode1v3",
 64 |     "*.mode2v3",
 65 |     "*.perspectivev3",
 66 |     "*.xcuserstate",
 67 |     "xcuserdata/",
 68 |     ".swiftpm/",
 69 |     # Ruby
 70 |     "*.gem",
 71 |     ".bundle/",
 72 |     "vendor/bundle",
 73 |     "Gemfile.lock",
 74 |     ".ruby-version",
 75 |     ".ruby-gemset",
 76 |     ".rvmrc",
 77 |     # Rust
 78 |     "Cargo.lock",
 79 |     "**/*.rs.bk",
 80 |     # Java / Rust
 81 |     "target/",
 82 |     # Go
 83 |     "pkg/",
 84 |     # .NET/C#
 85 |     "obj/",
 86 |     "*.suo",
 87 |     "*.user",
 88 |     "*.userosscache",
 89 |     "*.sln.docstates",
 90 |     "*.nupkg",
 91 |     # Go / .NET / C#
 92 |     "bin/",
 93 |     # Version control
 94 |     ".git",
 95 |     ".svn",
 96 |     ".hg",
 97 |     ".gitignore",
 98 |     ".gitattributes",
 99 |     ".gitmodules",
100 |     # Images and media
101 |     "*.svg",
102 |     "*.png",
103 |     "*.jpg",
104 |     "*.jpeg",
105 |     "*.gif",
106 |     "*.ico",
107 |     "*.pdf",
108 |     "*.mov",
109 |     "*.mp4",
110 |     "*.mp3",
111 |     "*.wav",
112 |     # Virtual environments
113 |     "venv",
114 |     ".venv",
115 |     "env",
116 |     ".env",
117 |     "virtualenv",
118 |     # IDEs and editors
119 |     ".idea",
120 |     ".vscode",
121 |     ".vs",
122 |     "*.swo",
123 |     "*.swn",
124 |     ".settings",
125 |     "*.sublime-*",
126 |     # Temporary and cache files
127 |     "*.log",
128 |     "*.bak",
129 |     "*.swp",
130 |     "*.tmp",
131 |     "*.temp",
132 |     ".cache",
133 |     ".sass-cache",
134 |     ".eslintcache",
135 |     ".DS_Store",
136 |     "Thumbs.db",
137 |     "desktop.ini",
138 |     # Build directories and artifacts
139 |     "build",
140 |     "dist",
141 |     "target",
142 |     "out",
143 |     "*.egg-info",
144 |     "*.egg",
145 |     "*.whl",
146 |     "*.so",
147 |     # Documentation
148 |     "site-packages",
149 |     ".docusaurus",
150 |     ".next",
151 |     ".nuxt",
152 |     # Database
153 |     "*.db",
154 |     "*.sqlite",
155 |     "*.sqlite3",
156 |     # Other common patterns
157 |     ## Minified files
158 |     "*.min.js",
159 |     "*.min.css",
160 |     ## Source maps
161 |     "*.map",
162 |     ## Terraform
163 |     "*.tfstate*",
164 |     ## Dependencies in various languages
165 |     "vendor/",
166 |     # Gitingest
167 |     "digest.txt",
168 | }
169 | 
170 | 
def load_ignore_patterns(root: Path, filename: str) -> set[str]:
    """Collect the ignore patterns of every ``filename`` file found below ``root``.

    ``root`` is searched recursively for entries named ``filename``. Each match
    that is a regular file is handed to ``_parse_ignore_file`` and the resulting
    patterns are merged into a single set.

    Parameters
    ----------
    root : Path
        Directory to search recursively.
    filename : str
        Name of the ignore file to look for (e.g. ``.gitignore``).

    Returns
    -------
    set[str]
        The union of the patterns parsed from all matching files; empty when no
        such file exists.

    """
    collected: set[str] = set()
    ignore_files = (candidate for candidate in root.rglob(filename) if candidate.is_file())
    for ignore_file in ignore_files:
        collected.update(_parse_ignore_file(ignore_file, root))
    return collected
198 | 
199 | 
200 | def _parse_ignore_file(ignore_file: Path, root: Path) -> set[str]:
201 |     """Parse an ignore file and return a set of ignore patterns.
202 | 
203 |     Parameters
204 |     ----------
205 |     ignore_file : Path
206 |         The path to the ignore file.
207 |     root : Path
208 |         The root directory of the repository.
209 | 
210 |     Returns
211 |     -------
212 |     set[str]
213 |         A set of ignore patterns.
214 | 
215 |     """
216 |     patterns: set[str] = set()
217 | 
218 |     # Path of the ignore file relative to the repository root
219 |     rel_dir = ignore_file.parent.relative_to(root)
220 |     base_dir = Path() if rel_dir == Path() else rel_dir
221 | 
222 |     with ignore_file.open(encoding="utf-8") as fh:
223 |         for raw in fh:
224 |             line = raw.strip()
225 |             if not line or line.startswith("#"):  # comments / blank lines
226 |                 continue
227 | 
228 |             # Handle negation ("!foobar")
229 |             negated = line.startswith("!")
230 |             if negated:
231 |                 line = line[1:]
232 | 
233 |             # Handle leading slash ("/foobar")
234 |             if line.startswith("/"):
235 |                 line = line.lstrip("/")
236 | 
237 |             pattern_body = (base_dir / line).as_posix()
238 |             patterns.add(f"!{pattern_body}" if negated else pattern_body)
239 | 
240 |     return patterns
241 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/ingestion_utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for the ingestion process."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from typing import TYPE_CHECKING
 6 | 
 7 | from pathspec import PathSpec
 8 | 
 9 | if TYPE_CHECKING:
10 |     from pathlib import Path
11 | 
12 | 
def _should_include(path: Path, base_path: Path, include_patterns: set[str]) -> bool:
    """Decide whether ``path`` is selected by ``include_patterns``.

    Parameters
    ----------
    path : Path
        The absolute path of the file or directory to check.

    base_path : Path
        The base directory that ``path`` is made relative to before matching.

    include_patterns : set[str]
        Git-wildmatch patterns to test the relative path against.

    Returns
    -------
    bool
        ``False`` when ``path`` lies outside ``base_path``, ``True`` for any
        directory inside it, otherwise whether the relative path matches one of
        the patterns.

    """
    relative = _relative_or_none(path, base_path)
    if relative is None:
        # Not under the base directory: never included
        return False

    if path.is_dir():
        # Directories always pass so that their children get visited
        return True

    matcher = PathSpec.from_lines("gitwildmatch", include_patterns)
    return matcher.match_file(str(relative))
41 | 
42 | 
def _should_exclude(path: Path, base_path: Path, ignore_patterns: set[str]) -> bool:
    """Decide whether ``path`` is rejected by ``ignore_patterns``.

    Parameters
    ----------
    path : Path
        The absolute path of the file or directory to check.
    base_path : Path
        The base directory that ``path`` is made relative to before matching.
    ignore_patterns : set[str]
        Git-wildmatch patterns to test the relative path against.

    Returns
    -------
    bool
        ``True`` when ``path`` lies outside ``base_path`` or its relative path
        matches one of the patterns, ``False`` otherwise.

    """
    relative = _relative_or_none(path, base_path)
    if relative is None:
        # Not under the base directory: treated as excluded
        return True

    matcher = PathSpec.from_lines("gitwildmatch", ignore_patterns)
    return matcher.match_file(str(relative))
67 | 
68 | 
69 | def _relative_or_none(path: Path, base: Path) -> Path | None:
70 |     """Return *path* relative to *base* or ``None`` if *path* is outside *base*.
71 | 
72 |     Parameters
73 |     ----------
74 |     path : Path
75 |         The absolute path of the file or directory to check.
76 |     base : Path
77 |         The base directory from which the relative path is calculated.
78 | 
79 |     Returns
80 |     -------
81 |     Path | None
82 |         The relative path of ``path`` to ``base``, or ``None`` if ``path`` is outside ``base``.
83 | 
84 |     """
85 |     try:
86 |         return path.relative_to(base)
87 |     except ValueError:  # path is not a sub-path of base
88 |         return None
89 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/notebook.py:
--------------------------------------------------------------------------------
  1 | """Utilities for processing Jupyter notebooks."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import json
  6 | import warnings
  7 | from itertools import chain
  8 | from typing import TYPE_CHECKING, Any
  9 | 
 10 | from gitingest.utils.exceptions import InvalidNotebookError
 11 | 
 12 | if TYPE_CHECKING:
 13 |     from pathlib import Path
 14 | 
 15 | 
 16 | def process_notebook(file: Path, *, include_output: bool = True) -> str:
 17 |     """Process a Jupyter notebook file and return an executable Python script as a string.
 18 | 
 19 |     Parameters
 20 |     ----------
 21 |     file : Path
 22 |         The path to the Jupyter notebook file.
 23 |     include_output : bool
 24 |         Whether to include cell outputs in the generated script (default: ``True``).
 25 | 
 26 |     Returns
 27 |     -------
 28 |     str
 29 |         The executable Python script as a string.
 30 | 
 31 |     Raises
 32 |     ------
 33 |     InvalidNotebookError
 34 |         If the notebook file is invalid or cannot be processed.
 35 | 
 36 |     """
 37 |     try:
 38 |         with file.open(encoding="utf-8") as f:
 39 |             notebook: dict[str, Any] = json.load(f)
 40 |     except json.JSONDecodeError as exc:
 41 |         msg = f"Invalid JSON in notebook: {file}"
 42 |         raise InvalidNotebookError(msg) from exc
 43 | 
 44 |     # Check if the notebook contains worksheets
 45 |     worksheets = notebook.get("worksheets")
 46 |     if worksheets:
 47 |         warnings.warn(
 48 |             "Worksheets are deprecated as of IPEP-17. Consider updating the notebook. "
 49 |             "(See: https://github.com/jupyter/nbformat and "
 50 |             "https://github.com/ipython/ipython/wiki/IPEP-17:-Notebook-Format-4#remove-multiple-worksheets "
 51 |             "for more information.)",
 52 |             DeprecationWarning,
 53 |             stacklevel=2,
 54 |         )
 55 | 
 56 |         if len(worksheets) > 1:
 57 |             warnings.warn(
 58 |                 "Multiple worksheets detected. Combining all worksheets into a single script.",
 59 |                 UserWarning,
 60 |                 stacklevel=2,
 61 |             )
 62 | 
 63 |         cells = list(chain.from_iterable(ws["cells"] for ws in worksheets))
 64 | 
 65 |     else:
 66 |         cells = notebook["cells"]
 67 | 
 68 |     result = ["# Jupyter notebook converted to Python script."]
 69 | 
 70 |     for cell in cells:
 71 |         cell_str = _process_cell(cell, include_output=include_output)
 72 |         if cell_str:
 73 |             result.append(cell_str)
 74 | 
 75 |     return "\n\n".join(result) + "\n"
 76 | 
 77 | 
 78 | def _process_cell(cell: dict[str, Any], *, include_output: bool) -> str | None:
 79 |     """Process a Jupyter notebook cell and return the cell content as a string.
 80 | 
 81 |     Parameters
 82 |     ----------
 83 |     cell : dict[str, Any]
 84 |         The cell dictionary from a Jupyter notebook.
 85 |     include_output : bool
 86 |         Whether to include cell outputs in the generated script.
 87 | 
 88 |     Returns
 89 |     -------
 90 |     str | None
 91 |         The cell content as a string, or ``None`` if the cell is empty.
 92 | 
 93 |     Raises
 94 |     ------
 95 |     ValueError
 96 |         If an unexpected cell type is encountered.
 97 | 
 98 |     """
 99 |     cell_type = cell["cell_type"]
100 | 
101 |     # Validate cell type and handle unexpected types
102 |     if cell_type not in ("markdown", "code", "raw"):
103 |         msg = f"Unknown cell type: {cell_type}"
104 |         raise ValueError(msg)
105 | 
106 |     cell_str = "".join(cell["source"])
107 | 
108 |     # Skip empty cells
109 |     if not cell_str:
110 |         return None
111 | 
112 |     # Convert Markdown and raw cells to multi-line comments
113 |     if cell_type in ("markdown", "raw"):
114 |         return f'"""\n{cell_str}\n"""'
115 | 
116 |     # Add cell output as comments
117 |     outputs = cell.get("outputs")
118 |     if include_output and outputs:
119 |         # Include cell outputs as comments
120 |         raw_lines: list[str] = []
121 |         for output in outputs:
122 |             raw_lines += _extract_output(output)
123 | 
124 |         cell_str += "\n# Output:\n#   " + "\n#   ".join(raw_lines)
125 | 
126 |     return cell_str
127 | 
128 | 
129 | def _extract_output(output: dict[str, Any]) -> list[str]:
130 |     """Extract the output from a Jupyter notebook cell.
131 | 
132 |     Parameters
133 |     ----------
134 |     output : dict[str, Any]
135 |         The output dictionary from a Jupyter notebook cell.
136 | 
137 |     Returns
138 |     -------
139 |     list[str]
140 |         The output as a list of strings.
141 | 
142 |     Raises
143 |     ------
144 |     ValueError
145 |         If an unknown output type is encountered.
146 | 
147 |     """
148 |     output_type = output["output_type"]
149 | 
150 |     if output_type == "stream":
151 |         return output["text"]
152 | 
153 |     if output_type in ("execute_result", "display_data"):
154 |         return output["data"]["text/plain"]
155 | 
156 |     if output_type == "error":
157 |         return [f"Error: {output['ename']}: {output['evalue']}"]
158 | 
159 |     msg = f"Unknown output type: {output_type}"
160 |     raise ValueError(msg)
161 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/os_utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for working with the operating system."""
 2 | 
 3 | from pathlib import Path
 4 | 
 5 | 
 6 | async def ensure_directory(path: Path) -> None:
 7 |     """Ensure the directory exists, creating it if necessary.
 8 | 
 9 |     Parameters
10 |     ----------
11 |     path : Path
12 |         The path to ensure exists.
13 | 
14 |     Raises
15 |     ------
16 |     OSError
17 |         If the directory cannot be created.
18 | 
19 |     """
20 |     try:
21 |         path.mkdir(parents=True, exist_ok=True)
22 |     except OSError as exc:
23 |         msg = f"Failed to create directory {path}: {exc}"
24 |         raise OSError(msg) from exc
25 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/path_utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for working with file paths."""
 2 | 
 3 | import platform
 4 | from pathlib import Path
 5 | 
 6 | 
 7 | def _is_safe_symlink(symlink_path: Path, base_path: Path) -> bool:
 8 |     """Return ``True`` if ``symlink_path`` resolves inside ``base_path``.
 9 | 
10 |     Parameters
11 |     ----------
12 |     symlink_path : Path
13 |         Symlink whose target should be validated.
14 |     base_path : Path
15 |         Directory that the symlink target must remain within.
16 | 
17 |     Returns
18 |     -------
19 |     bool
20 |         Whether the symlink is “safe” (i.e., does not escape ``base_path``).
21 | 
22 |     """
23 |     # On Windows a non-symlink is immediately unsafe
24 |     if platform.system() == "Windows" and not symlink_path.is_symlink():
25 |         return False
26 | 
27 |     try:
28 |         target_path = symlink_path.resolve()
29 |         base_resolved = base_path.resolve()
30 |     except (OSError, ValueError):
31 |         # Any resolution error → treat as unsafe
32 |         return False
33 | 
34 |     return base_resolved in target_path.parents or target_path == base_resolved
35 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/query_parser_utils.py:
--------------------------------------------------------------------------------
  1 | """Utility functions for parsing and validating query parameters."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import string
  6 | 
# Every character of ``string.hexdigits`` (0-9, a-f and A-F); used below to
# validate commit hashes.
HEX_DIGITS: set[str] = set(string.hexdigits)


# Hosting domains that ``_validate_host`` accepts outright, before falling back
# to the prefix heuristic in ``_looks_like_git_host``.
KNOWN_GIT_HOSTS: list[str] = [
    "github.com",
    "gitlab.com",
    "bitbucket.org",
    "gitea.com",
    "codeberg.org",
    "gist.github.com",
]
 18 | 
 19 | 
 20 | def _is_valid_git_commit_hash(commit: str) -> bool:
 21 |     """Validate if the provided string is a valid Git commit hash.
 22 | 
 23 |     This function checks if the commit hash is a 40-character string consisting only
 24 |     of hexadecimal digits, which is the standard format for Git commit hashes.
 25 | 
 26 |     Parameters
 27 |     ----------
 28 |     commit : str
 29 |         The string to validate as a Git commit hash.
 30 | 
 31 |     Returns
 32 |     -------
 33 |     bool
 34 |         ``True`` if the string is a valid 40-character Git commit hash, otherwise ``False``.
 35 | 
 36 |     """
 37 |     sha_hex_length = 40
 38 |     return len(commit) == sha_hex_length and all(c in HEX_DIGITS for c in commit)
 39 | 
 40 | 
 41 | def _is_valid_pattern(pattern: str) -> bool:
 42 |     """Validate if the given pattern contains only valid characters.
 43 | 
 44 |     This function checks if the pattern contains only alphanumeric characters or one
 45 |     of the following allowed characters: dash ('-'), underscore ('_'), dot ('.'),
 46 |     forward slash ('/'), plus ('+'), asterisk ('*'), or the at sign ('@').
 47 | 
 48 |     Parameters
 49 |     ----------
 50 |     pattern : str
 51 |         The pattern to validate.
 52 | 
 53 |     Returns
 54 |     -------
 55 |     bool
 56 |         ``True`` if the pattern is valid, otherwise ``False``.
 57 | 
 58 |     """
 59 |     return all(c.isalnum() or c in "-_./+*@" for c in pattern)
 60 | 
 61 | 
 62 | def _validate_host(host: str) -> None:
 63 |     """Validate a hostname.
 64 | 
 65 |     The host is accepted if it is either present in the hard-coded ``KNOWN_GIT_HOSTS`` list or if it satisfies the
 66 |     simple heuristics in ``_looks_like_git_host``, which try to recognise common self-hosted Git services (e.g. GitLab
 67 |     instances on sub-domains such as 'gitlab.example.com' or 'git.example.com').
 68 | 
 69 |     Parameters
 70 |     ----------
 71 |     host : str
 72 |         Hostname (case-insensitive).
 73 | 
 74 |     Raises
 75 |     ------
 76 |     ValueError
 77 |         If the host cannot be recognised as a probable Git hosting domain.
 78 | 
 79 |     """
 80 |     host = host.lower()
 81 |     if host not in KNOWN_GIT_HOSTS and not _looks_like_git_host(host):
 82 |         msg = f"Unknown domain '{host}' in URL"
 83 |         raise ValueError(msg)
 84 | 
 85 | 
 86 | def _looks_like_git_host(host: str) -> bool:
 87 |     """Check if the given host looks like a Git host.
 88 | 
 89 |     The current heuristic returns ``True`` when the host starts with ``git.`` (e.g. 'git.example.com'), starts with
 90 |     'gitlab.' (e.g. 'gitlab.company.com'), or starts with 'github.' (e.g. 'github.company.com' for GitHub Enterprise).
 91 | 
 92 |     Parameters
 93 |     ----------
 94 |     host : str
 95 |         Hostname (case-insensitive).
 96 | 
 97 |     Returns
 98 |     -------
 99 |     bool
100 |         ``True`` if the host looks like a Git host, otherwise ``False``.
101 | 
102 |     """
103 |     host = host.lower()
104 |     return host.startswith(("git.", "gitlab.", "github."))
105 | 
106 | 
107 | def _validate_url_scheme(scheme: str) -> None:
108 |     """Validate the given scheme against the known schemes.
109 | 
110 |     Parameters
111 |     ----------
112 |     scheme : str
113 |         The scheme to validate.
114 | 
115 |     Raises
116 |     ------
117 |     ValueError
118 |         If the scheme is not 'http' or 'https'.
119 | 
120 |     """
121 |     scheme = scheme.lower()
122 |     if scheme not in ("https", "http"):
123 |         msg = f"Invalid URL scheme '{scheme}' in URL"
124 |         raise ValueError(msg)
125 | 
126 | 
127 | def _get_user_and_repo_from_path(path: str) -> tuple[str, str]:
128 |     """Extract the user and repository names from a given path.
129 | 
130 |     Parameters
131 |     ----------
132 |     path : str
133 |         The path to extract the user and repository names from.
134 | 
135 |     Returns
136 |     -------
137 |     tuple[str, str]
138 |         A tuple containing the user and repository names.
139 | 
140 |     Raises
141 |     ------
142 |     ValueError
143 |         If the path does not contain at least two parts.
144 | 
145 |     """
146 |     min_path_parts = 2
147 |     path_parts = path.lower().strip("/").split("/")
148 |     if len(path_parts) < min_path_parts:
149 |         msg = f"Invalid repository URL '{path}'"
150 |         raise ValueError(msg)
151 |     return path_parts[0], path_parts[1]
152 | 


--------------------------------------------------------------------------------
/src/gitingest/utils/timeout_wrapper.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for the Gitingest package."""
 2 | 
 3 | import asyncio
 4 | import functools
 5 | from typing import Awaitable, Callable, TypeVar
 6 | 
 7 | from gitingest.utils.compat_typing import ParamSpec
 8 | from gitingest.utils.exceptions import AsyncTimeoutError
 9 | 
10 | T = TypeVar("T")
11 | P = ParamSpec("P")
12 | 
13 | 
def async_timeout(seconds: int) -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
    """Create a decorator that bounds the running time of an async function.

    The decorated function is awaited through ``asyncio.wait_for``. When the
    time limit is exceeded, the resulting ``asyncio.TimeoutError`` is converted
    into an ``AsyncTimeoutError``.

    Parameters
    ----------
    seconds : int
        The maximum allowed time (in seconds) for the asynchronous function to complete.

    Returns
    -------
    Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]
        A decorator for async functions. The wrapped function returns the original
        result if it finishes in time and raises ``AsyncTimeoutError`` otherwise.

    """

    def apply_timeout(func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
        @functools.wraps(func)
        async def timed(*args: P.args, **kwargs: P.kwargs) -> T:
            try:
                outcome = await asyncio.wait_for(func(*args, **kwargs), timeout=seconds)
            except asyncio.TimeoutError as exc:
                msg = f"Operation timed out after {seconds} seconds"
                raise AsyncTimeoutError(msg) from exc
            else:
                return outcome

        return timed

    return apply_timeout
47 | 


--------------------------------------------------------------------------------
/src/server/__init__.py:
--------------------------------------------------------------------------------
1 | """Server module."""
2 | 


--------------------------------------------------------------------------------
/src/server/form_types.py:
--------------------------------------------------------------------------------
 1 | """Reusable form type aliases for FastAPI form parameters."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from typing import TYPE_CHECKING, Optional
 6 | 
 7 | from fastapi import Form
 8 | 
 9 | from gitingest.utils.compat_typing import Annotated
10 | 
11 | if TYPE_CHECKING:
12 |     from gitingest.utils.compat_typing import TypeAlias
13 | 
# Reusable ``Annotated`` aliases for form fields of type ``str``, ``int`` and
# ``Optional[str]``.
# NOTE(review): ``Form(...)`` passes Ellipsis as the default, presumably
# FastAPI's marker for a required field - confirm against the FastAPI version in use.
StrForm: TypeAlias = Annotated[str, Form(...)]
IntForm: TypeAlias = Annotated[int, Form(...)]
# The optional variant calls ``Form()`` without a default and admits ``None``.
OptStrForm: TypeAlias = Annotated[Optional[str], Form()]
17 | 


--------------------------------------------------------------------------------
/src/server/main.py:
--------------------------------------------------------------------------------
  1 | """Main module for the FastAPI application."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import os
  6 | import threading
  7 | from pathlib import Path
  8 | 
  9 | import sentry_sdk
 10 | from dotenv import load_dotenv
 11 | from fastapi import FastAPI, Request
 12 | from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
 13 | from fastapi.staticfiles import StaticFiles
 14 | from slowapi.errors import RateLimitExceeded
 15 | from starlette.middleware.trustedhost import TrustedHostMiddleware
 16 | 
 17 | from server.metrics_server import start_metrics_server
 18 | from server.routers import dynamic, index, ingest
 19 | from server.server_config import templates
 20 | from server.server_utils import lifespan, limiter, rate_limit_exception_handler
 21 | 
# Load environment variables from .env file
load_dotenv()

# Initialize Sentry SDK if enabled
# NOTE: the check is "variable is set", not "variable is truthy" - any value of
# GITINGEST_SENTRY_ENABLED (including "false" or an empty string) enables this branch.
if os.getenv("GITINGEST_SENTRY_ENABLED") is not None:
    sentry_dsn = os.getenv("GITINGEST_SENTRY_DSN")

    # Only initialize Sentry if DSN is provided
    if sentry_dsn:
        # Configure Sentry options from environment variables
        # (the two sample rates are parsed with ``float``, so a non-numeric value
        # raises ``ValueError`` while this module is being imported)
        traces_sample_rate = float(os.getenv("GITINGEST_SENTRY_TRACES_SAMPLE_RATE", "1.0"))
        profile_session_sample_rate = float(os.getenv("GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE", "1.0"))
        profile_lifecycle = os.getenv("GITINGEST_SENTRY_PROFILE_LIFECYCLE", "trace")
        # Only the literal "true" (case-insensitive) turns this on; defaults to on
        send_default_pii = os.getenv("GITINGEST_SENTRY_SEND_DEFAULT_PII", "true").lower() == "true"
        sentry_environment = os.getenv("GITINGEST_SENTRY_ENVIRONMENT", "")

        sentry_sdk.init(
            dsn=sentry_dsn,
            # Add data like request headers and IP for users
            send_default_pii=send_default_pii,
            # Set traces_sample_rate to capture transactions for tracing
            traces_sample_rate=traces_sample_rate,
            # Set profile_session_sample_rate to profile sessions
            profile_session_sample_rate=profile_session_sample_rate,
            # Set profile_lifecycle to automatically run the profiler
            profile_lifecycle=profile_lifecycle,
            # Set environment name
            environment=sentry_environment,
        )
 51 | 
# Initialize the FastAPI application with lifespan
# (``docs_url`` and ``redoc_url`` are set to ``None``; a custom ``/docs`` route is
# registered further down in this module)
app = FastAPI(lifespan=lifespan, docs_url=None, redoc_url=None)
# Expose the rate limiter on the application state
app.state.limiter = limiter

# Register the custom exception handler for rate limits
app.add_exception_handler(RateLimitExceeded, rate_limit_exception_handler)

# Start metrics server in a separate thread if enabled
# NOTE: as with Sentry, any value of GITINGEST_METRICS_ENABLED enables this
# branch because only "is set" is checked.
if os.getenv("GITINGEST_METRICS_ENABLED") is not None:
    metrics_host = os.getenv("GITINGEST_METRICS_HOST", "127.0.0.1")
    metrics_port = int(os.getenv("GITINGEST_METRICS_PORT", "9090"))
    # Daemon thread: it does not keep the process alive on shutdown
    metrics_thread = threading.Thread(
        target=start_metrics_server,
        args=(metrics_host, metrics_port),
        daemon=True,
    )
    metrics_thread.start()


# Mount static files dynamically to serve CSS, JS, and other static assets
# (the directory is located relative to this file: ``<parent of this package>/static``)
static_dir = Path(__file__).parent.parent / "static"
app.mount("/static", StaticFiles(directory=static_dir), name="static")
 74 | 
 75 | 
 76 | # Fetch allowed hosts from the environment or use the default values
 77 | allowed_hosts = os.getenv("ALLOWED_HOSTS")
 78 | if allowed_hosts:
 79 |     allowed_hosts = allowed_hosts.split(",")
 80 | else:
 81 |     # Define the default allowed hosts for the application
 82 |     default_allowed_hosts = ["gitingest.com", "*.gitingest.com", "localhost", "127.0.0.1"]
 83 |     allowed_hosts = default_allowed_hosts
 84 | 
 85 | # Add middleware to enforce allowed hosts
 86 | app.add_middleware(TrustedHostMiddleware, allowed_hosts=allowed_hosts)
 87 | 
 88 | 
@app.get("/health")
async def health_check() -> dict[str, str]:
    """Health check endpoint to verify that the server is running.

    **Returns**

    - **dict[str, str]**: A JSON object with a "status" key indicating the server's health status.

    """
    # Constant payload: nothing is probed here, so a response only shows that the
    # application is able to serve requests.
    return {"status": "healthy"}
 99 | 
100 | 
@app.head("/", include_in_schema=False)
async def head_root() -> HTMLResponse:
    """Answer HTTP HEAD requests for the root URL.

    **This endpoint returns a body-less response with an HTML content type**,
    giving clients a lightweight way to check that the server is responding
    without downloading the index page.

    **Returns**

    - **HTMLResponse**: A response without content and with a ``text/html`` content type header

    """
    html_headers = {"content-type": "text/html; charset=utf-8"}
    return HTMLResponse(content=None, headers=html_headers)
115 | 
116 | 
@app.get("/robots.txt", include_in_schema=False)
async def robots() -> FileResponse:
    """Serve the robots.txt file to guide search engine crawlers.

    **This endpoint serves the ``robots.txt`` file located in the static directory**
    to provide instructions to search engine crawlers about which parts of the site
    they should or should not index.

    **Returns**

    - **FileResponse**: The ``robots.txt`` file located in the static directory

    """
    # Use the absolute static directory that is also mounted under /static, so
    # the file is found regardless of the process's working directory.
    return FileResponse(static_dir / "robots.txt")
131 | 
132 | 
@app.get("/llms.txt")
async def llm_txt() -> FileResponse:
    """Serve the llms.txt file to provide information about the site to LLMs.

    **This endpoint serves the ``llms.txt`` file located in the static directory**
    to provide information about the site to Large Language Models (LLMs)
    and other AI systems that may be crawling the site.

    **Returns**

    - **FileResponse**: The ``llms.txt`` file located in the static directory

    """
    # Use the absolute static directory that is also mounted under /static, so
    # the file is found regardless of the process's working directory.
    return FileResponse(static_dir / "llms.txt")
147 | 
148 | 
@app.get("/docs", response_class=HTMLResponse, include_in_schema=False)
async def custom_swagger_ui(request: Request) -> HTMLResponse:
    """Render the custom Swagger UI page.

    **This endpoint renders the ``swagger_ui.jinja`` template**, giving users
    an interactive page to explore and try out the available API endpoints.

    **Parameters**

    - **request** (`Request`): The incoming HTTP request, passed to the template context

    **Returns**

    - **HTMLResponse**: The rendered Swagger UI documentation page

    """
    context = {"request": request}
    return templates.TemplateResponse("swagger_ui.jinja", context)
167 | 
168 | 
@app.get("/api", include_in_schema=True)
def openapi_json_get() -> JSONResponse:
    """Return the OpenAPI schema.

    **This endpoint returns the application's OpenAPI schema (openapi.json)**,
    describing the API structure, endpoints, and data models so that it can be
    used for documentation and client generation.

    **Returns**

    - **JSONResponse**: The OpenAPI schema as JSON

    """
    schema = app.openapi()
    return JSONResponse(schema)
183 | 
184 | 
@app.api_route("/api", methods=["POST", "PUT", "DELETE", "OPTIONS", "HEAD"], include_in_schema=False)
@app.api_route("/api/", methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD"], include_in_schema=False)
def openapi_json() -> JSONResponse:
    """Return the OpenAPI schema for the non-GET methods and the trailing-slash path.

    **This endpoint returns the same OpenAPI schema (openapi.json)** as ``GET /api``
    for every other listed HTTP method on ``/api`` and for all listed methods on
    ``/api/``, so clients get the API documentation regardless of the method used.

    **Returns**

    - **JSONResponse**: The OpenAPI schema as JSON

    """
    schema = app.openapi()
    return JSONResponse(schema)
200 | 
201 | 
# Include routers for modular endpoints.
# NOTE(review): ``dynamic`` declares a catch-all ``/{full_path:path}`` GET route, so it is
# presumably kept last on purpose so that it does not shadow the other routes -- keep this order.
app.include_router(index)
app.include_router(ingest)
app.include_router(dynamic)
206 | 


--------------------------------------------------------------------------------
/src/server/metrics_server.py:
--------------------------------------------------------------------------------
 1 | """Prometheus metrics server running on a separate port."""
 2 | 
 3 | import logging
 4 | 
 5 | import uvicorn
 6 | from fastapi import FastAPI
 7 | from fastapi.responses import HTMLResponse
 8 | from prometheus_client import REGISTRY, generate_latest
 9 | 
# Create a logger for this module
logger = logging.getLogger(__name__)

# Create a separate FastAPI app for metrics, so it can be served on its own host/port
# (see ``start_metrics_server``). ``docs_url``/``redoc_url`` are set to ``None`` so that
# no interactive documentation pages are registered on this app.
metrics_app = FastAPI(
    title="Gitingest Metrics",
    description="Prometheus metrics for Gitingest",
    docs_url=None,
    redoc_url=None,
)
20 | 
21 | 
@metrics_app.get("/metrics")
async def metrics() -> HTMLResponse:
    """Serve the current Prometheus metrics.

    The handler performs no authentication itself; it dumps the default
    ``REGISTRY`` in the Prometheus text exposition format.

    Returns
    -------
    HTMLResponse
        A 200 response whose body is the metrics dump, sent as ``text/plain``.

    """
    exposition = generate_latest(REGISTRY)
    return HTMLResponse(content=exposition, status_code=200, media_type="text/plain")
39 | 
40 | 
def start_metrics_server(host: str = "127.0.0.1", port: int = 9090) -> None:
    """Run the metrics application with uvicorn on its own host and port.

    The bind address is logged first, then ``uvicorn.run`` is called with
    ``metrics_app``.

    Parameters
    ----------
    host : str
        The interface to bind to (default: ``127.0.0.1``, i.e. loopback only).
    port : int
        The TCP port to listen on (default: ``9090``).

    Returns
    -------
    None

    """
    logger.info("Starting metrics server on %s:%s", host, port)
    bind_options = {"host": host, "port": port}
    uvicorn.run(metrics_app, **bind_options)
58 | 


--------------------------------------------------------------------------------
/src/server/models.py:
--------------------------------------------------------------------------------
  1 | """Pydantic models for the query form."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from enum import Enum
  6 | from typing import Union
  7 | 
  8 | from pydantic import BaseModel, Field, field_validator
  9 | 
 10 | # needed for type checking (pydantic)
 11 | from server.form_types import IntForm, OptStrForm, StrForm  # noqa: TC001 (typing-only-first-party-import)
 12 | 
 13 | 
class PatternType(str, Enum):
    """Enumeration for pattern types used in file filtering.

    Members subclass ``str``, so they compare equal to their plain string
    values (``"include"`` / ``"exclude"``).
    """

    # Only files matching the pattern are kept
    INCLUDE = "include"
    # Files matching the pattern are left out
    EXCLUDE = "exclude"
 19 | 
 20 | 
 21 | class IngestRequest(BaseModel):
 22 |     """Request model for the /api/ingest endpoint.
 23 | 
 24 |     Attributes
 25 |     ----------
 26 |     input_text : str
 27 |         The Git repository URL or slug to ingest.
 28 |     max_file_size : int
 29 |         Maximum file size slider position (0-500) for filtering files.
 30 |     pattern_type : PatternType
 31 |         Type of pattern to use for file filtering (include or exclude).
 32 |     pattern : str
 33 |         Glob/regex pattern string for file filtering.
 34 |     token : str | None
 35 |         GitHub personal access token (PAT) for accessing private repositories.
 36 | 
 37 |     """
 38 | 
 39 |     input_text: str = Field(..., description="Git repository URL or slug to ingest")
 40 |     max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)")
 41 |     pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering")
 42 |     pattern: str = Field(default="", description="Glob/regex pattern for file filtering")
 43 |     token: str | None = Field(default=None, description="GitHub PAT for private repositories")
 44 | 
 45 |     @field_validator("input_text")
 46 |     @classmethod
 47 |     def validate_input_text(cls, v: str) -> str:
 48 |         """Validate that input_text is not empty."""
 49 |         if not v.strip():
 50 |             err = "input_text cannot be empty"
 51 |             raise ValueError(err)
 52 |         return v.strip()
 53 | 
 54 |     @field_validator("pattern")
 55 |     @classmethod
 56 |     def validate_pattern(cls, v: str) -> str:
 57 |         """Validate pattern field."""
 58 |         return v.strip()
 59 | 
 60 | 
class IngestSuccessResponse(BaseModel):
    """Success response model for the /api/ingest endpoint.

    All fields are required; the model is serialised into the JSON body of a
    successful ingestion response.

    Attributes
    ----------
    repo_url : str
        The original repository URL that was processed.
    short_repo_url : str
        Short form of repository URL (user/repo).
    summary : str
        Summary of the ingestion process including token estimates.
    ingest_id : str
        Ingestion id used to download full context.
    tree : str
        File tree structure of the repository.
    content : str
        Processed content from the repository files.
    default_max_file_size : int
        The file size slider position used.
    pattern_type : str
        The pattern type used for filtering.
    pattern : str
        The pattern used for filtering.

    """

    repo_url: str = Field(..., description="Original repository URL")
    short_repo_url: str = Field(..., description="Short repository URL (user/repo)")
    summary: str = Field(..., description="Ingestion summary with token estimates")
    ingest_id: str = Field(..., description="Ingestion id used to download full context")
    tree: str = Field(..., description="File tree structure")
    content: str = Field(..., description="Processed file content")
    default_max_file_size: int = Field(..., description="File size slider position used")
    pattern_type: str = Field(..., description="Pattern type used")
    pattern: str = Field(..., description="Pattern used")
 96 | 
 97 | 
class IngestErrorResponse(BaseModel):
    """Error response model for the /api/ingest endpoint.

    Carries a single human-readable message; the HTTP status code is chosen by
    the code that wraps this model in a response.

    Attributes
    ----------
    error : str
        Error message describing what went wrong.

    """

    error: str = Field(..., description="Error message")
109 | 
110 | 
# Union type for API responses: either a successful ingestion or an error message
IngestResponse = Union[IngestSuccessResponse, IngestErrorResponse]
113 | 
114 | 
class QueryForm(BaseModel):
    """Form data for the query.

    Unlike ``IngestRequest``, the fields here carry no range checks or validators.

    Attributes
    ----------
    input_text : str
        Text or URL supplied in the form.
    max_file_size : int
        The maximum allowed file size for the input, specified by the user.
    pattern_type : str
        The type of pattern used for the query (``include`` or ``exclude``).
    pattern : str
        Glob/regex pattern string.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    """

    input_text: str
    max_file_size: int
    pattern_type: str
    pattern: str
    token: str | None = None

    @classmethod
    def as_form(
        cls,
        input_text: StrForm,
        max_file_size: IntForm,
        pattern_type: StrForm,
        pattern: StrForm,
        token: OptStrForm,
    ) -> QueryForm:
        """Create a QueryForm from FastAPI form parameters.

        The ``*Form`` annotations come from ``server.form_types``; each argument is
        passed through unchanged to the model constructor.

        Parameters
        ----------
        input_text : StrForm
            The input text provided by the user.
        max_file_size : IntForm
            The maximum allowed file size for the input.
        pattern_type : StrForm
            The type of pattern used for the query (``include`` or ``exclude``).
        pattern : StrForm
            Glob/regex pattern string.
        token : OptStrForm
            GitHub personal access token (PAT) for accessing private repositories.

        Returns
        -------
        QueryForm
            The QueryForm instance.

        """
        return cls(
            input_text=input_text,
            max_file_size=max_file_size,
            pattern_type=pattern_type,
            pattern=pattern,
            token=token,
        )
176 | 


--------------------------------------------------------------------------------
/src/server/query_processor.py:
--------------------------------------------------------------------------------
  1 | """Process a query by parsing input, cloning a repository, and generating a summary."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from pathlib import Path
  6 | from typing import cast
  7 | 
  8 | from gitingest.clone import clone_repo
  9 | from gitingest.ingestion import ingest_query
 10 | from gitingest.query_parser import IngestionQuery, parse_query
 11 | from gitingest.utils.git_utils import validate_github_token
 12 | from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse
 13 | from server.server_config import MAX_DISPLAY_SIZE
 14 | from server.server_utils import Colors, log_slider_to_size
 15 | 
 16 | 
 17 | async def process_query(
 18 |     input_text: str,
 19 |     slider_position: int,
 20 |     pattern_type: str = "exclude",
 21 |     pattern: str = "",
 22 |     token: str | None = None,
 23 | ) -> IngestResponse:
 24 |     """Process a query by parsing input, cloning a repository, and generating a summary.
 25 | 
 26 |     Handle user input, process Git repository data, and prepare
 27 |     a response for rendering a template with the processed results or an error message.
 28 | 
 29 |     Parameters
 30 |     ----------
 31 |     input_text : str
 32 |         Input text provided by the user, typically a Git repository URL or slug.
 33 |     slider_position : int
 34 |         Position of the slider, representing the maximum file size in the query.
 35 |     pattern_type : str
 36 |         Type of pattern to use (either "include" or "exclude") (default: ``"exclude"``).
 37 |     pattern : str
 38 |         Pattern to include or exclude in the query, depending on the pattern type.
 39 |     token : str | None
 40 |         GitHub personal access token (PAT) for accessing private repositories.
 41 | 
 42 |     Returns
 43 |     -------
 44 |     IngestResponse
 45 |         A union type, corresponding to IngestErrorResponse or IngestSuccessResponse
 46 | 
 47 |     Raises
 48 |     ------
 49 |     ValueError
 50 |         If an invalid pattern type is provided.
 51 | 
 52 |     """
 53 |     if pattern_type == "include":
 54 |         include_patterns = pattern
 55 |         exclude_patterns = None
 56 |     elif pattern_type == "exclude":
 57 |         exclude_patterns = pattern
 58 |         include_patterns = None
 59 |     else:
 60 |         msg = f"Invalid pattern type: {pattern_type}"
 61 |         raise ValueError(msg)
 62 | 
 63 |     if token:
 64 |         validate_github_token(token)
 65 | 
 66 |     max_file_size = log_slider_to_size(slider_position)
 67 | 
 68 |     query: IngestionQuery | None = None
 69 |     short_repo_url = ""
 70 | 
 71 |     try:
 72 |         query = await parse_query(
 73 |             source=input_text,
 74 |             max_file_size=max_file_size,
 75 |             from_web=True,
 76 |             include_patterns=include_patterns,
 77 |             ignore_patterns=exclude_patterns,
 78 |             token=token,
 79 |         )
 80 |         query.ensure_url()
 81 | 
 82 |         # Sets the "<user>/<repo>" for the page title
 83 |         short_repo_url = f"{query.user_name}/{query.repo_name}"
 84 | 
 85 |         clone_config = query.extract_clone_config()
 86 |         await clone_repo(clone_config, token=token)
 87 | 
 88 |         summary, tree, content = ingest_query(query)
 89 | 
 90 |         local_txt_file = Path(clone_config.local_path).with_suffix(".txt")
 91 | 
 92 |         with local_txt_file.open("w", encoding="utf-8") as f:
 93 |             f.write(tree + "\n" + content)
 94 | 
 95 |     except Exception as exc:
 96 |         if query and query.url:
 97 |             _print_error(query.url, exc, max_file_size, pattern_type, pattern)
 98 |         else:
 99 |             print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<-  {Colors.END}", end="")
100 |             print(f"{Colors.RED}{exc}{Colors.END}")
101 | 
102 |         return IngestErrorResponse(error=str(exc))
103 | 
104 |     if len(content) > MAX_DISPLAY_SIZE:
105 |         content = (
106 |             f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, "
107 |             "download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE]
108 |         )
109 | 
110 |     query.ensure_url()
111 |     query.url = cast("str", query.url)
112 | 
113 |     _print_success(
114 |         url=query.url,
115 |         max_file_size=max_file_size,
116 |         pattern_type=pattern_type,
117 |         pattern=pattern,
118 |         summary=summary,
119 |     )
120 | 
121 |     return IngestSuccessResponse(
122 |         repo_url=input_text,
123 |         short_repo_url=short_repo_url,
124 |         summary=summary,
125 |         ingest_id=query.id,
126 |         tree=tree,
127 |         content=content,
128 |         default_max_file_size=slider_position,
129 |         pattern_type=pattern_type,
130 |         pattern=pattern,
131 |     )
132 | 
133 | 
def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None:
    """Print the query details on the current line, without a trailing newline.

    The URL is always printed; the size is added only when it differs from 50 kB,
    and the pattern only when it is non-empty and the pattern type is recognised.

    Parameters
    ----------
    url : str
        The URL associated with the query.
    max_file_size : int
        The maximum file size allowed for the query, in bytes.
    pattern_type : str
        Specifies the type of pattern to use, either "include" or "exclude".
    pattern : str
        The actual pattern string to include or exclude in the query.

    """
    default_max_file_kb = 50
    size_kb = int(max_file_size / 1024)

    print(f"{Colors.WHITE}{url:<20}{Colors.END}", end="")

    if size_kb != default_max_file_kb:
        print(
            f" | {Colors.YELLOW}Size: {size_kb}kB{Colors.END}",
            end="",
        )

    # Nothing more to report when no pattern was supplied.
    if pattern == "":
        return

    if pattern_type == "include":
        print(f" | {Colors.YELLOW}Include {pattern}{Colors.END}", end="")
    elif pattern_type == "exclude":
        print(f" | {Colors.YELLOW}Exclude {pattern}{Colors.END}", end="")
160 | 
161 | 
def _print_error(url: str, exc: Exception, max_file_size: int, pattern_type: str, pattern: str) -> None:
    """Print a one-line warning made of a WARN prefix, the query details and the exception.

    Parameters
    ----------
    url : str
        The URL associated with the query that caused the error.
    exc : Exception
        The exception raised during the query or process.
    max_file_size : int
        The maximum file size allowed for the query, in bytes.
    pattern_type : str
        Specifies the type of pattern to use, either "include" or "exclude".
    pattern : str
        The actual pattern string to include or exclude in the query.

    """
    warn_prefix = f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<-  {Colors.END}"
    error_suffix = f" | {Colors.RED}{exc}{Colors.END}"

    print(warn_prefix, end="")
    _print_query(url, max_file_size, pattern_type, pattern)
    print(error_suffix)
182 | 
183 | 
def _print_success(url: str, max_file_size: int, pattern_type: str, pattern: str, summary: str) -> None:
    """Print a one-line success message made of an INFO prefix, the query details and the token estimate.

    The token estimate is taken from the part of ``summary`` that follows the
    ``"Estimated tokens:"`` marker. When the marker is absent, ``"tokens: n/a"``
    is printed instead of raising, so that a logging helper can never turn a
    successful ingestion into an error.

    Parameters
    ----------
    url : str
        The URL associated with the successful query.
    max_file_size : int
        The maximum file size allowed for the query, in bytes.
    pattern_type : str
        Specifies the type of pattern to use, either "include" or "exclude".
    pattern : str
        The actual pattern string to include or exclude in the query.
    summary : str
        A summary of the query result, including details like estimated tokens.

    """
    # ``str.partition`` never raises (unlike ``str.index``); ``marker`` is empty when not found.
    _, marker, remainder = summary.partition("Estimated tokens:")
    # Same text as before when the marker exists: everything from "tokens:" onwards.
    estimated_tokens = f"tokens:{remainder}" if marker else "tokens: n/a"

    print(f"{Colors.GREEN}INFO{Colors.END}: {Colors.GREEN}<-  {Colors.END}", end="")
    _print_query(url, max_file_size, pattern_type, pattern)
    print(f" | {Colors.PURPLE}{estimated_tokens}{Colors.END}")
205 | 


--------------------------------------------------------------------------------
/src/server/routers/__init__.py:
--------------------------------------------------------------------------------
1 | """Module containing the routers for the FastAPI application."""
2 | 
3 | from server.routers.dynamic import router as dynamic
4 | from server.routers.index import router as index
5 | from server.routers.ingest import router as ingest
6 | 
7 | __all__ = ["dynamic", "index", "ingest"]
8 | 


--------------------------------------------------------------------------------
/src/server/routers/dynamic.py:
--------------------------------------------------------------------------------
 1 | """The dynamic router module defines handlers for dynamic path requests."""
 2 | 
 3 | from fastapi import APIRouter, Request
 4 | from fastapi.responses import HTMLResponse
 5 | 
 6 | from server.server_config import templates
 7 | 
 8 | router = APIRouter()
 9 | 
10 | 
@router.get("/{full_path:path}", include_in_schema=False)
async def catch_all(request: Request, full_path: str) -> HTMLResponse:
    """Render the ``git.jinja`` page for an arbitrary request path.

    This endpoint matches every GET request path and hands the captured
    ``full_path`` to the ``git.jinja`` template as the repository URL.

    Parameters
    ----------
    request : Request
        The incoming request object, which provides context for rendering the response.
    full_path : str
        The full path extracted from the URL, passed to the template as ``repo_url``.

    Returns
    -------
    HTMLResponse
        The rendered template, given the repository URL and the default
        file size slider position.

    """
    context = {
        "request": request,
        "repo_url": full_path,
        "default_max_file_size": 243,
    }
    return templates.TemplateResponse("git.jinja", context)
40 | 


--------------------------------------------------------------------------------
/src/server/routers/index.py:
--------------------------------------------------------------------------------
 1 | """Module defining the FastAPI router for the home page of the application."""
 2 | 
 3 | from fastapi import APIRouter, Request
 4 | from fastapi.responses import HTMLResponse
 5 | 
 6 | from server.server_config import EXAMPLE_REPOS, templates
 7 | 
 8 | router = APIRouter()
 9 | 
10 | 
@router.get("/", response_class=HTMLResponse, include_in_schema=False)
async def home(request: Request) -> HTMLResponse:
    """Render the home page.

    This endpoint renders the ``index.jinja`` template, passing it the list of
    example repositories and the default file size slider position.

    Parameters
    ----------
    request : Request
        The incoming request object, which provides context for rendering the response.

    Returns
    -------
    HTMLResponse
        The rendered home page, given the example repositories and the default
        file size slider position.

    """
    context = {
        "request": request,
        "examples": EXAMPLE_REPOS,
        "default_max_file_size": 243,
    }
    return templates.TemplateResponse("index.jinja", context)
38 | 


--------------------------------------------------------------------------------
/src/server/routers/ingest.py:
--------------------------------------------------------------------------------
  1 | """Ingest endpoint for the API."""
  2 | 
  3 | from fastapi import APIRouter, HTTPException, Request, status
  4 | from fastapi.responses import FileResponse, JSONResponse
  5 | from prometheus_client import Counter
  6 | 
  7 | from gitingest.config import TMP_BASE_PATH
  8 | from server.models import IngestRequest
  9 | from server.routers_utils import COMMON_INGEST_RESPONSES, _perform_ingestion
 10 | from server.server_config import MAX_DISPLAY_SIZE
 11 | from server.server_utils import limiter
 12 | 
# Prometheus counter of ingest requests, labelled by HTTP status code and requested URL.
# NOTE(review): the "url" label is user-supplied, so the number of label values is unbounded.
ingest_counter = Counter("gitingest_ingest_total", "Number of ingests", ["status", "url"])

router = APIRouter()
 16 | 
 17 | 
 18 | @router.post("/api/ingest", responses=COMMON_INGEST_RESPONSES)
 19 | @limiter.limit("10/minute")
 20 | async def api_ingest(
 21 |     request: Request,  # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument
 22 |     ingest_request: IngestRequest,
 23 | ) -> JSONResponse:
 24 |     """Ingest a Git repository and return processed content.
 25 | 
 26 |     **This endpoint processes a Git repository by cloning it, analyzing its structure,**
 27 |     and returning a summary with the repository's content. The response includes
 28 |     file tree structure, processed content, and metadata about the ingestion.
 29 | 
 30 |     **Parameters**
 31 | 
 32 |     - **ingest_request** (`IngestRequest`): Pydantic model containing ingestion parameters
 33 | 
 34 |     **Returns**
 35 | 
 36 |     - **JSONResponse**: Success response with ingestion results or error response with appropriate HTTP status code
 37 | 
 38 |     """
 39 |     response = await _perform_ingestion(
 40 |         input_text=ingest_request.input_text,
 41 |         max_file_size=ingest_request.max_file_size,
 42 |         pattern_type=ingest_request.pattern_type,
 43 |         pattern=ingest_request.pattern,
 44 |         token=ingest_request.token,
 45 |     )
 46 |     # limit URL to 255 characters
 47 |     ingest_counter.labels(status=response.status_code, url=ingest_request.input_text[:255]).inc()
 48 |     return response
 49 | 
 50 | 
 51 | @router.get("/api/{user}/{repository}", responses=COMMON_INGEST_RESPONSES)
 52 | @limiter.limit("10/minute")
 53 | async def api_ingest_get(
 54 |     request: Request,  # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument
 55 |     user: str,
 56 |     repository: str,
 57 |     max_file_size: int = MAX_DISPLAY_SIZE,
 58 |     pattern_type: str = "exclude",
 59 |     pattern: str = "",
 60 |     token: str = "",
 61 | ) -> JSONResponse:
 62 |     """Ingest a GitHub repository via GET and return processed content.
 63 | 
 64 |     **This endpoint processes a GitHub repository by analyzing its structure and returning a summary**
 65 |     with the repository's content. The response includes file tree structure, processed content, and
 66 |     metadata about the ingestion. All ingestion parameters are optional and can be provided as query parameters.
 67 | 
 68 |     **Path Parameters**
 69 |     - **user** (`str`): GitHub username or organization
 70 |     - **repository** (`str`): GitHub repository name
 71 | 
 72 |     **Query Parameters**
 73 |     - **max_file_size** (`int`, optional): Maximum file size to include in the digest (default: 50 KB)
 74 |     - **pattern_type** (`str`, optional): Type of pattern to use ("include" or "exclude", default: "exclude")
 75 |     - **pattern** (`str`, optional): Pattern to include or exclude in the query (default: "")
 76 |     - **token** (`str`, optional): GitHub personal access token for private repositories (default: "")
 77 | 
 78 |     **Returns**
 79 |     - **JSONResponse**: Success response with ingestion results or error response with appropriate HTTP status code
 80 |     """
 81 |     response = await _perform_ingestion(
 82 |         input_text=f"{user}/{repository}",
 83 |         max_file_size=max_file_size,
 84 |         pattern_type=pattern_type,
 85 |         pattern=pattern,
 86 |         token=token or None,
 87 |     )
 88 |     # limit URL to 255 characters
 89 |     ingest_counter.labels(status=response.status_code, url=f"{user}/{repository}"[:255]).inc()
 90 |     return response
 91 | 
 92 | 
 93 | @router.get("/api/download/file/{ingest_id}", response_class=FileResponse)
 94 | async def download_ingest(ingest_id: str) -> FileResponse:
 95 |     """Download the first text file produced for an ingest ID.
 96 | 
 97 |     **This endpoint retrieves the first ``*.txt`` file produced during the ingestion process**
 98 |     and returns it as a downloadable file. The file is streamed with media type ``text/plain``
 99 |     and prompts the browser to download it.
100 | 
101 |     **Parameters**
102 | 
103 |     - **ingest_id** (`str`): Identifier that the ingest step emitted
104 | 
105 |     **Returns**
106 | 
107 |     - **FileResponse**: Streamed response with media type ``text/plain``
108 | 
109 |     **Raises**
110 | 
111 |     - **HTTPException**: **404** - digest directory is missing or contains no ``*.txt`` file
112 |     - **HTTPException**: **403** - the process lacks permission to read the directory or file
113 | 
114 |     """
115 |     directory = TMP_BASE_PATH / ingest_id
116 | 
117 |     if not directory.is_dir():
118 |         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Digest {ingest_id!r} not found")
119 | 
120 |     try:
121 |         first_txt_file = next(directory.glob("*.txt"))
122 |     except StopIteration as exc:
123 |         raise HTTPException(
124 |             status_code=status.HTTP_404_NOT_FOUND,
125 |             detail=f"No .txt file found for digest {ingest_id!r}",
126 |         ) from exc
127 | 
128 |     try:
129 |         return FileResponse(path=first_txt_file, media_type="text/plain", filename=first_txt_file.name)
130 |     except PermissionError as exc:
131 |         raise HTTPException(
132 |             status_code=status.HTTP_403_FORBIDDEN,
133 |             detail=f"Permission denied for {first_txt_file}",
134 |         ) from exc
135 | 


--------------------------------------------------------------------------------
/src/server/routers_utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for the ingest endpoints."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from typing import Any
 6 | 
 7 | from fastapi import status
 8 | from fastapi.responses import JSONResponse
 9 | 
10 | from server.models import IngestErrorResponse, IngestSuccessResponse
11 | from server.query_processor import process_query
12 | 
# Response declarations (status code -> model and description) passed as ``responses=``
# to the ingest route decorators, so the schema lists the same outcomes for each of them.
COMMON_INGEST_RESPONSES: dict[int | str, dict[str, Any]] = {
    status.HTTP_200_OK: {"model": IngestSuccessResponse, "description": "Successful ingestion"},
    status.HTTP_400_BAD_REQUEST: {"model": IngestErrorResponse, "description": "Bad request or processing error"},
    status.HTTP_500_INTERNAL_SERVER_ERROR: {"model": IngestErrorResponse, "description": "Internal server error"},
}
18 | 
19 | 
async def _perform_ingestion(
    input_text: str,
    max_file_size: int,
    pattern_type: str,
    pattern: str,
    token: str | None,
) -> JSONResponse:
    """Call ``process_query`` and turn its outcome into a ``JSONResponse``.

    Shared by the ``POST`` and ``GET`` ingest endpoints so both map outcomes to
    status codes the same way:

    - ``IngestErrorResponse`` result or a raised ``ValueError`` -> 400
    - any other result -> 200
    - any other exception -> 500

    ``max_file_size`` is forwarded to ``process_query`` as ``slider_position``.
    """
    try:
        result = await process_query(
            input_text=input_text,
            slider_position=max_file_size,
            pattern_type=pattern_type,
            pattern=pattern,
            token=token,
        )

        # A returned error model is a handled failure (400); anything else is a success (200).
        failed = isinstance(result, IngestErrorResponse)
        status_code = status.HTTP_400_BAD_REQUEST if failed else status.HTTP_200_OK
        return JSONResponse(status_code=status_code, content=result.model_dump())

    except ValueError as ve:
        # Handle validation errors with 400 status code
        validation_error = IngestErrorResponse(error=f"Validation error: {ve!s}")
        return JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content=validation_error.model_dump())

    except Exception as exc:
        # Handle unexpected errors with 500 status code
        internal_error = IngestErrorResponse(error=f"Internal server error: {exc!s}")
        return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=internal_error.model_dump())
56 | 


--------------------------------------------------------------------------------
/src/server/server_config.py:
--------------------------------------------------------------------------------
 1 | """Configuration for the server."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from pathlib import Path
 6 | 
 7 | from fastapi.templating import Jinja2Templates
 8 | 
 9 | MAX_DISPLAY_SIZE: int = 300_000
10 | DELETE_REPO_AFTER: int = 60 * 60  # In seconds (1 hour)
11 | 
12 | # Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js)
13 | MAX_FILE_SIZE_KB: int = 100 * 1024  # 100 MB
14 | MAX_SLIDER_POSITION: int = 500  # Maximum slider position
15 | 
16 | EXAMPLE_REPOS: list[dict[str, str]] = [
17 |     {"name": "Gitingest", "url": "https://github.com/coderamp-labs/gitingest"},
18 |     {"name": "FastAPI", "url": "https://github.com/tiangolo/fastapi"},
19 |     {"name": "Flask", "url": "https://github.com/pallets/flask"},
20 |     {"name": "Excalidraw", "url": "https://github.com/excalidraw/excalidraw"},
21 |     {"name": "ApiAnalytics", "url": "https://github.com/tom-draper/api-analytics"},
22 | ]
23 | 
24 | 
25 | # Use absolute path to templates directory
26 | templates_dir = Path(__file__).parent / "templates"
27 | templates = Jinja2Templates(directory=templates_dir)
28 | 


--------------------------------------------------------------------------------
/src/server/server_utils.py:
--------------------------------------------------------------------------------
  1 | """Utility functions for the server."""
  2 | 
  3 | import asyncio
  4 | import math
  5 | import shutil
  6 | import time
  7 | from contextlib import asynccontextmanager, suppress
  8 | from pathlib import Path
  9 | from typing import AsyncGenerator
 10 | 
 11 | from fastapi import FastAPI, Request
 12 | from fastapi.responses import Response
 13 | from slowapi import Limiter, _rate_limit_exceeded_handler
 14 | from slowapi.errors import RateLimitExceeded
 15 | from slowapi.util import get_remote_address
 16 | 
 17 | from gitingest.config import TMP_BASE_PATH
 18 | from server.server_config import DELETE_REPO_AFTER, MAX_FILE_SIZE_KB, MAX_SLIDER_POSITION
 19 | 
 20 | # Initialize a rate limiter
 21 | limiter = Limiter(key_func=get_remote_address)
 22 | 
 23 | 
 24 | async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response:
 25 |     """Handle rate-limiting errors with a custom exception handler.
 26 | 
 27 |     Parameters
 28 |     ----------
 29 |     request : Request
 30 |         The incoming HTTP request.
 31 |     exc : Exception
 32 |         The exception raised, expected to be RateLimitExceeded.
 33 | 
 34 |     Returns
 35 |     -------
 36 |     Response
 37 |         A response indicating that the rate limit has been exceeded.
 38 | 
 39 |     Raises
 40 |     ------
 41 |     exc
 42 |         If the exception is not a RateLimitExceeded error, it is re-raised.
 43 | 
 44 |     """
 45 |     if isinstance(exc, RateLimitExceeded):
 46 |         # Delegate to the default rate limit handler
 47 |         return _rate_limit_exceeded_handler(request, exc)
 48 |     # Re-raise other exceptions
 49 |     raise exc
 50 | 
 51 | 
 52 | @asynccontextmanager
 53 | async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
 54 |     """Manage startup & graceful-shutdown tasks for the FastAPI app.
 55 | 
 56 |     Returns
 57 |     -------
 58 |     AsyncGenerator[None, None]
 59 |         Yields control back to the FastAPI application while the background task runs.
 60 | 
 61 |     """
 62 |     task = asyncio.create_task(_remove_old_repositories())
 63 | 
 64 |     yield  # app runs while the background task is alive
 65 | 
 66 |     task.cancel()  # ask the worker to stop
 67 |     with suppress(asyncio.CancelledError):
 68 |         await task  # swallow the cancellation signal
 69 | 
 70 | 
 71 | async def _remove_old_repositories(
 72 |     base_path: Path = TMP_BASE_PATH,
 73 |     scan_interval: int = 60,
 74 |     delete_after: int = DELETE_REPO_AFTER,
 75 | ) -> None:
 76 |     """Periodically delete old repositories/directories.
 77 | 
 78 |     Every ``scan_interval`` seconds the coroutine scans ``base_path`` and deletes directories older than
 79 |     ``delete_after`` seconds. The repository URL is extracted from the first ``.txt`` file in each directory
 80 |     and appended to ``history.txt``, assuming the filename format: "owner-repository.txt". Filesystem errors are
 81 |     logged and the loop continues.
 82 | 
 83 |     Parameters
 84 |     ----------
 85 |     base_path : Path
 86 |         The path to the base directory where repositories are stored (default: ``TMP_BASE_PATH``).
 87 |     scan_interval : int
 88 |         The number of seconds between scans (default: 60).
 89 |     delete_after : int
 90 |         The number of seconds after which a repository is considered old and will be deleted
 91 |         (default: ``DELETE_REPO_AFTER``).
 92 | 
 93 |     """
 94 |     while True:
 95 |         if not base_path.exists():
 96 |             await asyncio.sleep(scan_interval)
 97 |             continue
 98 | 
 99 |         now = time.time()
100 |         try:
101 |             for folder in base_path.iterdir():
102 |                 if now - folder.stat().st_ctime <= delete_after:  # Not old enough
103 |                     continue
104 | 
105 |                 await _process_folder(folder)
106 | 
107 |         except (OSError, PermissionError) as exc:
108 |             print(f"Error in _remove_old_repositories: {exc}")
109 | 
110 |         await asyncio.sleep(scan_interval)
111 | 
112 | 
113 | async def _process_folder(folder: Path) -> None:
114 |     """Append the repo URL (if discoverable) to ``history.txt`` and delete ``folder``.
115 | 
116 |     Parameters
117 |     ----------
118 |     folder : Path
119 |         The path to the folder to be processed.
120 | 
121 |     """
122 |     history_file = Path("history.txt")
123 |     loop = asyncio.get_running_loop()
124 | 
125 |     try:
126 |         first_txt_file = next(folder.glob("*.txt"))
127 |     except StopIteration:  # No .txt file found
128 |         return
129 | 
130 |     # Append owner/repo to history.txt
131 |     try:
132 |         filename = first_txt_file.stem  # "owner-repo"
133 |         if "-" in filename:
134 |             owner, repo = filename.split("-", 1)
135 |             repo_url = f"{owner}/{repo}"
136 |             await loop.run_in_executor(None, _append_line, history_file, repo_url)
137 |     except (OSError, PermissionError) as exc:
138 |         print(f"Error logging repository URL for {folder}: {exc}")
139 | 
140 |     # Delete the cloned repo
141 |     try:
142 |         await loop.run_in_executor(None, shutil.rmtree, folder)
143 |     except PermissionError as exc:
144 |         print(f"No permission to delete {folder}: {exc}")
145 |     except OSError as exc:
146 |         print(f"Could not delete {folder}: {exc}")
147 | 
148 | 
149 | def _append_line(path: Path, line: str) -> None:
150 |     """Append a line to a file.
151 | 
152 |     Parameters
153 |     ----------
154 |     path : Path
155 |         The path to the file to append the line to.
156 |     line : str
157 |         The line to append to the file.
158 | 
159 |     """
160 |     with path.open("a", encoding="utf-8") as fp:
161 |         fp.write(f"{line}\n")
162 | 
163 | 
def log_slider_to_size(position: int) -> int:
    """Convert a slider position to a file size in bytes using a logarithmic scale.

    The size in KB is ``exp(log(MAX_FILE_SIZE_KB) * (position / MAX_SLIDER_POSITION) ** 1.5)``, rounded to
    the nearest integer and multiplied by 1024. Position ``0`` therefore maps to 1 KB and
    ``MAX_SLIDER_POSITION`` maps to ``MAX_FILE_SIZE_KB`` KB.

    Out-of-range positions are clamped to ``[0, MAX_SLIDER_POSITION]``. Without the clamp a negative
    position makes ``pow`` return a complex number (so ``math.exp`` raises ``TypeError``), and a position
    above the maximum yields a size larger than ``MAX_FILE_SIZE_KB``.

    Parameters
    ----------
    position : int
        Slider position, expected to range from 0 to ``MAX_SLIDER_POSITION``.

    Returns
    -------
    int
        File size in bytes corresponding to the (clamped) slider position.

    """
    clamped = min(max(position, 0), MAX_SLIDER_POSITION)
    maxv = math.log(MAX_FILE_SIZE_KB)
    return round(math.exp(maxv * pow(clamped / MAX_SLIDER_POSITION, 1.5))) * 1024
180 | 
181 | 
# Color printing utility
class Colors:
    """ANSI escape sequences for colored and styled terminal output.

    Prefix text with one of these constants and finish with ``END`` to reset the terminal attributes.
    """

    # Regular foreground colors ("0;3x")
    BLACK = "\033[0;30m"
    RED = "\033[0;31m"
    GREEN = "\033[0;32m"
    BROWN = "\033[0;33m"
    BLUE = "\033[0;34m"
    PURPLE = "\033[0;35m"
    CYAN = "\033[0;36m"
    LIGHT_GRAY = "\033[0;37m"

    # Bold/bright variants of the same eight colors ("1;3x")
    DARK_GRAY = "\033[1;30m"
    LIGHT_RED = "\033[1;31m"
    LIGHT_GREEN = "\033[1;32m"
    YELLOW = "\033[1;33m"
    LIGHT_BLUE = "\033[1;34m"
    LIGHT_PURPLE = "\033[1;35m"
    LIGHT_CYAN = "\033[1;36m"
    WHITE = "\033[1;37m"

    # Text attributes
    BOLD = "\033[1m"
    FAINT = "\033[2m"
    ITALIC = "\033[3m"
    UNDERLINE = "\033[4m"
    BLINK = "\033[5m"
    NEGATIVE = "\033[7m"
    CROSSED = "\033[9m"

    # Reset all attributes
    END = "\033[0m"
210 | 


--------------------------------------------------------------------------------
/src/server/templates/base.jinja:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |     <head>
 4 |         <meta charset="UTF-8">
 5 |         <meta name="viewport" content="width=device-width, initial-scale=1.0">
 6 |         {# Favicons #}
 7 |         <link rel="icon" type="image/x-icon" href="/static/favicons/favicon.ico">
 8 |         <link rel="icon" type="image/svg+xml" href="/static/favicons/favicon.svg">
 9 |         <link rel="icon"
10 |               type="image/png"
11 |               href="/static/favicons/favicon-64.png"
12 |               sizes="64x64">
13 |         <link rel="apple-touch-icon"
14 |               type="image/png"
15 |               href="/static/favicons/apple-touch-icon.png"
16 |               sizes="180x180">
17 |         {# Search Engine Meta Tags #}
18 |         <meta name="title"       content="Gitingest">
19 |         <meta name="description"
20 |               content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text.">
21 |         <meta name="keywords"
22 |               content="Gitingest, AI tools, LLM integration, Ingest, Digest, Context, Prompt, Git workflow, codebase extraction, Git repository, Git automation, Summarize, prompt-friendly">
23 |         <meta name="robots"      content="index, follow">
24 |         {# Open Graph Meta Tags #}
25 |         <meta property="og:title"       content="Gitingest">
26 |         <meta property="og:description"
27 |               content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text.">
28 |         <meta property="og:type"        content="website">
29 |         <meta property="og:url"         content="{{ request.url }}">
30 |         <meta property="og:image"       content="/static/og-image.png">
31 |         {# Web App Meta #}
32 |         <meta name="apple-mobile-web-app-title"            content="Gitingest">
33 |         <meta name="application-name"                      content="Gitingest">
34 |         <meta name="theme-color"                           content="#FCA847">
35 |         <meta name="mobile-web-app-capable"                content="yes">
36 |         <meta name="apple-mobile-web-app-status-bar-style" content="default">
37 |         {# Twitter card #}
38 |         <meta name="twitter:card"        content="summary_large_image">
39 |         <meta name="twitter:title"       content="Gitingest">
40 |         <meta name="twitter:description"
41 |               content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text.">
42 |         <meta name="twitter:image"       content="/static/og-image.png">
43 |         {# Title #}
44 |         <title>
45 |             {% block title %}
46 |                 {% if short_repo_url %}
47 |                     Gitingest - {{ short_repo_url }}
48 |                 {% else %}
49 |                     Gitingest
50 |                 {% endif %}
51 |             {% endblock %}
52 |         </title>
53 |         <script src="https://cdn.tailwindcss.com"></script>
54 |         {% include 'components/tailwind_components.html' %}
55 |     </head>
56 |     <body class="bg-[#FFFDF8] min-h-screen flex flex-col">
57 |         {% include 'components/navbar.jinja' %}
58 |         {# Main content wrapper #}
59 |         <main class="flex-1 w-full">
60 |             <div class="max-w-4xl mx-auto px-4 py-8">
61 |                 {% block content %}{% endblock %}
62 |             </div>
63 |         </main>
64 |         {# Footer #}
65 |         {% include 'components/footer.jinja' %}
66 |         {# Scripts #}
67 |         <script defer src="/static/js/index.js"></script>
68 |         <script defer src="/static/js/utils.js"></script>
69 |         <script defer src="/static/js/posthog.js"></script>
70 |     </body>
71 | </html>
72 | 


--------------------------------------------------------------------------------
/src/server/templates/components/_macros.jinja:
--------------------------------------------------------------------------------
 1 | {# Icon link #}
 2 | {% macro footer_icon_link(href, icon, label) -%}
 3 |     <a href="{{ href }}"
 4 |        target="_blank"
 5 |        rel="noopener noreferrer"
 6 |        class="hover:underline flex items-center">
 7 |         <img src="/static/{{ icon }}" alt="{{ label }} logo" class="w-4 h-4 mr-1">
 8 |         {{ label }}
 9 |     </a>
10 | {%- endmacro %}
11 | 


--------------------------------------------------------------------------------
/src/server/templates/components/footer.jinja:
--------------------------------------------------------------------------------
 1 | {% from 'components/_macros.jinja' import footer_icon_link %}
 2 | <footer class="w-full border-t-[3px] border-gray-900 mt-auto">
 3 |     <div class="max-w-4xl mx-auto px-4 py-4">
 4 |         <div class="grid grid-cols-2 items-center text-gray-900 text-sm">
 5 |             {# Left column — Chrome + PyPI #}
 6 |             <div class="flex items-center space-x-4">
 7 |                 {{ footer_icon_link('https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood',
 8 |                                 'icons/chrome.svg',
 9 |                                 'Chrome Extension') }}
10 |                 {{ footer_icon_link('https://pypi.org/project/gitingest',
11 |                                 'icons/python.svg',
12 |                                 'Python Package') }}
13 |             </div>
14 |             {# Right column - Discord #}
15 |             <div class="flex justify-end">
16 |                 {{ footer_icon_link('https://discord.gg/zerRaGK9EC',
17 |                                 'icons/discord.svg',
18 |                                 'Discord') }}
19 |             </div>
20 |         </div>
21 |     </div>
22 | </footer>
23 | 


--------------------------------------------------------------------------------
/src/server/templates/components/navbar.jinja:
--------------------------------------------------------------------------------
 1 | <header class="sticky top-0 bg-[#FFFDF8] border-b-[3px] border-gray-900 z-50">
 2 |     <div class="max-w-4xl mx-auto px-4">
 3 |         <div class="flex justify-between items-center h-16">
 4 |             {# Logo #}
 5 |             <div class="flex items-center gap-4">
 6 |                 <h1 class="text-2xl font-bold tracking-tight">
 7 |                     <a href="/" class="hover:opacity-80 transition-opacity">
 8 |                         <span class="text-gray-900">Git</span><span class="text-[#FE4A60]">ingest</span>
 9 |                     </a>
10 |                 </h1>
11 |             </div>
12 |             {# Navigation with updated styling #}
13 |             <nav class="flex items-center space-x-6">
14 |                 <a href="/llms.txt" class="link-bounce flex items-center text-gray-900">
15 |                     <span class="badge-new">NEW</span>
16 |                     /llms.txt
17 |                 </a>
18 |                 {# GitHub link #}
19 |                 <div class="flex items-center gap-2">
20 |                     <a href="https://github.com/coderamp-labs/gitingest"
21 |                        target="_blank"
22 |                        rel="noopener noreferrer"
23 |                        class="link-bounce flex items-center gap-1.5 text-gray-900">
24 |                         <img src="/static/icons/github.svg" class="w-4 h-4" alt="GitHub logo">
25 |                         GitHub
26 |                     </a>
27 |                     {# Star counter #}
28 |                     <div class="no-drag flex items-center text-sm text-gray-600">
29 |                         <img src="/static/svg/github-star.svg"
30 |                              class="w-4 h-4 mr-1"
31 |                              alt="GitHub star icon">
32 |                         <span id="github-stars">0</span>
33 |                     </div>
34 |                 </div>
35 |             </nav>
36 |         </div>
37 |     </div>
38 | </header>
39 | {# Load GitHub stars script #}
40 | <script defer src="/static/js/navbar.js"></script>
41 | 


--------------------------------------------------------------------------------
/src/server/templates/components/result.jinja:
--------------------------------------------------------------------------------
  1 | <div class="mt-10">
  2 |     <!-- Error Message (hidden by default) -->
  3 |     <div id="results-error" style="display:none"></div>
  4 |     <!-- Loading Spinner (hidden by default) -->
  5 |     <div id="results-loading" style="display:none">
  6 |         <div class="relative mt-10">
  7 |             <div class="w-full h-full absolute inset-0 bg-black rounded-xl translate-y-2 translate-x-2"></div>
  8 |             <div class="bg-[#fafafa] rounded-xl border-[3px] border-gray-900 p-6 relative z-20 flex flex-col items-center space-y-4">
  9 |                 <div class="loader border-8 border-[#fff4da] border-t-8 border-t-[#ffc480] rounded-full w-16 h-16 animate-spin"></div>
 10 |                 <p class="text-lg font-bold text-gray-900">Loading...</p>
 11 |             </div>
 12 |         </div>
 13 |     </div>
 14 |     <!-- Results Section (hidden by default) -->
 15 |     <div id="results-section" style="display:none">
 16 |         <div class="relative">
 17 |             <div class="w-full h-full absolute inset-0 bg-gray-900 rounded-xl translate-y-2 translate-x-2"></div>
 18 |             <div class="bg-[#fafafa] rounded-xl border-[3px] border-gray-900 p-6 relative z-20 space-y-6">
 19 |                 <div class="grid grid-cols-1 md:grid-cols-12 gap-6">
 20 |                     <div class="md:col-span-5">
 21 |                         <div class="flex justify-between items-center mb-4 py-2">
 22 |                             <h3 class="text-lg font-bold text-gray-900">Summary</h3>
 23 |                         </div>
 24 |                         <div class="relative">
 25 |                             <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div>
 26 |                             <textarea id="result-summary"
 27 |                                       class="w-full h-[160px] p-4 bg-[#fff4da] border-[3px] border-gray-900 rounded font-mono text-sm resize-none focus:outline-none relative z-10"
 28 |                                       readonly></textarea>
 29 |                         </div>
 30 |                         <div class="relative mt-4 inline-block group ml-4">
 31 |                             <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div>
 32 |                             <button onclick="copyFullDigest()"
 33 |                                     class="inline-flex items-center px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10">
 34 |                                 <svg class="w-4 h-4 mr-2"
 35 |                                      fill="none"
 36 |                                      stroke="currentColor"
 37 |                                      viewBox="0 0 24 24">
 38 |                                     <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3" />
 39 |                                 </svg>
 40 |                                 Copy all
 41 |                             </button>
 42 |                         </div>
 43 |                         <div class="relative mt-4 inline-block group ml-4">
 44 |                             <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div>
 45 |                             <button onclick="downloadFullDigest()"
 46 |                                     class="inline-flex items-center px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10">
 47 |                                 <svg class="w-4 h-4 mr-2"
 48 |                                      fill="none"
 49 |                                      stroke="currentColor"
 50 |                                      viewBox="0 0 24 24">
 51 |                                     <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 10v6m0 0l-3-3m3 3l3-3m2 8H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
 52 |                                 </svg>
 53 |                                 Download
 54 |                             </button>
 55 |                         </div>
 56 |                     </div>
 57 |                     <div class="md:col-span-7">
 58 |                         <div class="flex justify-between items-center mb-4">
 59 |                             <h3 class="text-lg font-bold text-gray-900">Directory Structure</h3>
 60 |                             <div class="relative group">
 61 |                                 <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div>
 62 |                                 <button onclick="copyText('directory-structure')"
 63 |                                         class="px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10 flex items-center gap-2">
 64 |                                     <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
 65 |                                         <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3" />
 66 |                                     </svg>
 67 |                                     Copy
 68 |                                 </button>
 69 |                             </div>
 70 |                         </div>
 71 |                         <div class="relative">
 72 |                             <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div>
 73 |                             <div class="directory-structure w-full p-4 bg-[#fff4da] border-[3px] border-gray-900 rounded font-mono text-sm resize-y focus:outline-none relative z-10 h-[215px] overflow-auto"
 74 |                                  id="directory-structure-container"
 75 |                                  readonly>
 76 |                                 <input type="hidden" id="directory-structure-content" value="" />
 77 |                                 <pre id="directory-structure-pre"></pre>
 78 |                             </div>
 79 |                         </div>
 80 |                     </div>
 81 |                 </div>
 82 |                 <div>
 83 |                     <div class="flex justify-between items-center mb-4">
 84 |                         <h3 class="text-lg font-bold text-gray-900">Files Content</h3>
 85 |                         <div class="relative group">
 86 |                             <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div>
 87 |                             <button onclick="copyText('result-text')"
 88 |                                     class="px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10 flex items-center gap-2">
 89 |                                 <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
 90 |                                     <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3" />
 91 |                                 </svg>
 92 |                                 Copy
 93 |                             </button>
 94 |                         </div>
 95 |                     </div>
 96 |                     <div class="relative">
 97 |                         <div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0"></div>
 98 |                         <textarea id="result-content"
 99 |                                   class="result-text w-full p-4 bg-[#fff4da] border-[3px] border-gray-900 rounded font-mono text-sm resize-y focus:outline-none relative z-10"
100 |                                   style="min-height: 600px"
101 |                                   readonly></textarea>
102 |                     </div>
103 |                 </div>
104 |             </div>
105 |         </div>
106 |     </div>
107 | </div>
108 | 


--------------------------------------------------------------------------------
/src/server/templates/components/tailwind_components.html:
--------------------------------------------------------------------------------
 1 | <style type="text/tailwindcss">
 2 |   @layer components {
 3 |     .badge-new {
 4 |       @apply inline-block -rotate-6 -translate-y-1 mx-1 px-1 bg-[#FE4A60] border border-gray-900 text-white text-[10px] font-bold shadow-[2px_2px_0_0_rgba(0,0,0,1)];
 5 |     }
 6 |     .landing-page-title {
 7 |       @apply inline-block w-full relative text-center text-4xl sm:text-5xl md:text-6xl lg:text-7xl sm:pt-20 lg:pt-5 font-bold tracking-tighter;
 8 |     }
 9 |     .intro-text {
10 |       @apply text-center text-gray-600 text-lg max-w-2xl mx-auto;
11 |     }
12 |     .sparkle-red {
13 |       @apply absolute flex-shrink-0 h-auto w-14 sm:w-20 md:w-24 p-2 left-0 lg:ml-32 -translate-x-2 md:translate-x-10 lg:-translate-x-full -translate-y-4 sm:-translate-y-8 md:-translate-y-0 lg:-translate-y-10;
14 |     }
15 |     .sparkle-green {
16 |       @apply absolute flex-shrink-0 right-0 bottom-0 w-10 sm:w-16 lg:w-20 -translate-x-10 lg:-translate-x-12 translate-y-4 sm:translate-y-10 md:translate-y-2 lg:translate-y-4;
17 |     }
18 |     .pattern-select {
19 |       @apply min-w-max appearance-none pr-6 pl-2 py-2 bg-[#e6e8eb] border-r-[3px] border-gray-900 cursor-pointer focus:outline-none;
20 |     }
21 |   }
22 | 
23 |   @layer utilities {
24 |     .no-drag {
25 |       @apply pointer-events-none select-none;
26 |       -webkit-user-drag: none;
27 |     }
28 |     .link-bounce {
29 |       @apply transition-transform hover:-translate-y-0.5;
30 |     }
31 |   }
32 | </style>
33 | 


--------------------------------------------------------------------------------
/src/server/templates/git.jinja:
--------------------------------------------------------------------------------
 1 | {% extends "base.jinja" %}
 2 | {% block content %}
 3 |     {% if error_message %}
 4 |         <div class="mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700"
 5 |              id="error-message"
 6 |              data-message="{{ error_message }}">{{ error_message }}</div>
 7 |     {% endif %}
 8 |     {% with show_examples=false %}
 9 |         {% include 'components/git_form.jinja' %}
10 |     {% endwith %}
11 |     {% include 'components/result.jinja' %}
12 | {% endblock content %}
13 | 


--------------------------------------------------------------------------------
/src/server/templates/index.jinja:
--------------------------------------------------------------------------------
 1 | {% extends "base.jinja" %}
 2 | {% block content %}
 3 |     <div class="mb-8">
 4 |         <div class="relative w-full flex sm:flex-row flex-col justify-center sm:items-center">
 5 |             {# Title & Sparkles #}
 6 |             <h1 class="landing-page-title">
 7 |                 Prompt-friendly
 8 |                 <br>
 9 |                 codebase&nbsp;
10 |             </h1>
11 |             <img src="/static/svg/sparkle-red.svg" class="sparkle-red no-drag">
12 |             <img src="/static/svg/sparkle-green.svg" class="sparkle-green no-drag">
13 |         </div>
14 |         <p class="intro-text mt-8">Turn any Git repository into a simple text digest of its codebase.</p>
15 |         <p class="intro-text mt-0">This is useful for feeding a codebase into any LLM.</p>
16 |     </div>
17 |     {% if error_message %}
18 |         <div class="mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700"
19 |              id="error-message"
20 |              data-message="{{ error_message }}">{{ error_message }}</div>
21 |     {% endif %}
22 |     {% with show_examples=true %}
23 |         {% include 'components/git_form.jinja' %}
24 |     {% endwith %}
25 |     <p class="text-gray-600 text-sm max-w-2xl mx-auto text-center mt-4">
26 |         You can also replace 'hub' with 'ingest' in any GitHub URL.
27 |     </p>
28 |     {% include 'components/result.jinja' %}
29 | {% endblock %}
30 | 


--------------------------------------------------------------------------------
/src/server/templates/swagger_ui.jinja:
--------------------------------------------------------------------------------
 1 | {% extends "base.jinja" %}
 2 | {% block title %}GitIngest API{% endblock %}
 3 | {% block content %}
 4 |     <div class="mb-8">
 5 |         <div class="relative w-full flex sm:flex-row flex-col justify-center sm:items-center">
 6 |             {# Title & Sparkles #}
 7 |             <h1 class="landing-page-title">
 8 |                 GitIngest
 9 |                 <br>
10 |                 API&nbsp;
11 |             </h1>
12 |             {# The sparkles are decorative: an empty alt keeps assistive technology from announcing them #}
13 |             <img src="/static/svg/sparkle-red.svg" class="sparkle-red no-drag" alt="">
14 |             <img src="/static/svg/sparkle-green.svg" class="sparkle-green no-drag" alt="">
15 |         </div>
16 |         <p class="intro-text mt-8">Turn any Git repository into a simple text digest of its codebase.</p>
17 |         <p class="intro-text mt-0">This is useful for feeding a codebase into any LLM.</p>
18 |     </div>
19 |     <div class="bg-[#fff4da] rounded-xl border-[3px] border-gray-900 p-4 md:p-8 relative z-20">
20 |         {# Mount point for Swagger UI; the script below renders /openapi.json into it #}
21 |         <div id="swagger-ui"></div>
22 |     </div>
23 |     <link rel="stylesheet"
24 |           href="https://unpkg.com/swagger-ui-dist@5/swagger-ui.css">
25 |     <script src="https://unpkg.com/swagger-ui-dist@5/swagger-ui-bundle.js"></script>
26 |     <script>
27 |   window.onload = function() {
28 |     SwaggerUIBundle({
29 |       url: "/openapi.json",
30 |       dom_id: '#swagger-ui',
31 |       presets: [
32 |         SwaggerUIBundle.presets.apis,
33 |         SwaggerUIBundle.SwaggerUIStandalonePreset
34 |       ],
35 |       layout: "BaseLayout",
36 |       deepLinking: true,
37 |     });
38 |   }
39 |     </script>
40 | {% endblock %}
41 | 


--------------------------------------------------------------------------------
/src/static/favicons/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/src/static/favicons/apple-touch-icon.png


--------------------------------------------------------------------------------
/src/static/favicons/favicon-64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/src/static/favicons/favicon-64.png


--------------------------------------------------------------------------------
/src/static/favicons/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/src/static/favicons/favicon.ico


--------------------------------------------------------------------------------
/src/static/favicons/favicon.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg"
 2 |      viewBox="0 0 64 64"
 3 |      width="512px"
 4 |      height="512px">
 5 |     <defs><style>.cls-1{fill:#d4e9ff;}.cls-2{fill:#f1f8ff;}.cls-3{fill:#b7daff;}</style></defs>
 6 |     <title>1</title>
 7 |     <g id="Layer_70"
 8 |        data-name="Layer 70">
 9 |         <path class="cls-1" d="M52,35V16L41,4H14.42A2.42,2.42,0,0,0,12,6.42V35H8V49h4v8.58A2.42,2.42,0,0,0,14.42,60H49.58A2.42,2.42,0,0,0,52,57.58V49h4V35Z"/>
10 |         <polygon class="cls-2" points="52 16 41 16 41 4 52 16"/>
11 |         <rect class="cls-3" x="12" y="49" width="40" height="4"/>
12 |         <path d="M19.76,46.06a.91.91,0,0,1-.63-.23.83.83,0,0,1-.26-.66.85.85,0,0,1,.25-.62.83.83,0,0,1,.62-.26.87.87,0,0,1,.63.26.83.83,0,0,1,.26.62.84.84,0,0,1-.26.65A.88.88,0,0,1,19.76,46.06Z"/>
13 |         <path d="M24.27,38.24v6.25H27.8a.93.93,0,0,1,.65.21.67.67,0,0,1,.23.52.65.65,0,0,1-.22.51,1,1,0,0,1-.65.2H23.6a.91.91,0,0,1-1.07-1.07V38.24a1.15,1.15,0,0,1,.24-.79.8.8,0,0,1,.62-.26.83.83,0,0,1,.64.26A1.13,1.13,0,0,1,24.27,38.24Z"/>
14 |         <path d="M31.52,38.24v6.25H35a.93.93,0,0,1,.65.21.67.67,0,0,1,.23.52.65.65,0,0,1-.22.51,1,1,0,0,1-.65.2h-4.2a.91.91,0,0,1-1.07-1.07V38.24a1.15,1.15,0,0,1,.24-.79.8.8,0,0,1,.62-.26.83.83,0,0,1,.64.26A1.13,1.13,0,0,1,31.52,38.24Z"/>
15 |         <path d="M40,44.62l-1.38-5.47v5.93a1.08,1.08,0,0,1-.22.74.81.81,0,0,1-1.16,0,1.07,1.07,0,0,1-.22-.74v-6.8a.85.85,0,0,1,.29-.76,1.4,1.4,0,0,1,.79-.2h.54a2.06,2.06,0,0,1,.71.09.59.59,0,0,1,.33.32,4.91,4.91,0,0,1,.24.74l1.25,4.71,1.25-4.71a4.91,4.91,0,0,1,.24-.74.59.59,0,0,1,.33-.32,2.06,2.06,0,0,1,.71-.09h.54a1.4,1.4,0,0,1,.79.2.85.85,0,0,1,.29.76v6.8a1.08,1.08,0,0,1-.22.74.75.75,0,0,1-.59.25.73.73,0,0,1-.57-.25,1.07,1.07,0,0,1-.22-.74V39.15L42.3,44.62c-.09.36-.16.62-.22.78a1.08,1.08,0,0,1-.31.45.91.91,0,0,1-.63.21.92.92,0,0,1-.84-.47,1.92,1.92,0,0,1-.18-.45Z"/>
16 |         <path d="M57,35a1,1,0,0,0-1-1H53V16s0,0,0-.06a1,1,0,0,0,0-.21s0-.05,0-.07l0,0a1,1,0,0,0-.18-.29l-11-12a1,1,0,0,0-.29-.21l0,0-.06,0A1,1,0,0,0,41.11,3H14.42A3.42,3.42,0,0,0,11,6.42V34H8a1,1,0,0,0-1,1V49s0,0,0,.07a1.08,1.08,0,0,0,.34.68l0,.05L11,52.5v5.08A3.42,3.42,0,0,0,14.42,61H49.58A3.42,3.42,0,0,0,53,57.58V52.5l3.6-2.7,0-.05a1.08,1.08,0,0,0,.34-.68s0,0,0-.07ZM21,34V5H40V16a1,1,0,0,0,1,1H51V34ZM42,6.57,49.73,15H42ZM13,6.42A1.42,1.42,0,0,1,14.42,5H19V34H13ZM9,36H55V48H9Zm4,21.58V50h6v9H14.42A1.42,1.42,0,0,1,13,57.58ZM49.58,59H21V50H51v7.58A1.42,1.42,0,0,1,49.58,59Z"/>
17 |     </g>
18 | </svg>
19 | 


--------------------------------------------------------------------------------
/src/static/icons/chrome.svg:
--------------------------------------------------------------------------------
1 | <svg role="img" viewBox="0 0 24 24"
2 |     xmlns="http://www.w3.org/2000/svg">
3 |     <title>Google Chrome</title>
4 |     <path d="M12 0C8.21 0 4.831 1.757 2.632 4.501l3.953 6.848A5.454 5.454 0 0 1 12 6.545h10.691A12 12 0 0 0 12 0zM1.931 5.47A11.943 11.943 0 0 0 0 12c0 6.012 4.42 10.991 10.189 11.864l3.953-6.847a5.45 5.45 0 0 1-6.865-2.29zm13.342 2.166a5.446 5.446 0 0 1 1.45 7.09l.002.001h-.002l-5.344 9.257c.206.01.413.016.621.016 6.627 0 12-5.373 12-12 0-1.54-.29-3.011-.818-4.364zM12 16.364a4.364 4.364 0 1 1 0-8.728 4.364 4.364 0 0 1 0 8.728Z"/>
5 | </svg>
6 | 


--------------------------------------------------------------------------------
/src/static/icons/discord.svg:
--------------------------------------------------------------------------------
1 | <svg role="img" viewBox="0 0 24 24"
2 |     xmlns="http://www.w3.org/2000/svg">
3 |     <title>Discord</title>
4 |     <path d="M20.317 4.3698a19.7913 19.7913 0 00-4.8851-1.5152.0741.0741 0 00-.0785.0371c-.211.3753-.4447.8648-.6083 1.2495-1.8447-.2762-3.68-.2762-5.4868 0-.1636-.3933-.4058-.8742-.6177-1.2495a.077.077 0 00-.0785-.037 19.7363 19.7363 0 00-4.8852 1.515.0699.0699 0 00-.0321.0277C.5334 9.0458-.319 13.5799.0992 18.0578a.0824.0824 0 00.0312.0561c2.0528 1.5076 4.0413 2.4228 5.9929 3.0294a.0777.0777 0 00.0842-.0276c.4616-.6304.8731-1.2952 1.226-1.9942a.076.076 0 00-.0416-.1057c-.6528-.2476-1.2743-.5495-1.8722-.8923a.077.077 0 01-.0076-.1277c.1258-.0943.2517-.1923.3718-.2914a.0743.0743 0 01.0776-.0105c3.9278 1.7933 8.18 1.7933 12.0614 0a.0739.0739 0 01.0785.0095c.1202.099.246.1981.3728.2924a.077.077 0 01-.0066.1276 12.2986 12.2986 0 01-1.873.8914.0766.0766 0 00-.0407.1067c.3604.698.7719 1.3628 1.225 1.9932a.076.076 0 00.0842.0286c1.961-.6067 3.9495-1.5219 6.0023-3.0294a.077.077 0 00.0313-.0552c.5004-5.177-.8382-9.6739-3.5485-13.6604a.061.061 0 00-.0312-.0286zM8.02 15.3312c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9555-2.4189 2.157-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.9555 2.4189-2.1569 2.4189zm7.9748 0c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9554-2.4189 2.1569-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.946 2.4189-2.1568 2.4189Z"/>
5 | </svg>
6 | 


--------------------------------------------------------------------------------
/src/static/icons/github.svg:
--------------------------------------------------------------------------------
1 | <svg role="img" viewBox="0 0 24 24"
2 |     xmlns="http://www.w3.org/2000/svg" fill="#000">
3 |     <path fill-rule="evenodd" d="M12 2C6.477 2 2 6.484 2 12.017c0 4.425 2.865 8.18 6.839 9.504.5.092.682-.217.682-.483 0-.237-.008-.868-.013-1.703-2.782.605-3.369-1.343-3.369-1.343-.454-1.158-1.11-1.466-1.11-1.466-.908-.62.069-.608.069-.608 1.003.07 1.531 1.032 1.531 1.032.892 1.53 2.341 1.088 2.91.832.092-.647.35-1.088.636-1.338-2.22-.253-4.555-1.113-4.555-4.951 0-1.093.39-1.988 1.029-2.688-.103-.253-.446-1.272.098-2.65 0 0 .84-.27 2.75 1.026A9.564 9.564 0 0112 6.844c.85.004 1.705.115 2.504.337 1.909-1.296 2.747-1.027 2.747-1.027.546 1.379.202 2.398.1 2.651.64.7 1.028 1.595 1.028 2.688 0 3.848-2.339 4.695-4.566 4.943.359.309.678.92.678 1.855 0 1.338-.012 2.419-.012 2.747 0 .268.18.58.688.482A10.019 10.019 0 0022 12.017C22 6.484 17.522 2 12 2z" clip-rule="evenodd"/>
4 | </svg>
5 | 


--------------------------------------------------------------------------------
/src/static/icons/python-color.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <svg version="1.0" id="svg2" sodipodi:version="0.32" inkscape:version="1.2.1 (9c6d41e410, 2022-07-14)" sodipodi:docname="python-logo-only.svg" width="83.371017pt" height="101.00108pt" inkscape:export-filename="python-logo-only.png" inkscape:export-xdpi="232.44" inkscape:export-ydpi="232.44"
 3 |    xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
 4 |    xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
 5 |    xmlns:xlink="http://www.w3.org/1999/xlink"
 6 |    xmlns="http://www.w3.org/2000/svg"
 7 |    xmlns:svg="http://www.w3.org/2000/svg"
 8 |    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 9 |    xmlns:cc="http://creativecommons.org/ns#"
10 |    xmlns:dc="http://purl.org/dc/elements/1.1/">
11 |    <metadata id="metadata371">
12 |       <rdf:RDF>
13 |          <cc:Work rdf:about="">
14 |             <dc:format>image/svg+xml</dc:format>
15 |             <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
16 |          </cc:Work>
17 |       </rdf:RDF>
18 |    </metadata>
19 |    <sodipodi:namedview inkscape:window-height="2080" inkscape:window-width="1976" inkscape:pageshadow="2" inkscape:pageopacity="0.0" guidetolerance="10.0" gridtolerance="10.0" objecttolerance="10.0" borderopacity="1.0" bordercolor="#666666" pagecolor="#ffffff" id="base" inkscape:zoom="2.1461642" inkscape:cx="91.558698" inkscape:cy="47.9926" inkscape:window-x="1092" inkscape:window-y="72" inkscape:current-layer="svg2" width="210mm" height="40mm" units="mm" inkscape:showpageshadow="2" inkscape:pagecheckerboard="0" inkscape:deskcolor="#d1d1d1" inkscape:document-units="pt" showgrid="false" inkscape:window-maximized="0" />
20 |    <defs id="defs4">
21 |       <linearGradient id="linearGradient2795">
22 |          <stop style="stop-color:#b8b8b8;stop-opacity:0.49803922;" offset="0" id="stop2797" />
23 |          <stop style="stop-color:#7f7f7f;stop-opacity:0;" offset="1" id="stop2799" />
24 |       </linearGradient>
25 |       <linearGradient id="linearGradient2787">
26 |          <stop style="stop-color:#7f7f7f;stop-opacity:0.5;" offset="0" id="stop2789" />
27 |          <stop style="stop-color:#7f7f7f;stop-opacity:0;" offset="1" id="stop2791" />
28 |       </linearGradient>
29 |       <linearGradient id="linearGradient3676">
30 |          <stop style="stop-color:#b2b2b2;stop-opacity:0.5;" offset="0" id="stop3678" />
31 |          <stop style="stop-color:#b3b3b3;stop-opacity:0;" offset="1" id="stop3680" />
32 |       </linearGradient>
33 |       <linearGradient id="linearGradient3236">
34 |          <stop style="stop-color:#f4f4f4;stop-opacity:1" offset="0" id="stop3244" />
35 |          <stop style="stop-color:white;stop-opacity:1" offset="1" id="stop3240" />
36 |       </linearGradient>
37 |       <linearGradient id="linearGradient4671">
38 |          <stop style="stop-color:#ffd43b;stop-opacity:1;" offset="0" id="stop4673" />
39 |          <stop style="stop-color:#ffe873;stop-opacity:1" offset="1" id="stop4675" />
40 |       </linearGradient>
41 |       <linearGradient id="linearGradient4689">
42 |          <stop style="stop-color:#5a9fd4;stop-opacity:1;" offset="0" id="stop4691" />
43 |          <stop style="stop-color:#306998;stop-opacity:1;" offset="1" id="stop4693" />
44 |       </linearGradient>
45 |       <linearGradient x1="224.23996" y1="144.75717" x2="-65.308502" y2="144.75717" id="linearGradient2987" xlink:href="#linearGradient4671" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" />
46 |       <linearGradient x1="172.94208" y1="77.475983" x2="26.670298" y2="76.313133" id="linearGradient2990" xlink:href="#linearGradient4689" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" />
47 |       <linearGradient inkscape:collect="always" xlink:href="#linearGradient4689" id="linearGradient2587" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" x1="172.94208" y1="77.475983" x2="26.670298" y2="76.313133" />
48 |       <linearGradient inkscape:collect="always" xlink:href="#linearGradient4671" id="linearGradient2589" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" x1="224.23996" y1="144.75717" x2="-65.308502" y2="144.75717" />
49 |       <linearGradient inkscape:collect="always" xlink:href="#linearGradient4689" id="linearGradient2248" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" x1="172.94208" y1="77.475983" x2="26.670298" y2="76.313133" />
50 |       <linearGradient inkscape:collect="always" xlink:href="#linearGradient4671" id="linearGradient2250" gradientUnits="userSpaceOnUse" gradientTransform="translate(100.2702,99.61116)" x1="224.23996" y1="144.75717" x2="-65.308502" y2="144.75717" />
51 |       <linearGradient inkscape:collect="always" xlink:href="#linearGradient4671" id="linearGradient2255" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.562541,0,0,0.567972,-11.5974,-7.60954)" x1="224.23996" y1="144.75717" x2="-65.308502" y2="144.75717" />
52 |       <linearGradient inkscape:collect="always" xlink:href="#linearGradient4689" id="linearGradient2258" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.562541,0,0,0.567972,-11.5974,-7.60954)" x1="172.94208" y1="76.176224" x2="26.670298" y2="76.313133" />
53 |       <radialGradient inkscape:collect="always" xlink:href="#linearGradient2795" id="radialGradient2801" cx="61.518883" cy="132.28575" fx="61.518883" fy="132.28575" r="29.036913" gradientTransform="matrix(1,0,0,0.177966,0,108.7434)" gradientUnits="userSpaceOnUse" />
54 |       <linearGradient inkscape:collect="always" xlink:href="#linearGradient4671" id="linearGradient1475" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.562541,0,0,0.567972,-14.99112,-11.702371)" x1="150.96111" y1="192.35176" x2="112.03144" y2="137.27299" />
55 |       <linearGradient inkscape:collect="always" xlink:href="#linearGradient4689" id="linearGradient1478" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.562541,0,0,0.567972,-14.99112,-11.702371)" x1="26.648937" y1="20.603781" x2="135.66525" y2="114.39767" />
56 |       <radialGradient inkscape:collect="always" xlink:href="#linearGradient2795" id="radialGradient1480" gradientUnits="userSpaceOnUse" gradientTransform="matrix(1.7490565e-8,-0.23994696,1.054668,3.7915457e-7,-83.7008,142.46201)" cx="61.518883" cy="132.28575" fx="61.518883" fy="132.28575" r="29.036913" />
57 |    </defs>
58 |    <path style="fill:url(#linearGradient1478);fill-opacity:1" d="M 54.918785,9.1927421e-4 C 50.335132,0.02221727 45.957846,0.41313697 42.106285,1.0946693 30.760069,3.0991731 28.700036,7.2947714 28.700035,15.032169 v 10.21875 h 26.8125 v 3.40625 h -26.8125 -10.0625 c -7.792459,0 -14.6157588,4.683717 -16.7499998,13.59375 -2.46181998,10.212966 -2.57101508,16.586023 0,27.25 1.9059283,7.937852 6.4575432,13.593748 14.2499998,13.59375 h 9.21875 v -12.25 c 0,-8.849902 7.657144,-16.656248 16.75,-16.65625 h 26.78125 c 7.454951,0 13.406253,-6.138164 13.40625,-13.625 v -25.53125 c 0,-7.2663386 -6.12998,-12.7247771 -13.40625,-13.9374997 C 64.281548,0.32794397 59.502438,-0.02037903 54.918785,9.1927421e-4 Z m -14.5,8.21875012579 c 2.769547,0 5.03125,2.2986456 5.03125,5.1249996 -2e-6,2.816336 -2.261703,5.09375 -5.03125,5.09375 -2.779476,-1e-6 -5.03125,-2.277415 -5.03125,-5.09375 -10e-7,-2.826353 2.251774,-5.1249996 5.03125,-5.1249996 z" id="path1948" />
59 |    <path style="fill:url(#linearGradient1475);fill-opacity:1" d="m 85.637535,28.657169 v 11.90625 c 0,9.230755 -7.825895,16.999999 -16.75,17 h -26.78125 c -7.335833,0 -13.406249,6.278483 -13.40625,13.625 v 25.531247 c 0,7.266344 6.318588,11.540324 13.40625,13.625004 8.487331,2.49561 16.626237,2.94663 26.78125,0 6.750155,-1.95439 13.406253,-5.88761 13.40625,-13.625004 V 86.500919 h -26.78125 v -3.40625 h 26.78125 13.406254 c 7.792461,0 10.696251,-5.435408 13.406241,-13.59375 2.79933,-8.398886 2.68022,-16.475776 0,-27.25 -1.92578,-7.757441 -5.60387,-13.59375 -13.406241,-13.59375 z m -15.0625,64.65625 c 2.779478,3e-6 5.03125,2.277417 5.03125,5.093747 -2e-6,2.826354 -2.251775,5.125004 -5.03125,5.125004 -2.76955,0 -5.03125,-2.29865 -5.03125,-5.125004 2e-6,-2.81633 2.261697,-5.093747 5.03125,-5.093747 z" id="path1950" />
60 |    <ellipse style="opacity:0.44382;fill:url(#radialGradient1480);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:15.4174;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" id="path1894" cx="55.816761" cy="127.70079" rx="35.930977" ry="6.9673119" />
61 | </svg>
62 | 


--------------------------------------------------------------------------------
/src/static/icons/python.svg:
--------------------------------------------------------------------------------
1 | <svg role="img" viewBox="0 0 24 24"
2 |     xmlns="http://www.w3.org/2000/svg">
3 |     <title>Python</title>
4 |     <path d="M14.25.18l.9.2.73.26.59.3.45.32.34.34.25.34.16.33.1.3.04.26.02.2-.01.13V8.5l-.05.63-.13.55-.21.46-.26.38-.3.31-.33.25-.35.19-.35.14-.33.1-.3.07-.26.04-.21.02H8.77l-.69.05-.59.14-.5.22-.41.27-.33.32-.27.35-.2.36-.15.37-.1.35-.07.32-.04.27-.02.21v3.06H3.17l-.21-.03-.28-.07-.32-.12-.35-.18-.36-.26-.36-.36-.35-.46-.32-.59-.28-.73-.21-.88-.14-1.05-.05-1.23.06-1.22.16-1.04.24-.87.32-.71.36-.57.4-.44.42-.33.42-.24.4-.16.36-.1.32-.05.24-.01h.16l.06.01h8.16v-.83H6.18l-.01-2.75-.02-.37.05-.34.11-.31.17-.28.25-.26.31-.23.38-.2.44-.18.51-.15.58-.12.64-.1.71-.06.77-.04.84-.02 1.27.05zm-6.3 1.98l-.23.33-.08.41.08.41.23.34.33.22.41.09.41-.09.33-.22.23-.34.08-.41-.08-.41-.23-.33-.33-.22-.41-.09-.41.09zm13.09 3.95l.28.06.32.12.35.18.36.27.36.35.35.47.32.59.28.73.21.88.14 1.04.05 1.23-.06 1.23-.16 1.04-.24.86-.32.71-.36.57-.4.45-.42.33-.42.24-.4.16-.36.09-.32.05-.24.02-.16-.01h-8.22v.82h5.84l.01 2.76.02.36-.05.34-.11.31-.17.29-.25.25-.31.24-.38.2-.44.17-.51.15-.58.13-.64.09-.71.07-.77.04-.84.01-1.27-.04-1.07-.14-.9-.2-.73-.25-.59-.3-.45-.33-.34-.34-.25-.34-.16-.33-.1-.3-.04-.25-.02-.2.01-.13v-5.34l.05-.64.13-.54.21-.46.26-.38.3-.32.33-.24.35-.2.35-.14.33-.1.3-.06.26-.04.21-.02.13-.01h5.84l.69-.05.59-.14.5-.21.41-.28.33-.32.27-.35.2-.36.15-.36.1-.35.07-.32.04-.28.02-.21V6.07h2.09l.14.01zm-6.47 14.25l-.23.33-.08.41.08.41.23.33.33.23.41.08.41-.08.33-.23.23-.33.08-.41-.08-.41-.23-.33-.33-.23-.41-.08-.41.08z"/>
5 | </svg>
6 | 


--------------------------------------------------------------------------------
/src/static/js/git.js:
--------------------------------------------------------------------------------
 1 | function waitForStars() {
 2 |     return new Promise((resolve) => {
 3 |         const check = () => {
 4 |             const stars = document.getElementById('github-stars');
 5 | 
 6 |             if (stars && stars.textContent !== '0') {resolve();}
 7 |             else {setTimeout(check, 10);}
 8 |         };
 9 | 
10 |         check();
11 |     });
12 | }
13 | 
14 | document.addEventListener('DOMContentLoaded', () => {
15 |     const urlInput = document.getElementById('input_text');
16 |     const form = document.getElementById('ingestForm');
17 | 
18 |     if (urlInput && urlInput.value.trim() && form) {
19 |     // Wait for stars to be loaded before submitting
20 |         waitForStars().then(() => {
21 |             const submitEvent = new SubmitEvent('submit', {
22 |                 cancelable: true,
23 |                 bubbles: true
24 |             });
25 | 
26 |             Object.defineProperty(submitEvent, 'target', {
27 |                 value: form,
28 |                 enumerable: true
29 |             });
30 |             handleSubmit(submitEvent, true);
31 |         });
32 |     }
33 | });
34 | 


--------------------------------------------------------------------------------
/src/static/js/git_form.js:
--------------------------------------------------------------------------------
 1 | // Strike-through / un-strike file lines when the pattern-type menu flips.
 2 | function changePattern() {
 3 |     const dirPre = document.getElementById('directory-structure-pre');
 4 | 
 5 |     if (!dirPre) {return;}
 6 | 
 7 |     const treeLineElements = Array.from(dirPre.querySelectorAll('pre[name="tree-line"]'));
 8 | 
 9 |     // Skip the first tree line element
10 |     treeLineElements.slice(2).forEach((element) => {
11 |         element.classList.toggle('line-through');
12 |         element.classList.toggle('text-gray-500');
13 |     });
14 | }
15 | 
16 | // Show/hide the Personal-Access-Token section when the "Private repository" checkbox is toggled.
17 | function toggleAccessSettings() {
18 |     const container = document.getElementById('accessSettingsContainer');
19 |     const examples = document.getElementById('exampleRepositories');
20 |     const show = document.getElementById('showAccessSettings')?.checked;
21 | 
22 |     container?.classList.toggle('hidden', !show);
23 |     examples?.classList.toggle('lg:mt-0', show);
24 | }
25 | 
26 | 
27 | 
28 | document.addEventListener('DOMContentLoaded', () => {
29 |     toggleAccessSettings();
30 |     changePattern();
31 | });
32 | 
33 | 
34 | // Make them available to existing inline attributes
35 | window.changePattern = changePattern;
36 | window.toggleAccessSettings = toggleAccessSettings;
37 | 


--------------------------------------------------------------------------------
/src/static/js/index.js:
--------------------------------------------------------------------------------
 1 | function submitExample(repoName) {
 2 |     const input = document.getElementById('input_text');
 3 | 
 4 |     if (input) {
 5 |         input.value = repoName;
 6 |         input.focus();
 7 |     }
 8 | }
 9 | 
10 | // Make it visible to inline onclick handlers
11 | window.submitExample = submitExample;
12 | 


--------------------------------------------------------------------------------
/src/static/js/navbar.js:
--------------------------------------------------------------------------------
 1 | // Fetch GitHub stars
 2 | function formatStarCount(count) {
 3 |     if (count >= 1000) {return `${ (count / 1000).toFixed(1) }k`;}
 4 | 
 5 |     return count.toString();
 6 | }
 7 | 
 8 | async function fetchGitHubStars() {
 9 |     try {
10 |         const res = await fetch('https://api.github.com/repos/coderamp-labs/gitingest');
11 | 
12 |         if (!res.ok) {throw new Error(`${res.status} ${res.statusText}`);}
13 |         const data = await res.json();
14 | 
15 |         document.getElementById('github-stars').textContent =
16 |         formatStarCount(data.stargazers_count);
17 |     } catch (err) {
18 |         console.error('Error fetching GitHub stars:', err);
19 |         const el = document.getElementById('github-stars').parentElement;
20 | 
21 |         if (el) {el.style.display = 'none';}
22 |     }
23 | }
24 | 
25 | // auto-run when script loads
26 | fetchGitHubStars();
27 | 


--------------------------------------------------------------------------------
/src/static/js/posthog.js:
--------------------------------------------------------------------------------
 1 | /* eslint-disable */
 2 | // PostHog loader snippet: installs a stub `window.posthog` whose methods queue their calls as arrays,
 3 | // and whose `init` injects the `array.js` script built from the configured `api_host`.
 4 | !function (t, e) {
 5 |     let o, n, p, r;
 6 |     if (e.__SV) {return;}                 // already loaded
 7 | 
 8 |     window.posthog = e;
 9 |     e._i = [];
10 |     e.init = function (i, s, a) {
11 |         // Install stub method `e` on `t` (or on `t[prefix]` for a dotted "prefix.name") that queues its call
12 |         function g(t, e) {
13 |             const o = e.split(".");
14 |             if (o.length === 2) {
15 |                 t = t[o[0]];
16 |                 e = o[1];
17 |             }
18 |             t[e] = function () {
19 |                 t.push([e].concat(Array.prototype.slice.call(arguments, 0)));
20 |             };
21 |         }
22 | 
23 |         // Inject the array.js script; ".i.posthog.com" in api_host is swapped for the "-assets" host
24 |         p = t.createElement("script");
25 |         p.type = "text/javascript";
26 |         p.crossOrigin = "anonymous";
27 |         p.async = true;
28 |         p.src = `${ s.api_host.replace(".i.posthog.com", "-assets.i.posthog.com") }/static/array.js`;
29 | 
30 |         r = t.getElementsByTagName("script")[0];
31 |         r.parentNode.insertBefore(p, r);
32 | 
33 |         // A named instance gets its own queue at e[a]; without a name the queue is e itself
34 |         let u = e;
35 |         if (a !== undefined) {
36 |             u = e[a] = [];
37 |         } else {
38 |             a = "posthog";
39 |         }
40 | 
41 |         u.people = u.people || [];
42 |         u.toString = function (t) {
43 |             let e = "posthog";
44 |             if (a !== "posthog") {e += `.${ a }`;}
45 |             if (!t) {e += " (stub)";}
46 |             return e;
47 |         };
48 |         u.people.toString = function () {
49 |             return `${ u.toString(1) }.people (stub)`;
50 |         };
51 | 
52 |         // Method names that get a queueing stub on `u`
53 |         o = [
54 |             "init", "capture", "register", "register_once", "register_for_session", "unregister",
55 |             "unregister_for_session", "getFeatureFlag", "getFeatureFlagPayload", "isFeatureEnabled",
56 |             "reloadFeatureFlags", "updateEarlyAccessFeatureEnrollment", "getEarlyAccessFeatures",
57 |             "on", "onFeatureFlags", "onSessionId", "getSurveys", "getActiveMatchingSurveys",
58 |             "renderSurvey", "canRenderSurvey", "getNextSurveyStep", "identify", "setPersonProperties",
59 |             "group", "resetGroups", "setPersonPropertiesForFlags", "resetPersonPropertiesForFlags",
60 |             "setGroupPropertiesForFlags", "resetGroupPropertiesForFlags", "reset", "get_distinct_id",
61 |             "getGroups", "get_session_id", "get_session_replay_url", "alias", "set_config",
62 |             "startSessionRecording", "stopSessionRecording", "sessionRecordingStarted",
63 |             "captureException", "loadToolbar", "get_property", "getSessionProperty",
64 |             "createPersonProfile", "opt_in_capturing", "opt_out_capturing",
65 |             "has_opted_in_capturing", "has_opted_out_capturing", "clear_opt_in_out_capturing",
66 |             "debug", "getPageViewId"
67 |         ];
68 | 
69 |         for (n = 0; n < o.length; n++) {g(u, o[n]);}
70 |         // Record the init arguments in e._i
71 |         e._i.push([i, s, a]);
72 |     };
73 | 
74 |     // Flag read by the guard at the top so the snippet only sets itself up once
75 |     e.__SV = 1;
76 | }(document, window.posthog || []);
77 | 
78 | /* Initialise PostHog */
79 | posthog.init('phc_9aNpiIVH2zfTWeY84vdTWxvrJRCQQhP5kcVDXUvcdou', {
80 |     api_host: 'https://eu.i.posthog.com',
81 |     person_profiles: 'always',
82 | });
83 | 


--------------------------------------------------------------------------------
/src/static/og-image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyclotruc/gitingest/74e503fa1140feb74aa5350a32f0025c43097da1/src/static/og-image.png


--------------------------------------------------------------------------------
/src/static/robots.txt:
--------------------------------------------------------------------------------
1 | User-agent: *
2 | Allow: /
3 | Allow: /api/
4 | Allow: /coderamp-labs/gitingest/
5 | 


--------------------------------------------------------------------------------
/src/static/svg/github-star.svg:
--------------------------------------------------------------------------------
1 | <svg role="img" viewBox="0 0 20 20"
2 |     xmlns="http://www.w3.org/2000/svg" fill="#ffc480">
3 |     <path d="M9.049 2.927c.3-.921 1.603-.921 1.902 0l1.07 3.292a1 1 0 00.95.69h3.462c.969 0 1.371 1.24.588 1.81l-2.8 2.034a1 1 0 00-.364 1.118l1.07 3.292c.3.921-.755 1.688-1.54 1.118l-2.8-2.034a1 1 0 00-1.175 0l-2.8 2.034c-.784.57-1.838-.197-1.539-1.118l1.07-3.292a1 1 0 00-.364-1.118L2.98 8.72c-.783-.57-.38-1.81.588-1.81h3.461a1 1 0 00.951-.69l1.07-3.292z"/>
4 | </svg>
5 | 


--------------------------------------------------------------------------------
/src/static/svg/sparkle-green.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 90 80" fill="none">
2 |       <!-- Large diamond -->
3 |       <path d="m35.213 16.953.595-5.261 2.644 4.587a35.056 35.056 0 0 0 26.432 17.33l5.261.594-4.587 2.644A35.056 35.056 0 0 0 48.23 63.28l-.595 5.26-2.644-4.587a35.056 35.056 0 0 0-26.432-17.328l-5.261-.595 4.587-2.644a35.056 35.056 0 0 0 17.329-26.433Z"
4 |             fill="#5CF1A4" stroke="#000" stroke-width="2.868"/>
5 |       <!-- Sparkle rays -->
6 |       <path d="M75.062 40.108c1.07 5.255 1.072 16.52-7.472 19.54m7.422-19.682c1.836 2.965 7.643 8.14 16.187 5.121-8.544 3.02-8.207 15.23-6.971 20.957-1.97-3.343-8.044-9.274-16.588-6.254M12.054 28.012c1.34-5.22 6.126-15.4 14.554-14.369M12.035 28.162c-.274-3.487-2.93-10.719-11.358-11.75C9.104 17.443 14.013 6.262 15.414.542c.226 3.888 2.784 11.92 11.212 12.95"
7 |             stroke="#000" stroke-width="2.319" stroke-linecap="round"/>
8 | </svg>
9 | 


--------------------------------------------------------------------------------
/src/static/svg/sparkle-red.svg:
--------------------------------------------------------------------------------
 1 | 
 2 | <svg xmlns="http://www.w3.org/2000/svg"
 3 |      viewBox="0 0 90 100"
 4 |      fill="none">
 5 |     <!-- Large diamond -->
 6 |     <path d="m35.878 14.162 1.333-5.369 1.933 5.183c4.47 11.982 14.036 21.085 25.828 24.467l5.42 1.555-5.209 2.16c-11.332 4.697-19.806 14.826-22.888 27.237l-1.333 5.369-1.933-5.183C34.56 57.599 24.993 48.496 13.201 45.114l-5.42-1.555 5.21-2.16c11.331-4.697 19.805-14.826 22.887-27.237Z"
 7 |           fill="#FE4A60" stroke="#000" stroke-width="3.445"/>
 8 |     <!-- Sparkle rays -->
 9 |     <path d="M79.653 5.729c-2.436 5.323-9.515 15.25-18.341 12.374m9.197 16.336c2.6-5.851 10.008-16.834 18.842-13.956m-9.738-15.07c-.374 3.787 1.076 12.078 9.869 14.943M70.61 34.6c.503-4.21-.69-13.346-9.49-16.214M14.922 65.967c1.338 5.677 6.372 16.756 15.808 15.659M18.21 95.832c-1.392-6.226-6.54-18.404-15.984-17.305m12.85-12.892c-.41 3.771-3.576 11.588-12.968 12.681M18.025 96c.367-4.21 3.453-12.905 12.854-14"
10 |           stroke="#000" stroke-width="2.548" stroke-linecap="round"/>
11 | </svg>
12 | 


--------------------------------------------------------------------------------
/tests/.pylintrc:
--------------------------------------------------------------------------------
 1 | [MASTER]
 2 | init-hook=
 3 |     import sys
 4 |     sys.path.append('./src')
 5 | 
 6 | [MESSAGES CONTROL]
 7 | disable=missing-class-docstring,missing-function-docstring,protected-access,fixme
 8 | 
 9 | [FORMAT]
10 | max-line-length=119
11 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for the gitingest package."""
2 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
  1 | """Fixtures for tests.
  2 | 
  3 | This file provides shared fixtures for creating sample queries, a temporary directory structure, and a helper function
  4 | to write ``.ipynb`` notebooks for testing notebook utilities.
  5 | """
  6 | 
  7 | from __future__ import annotations
  8 | 
  9 | import json
 10 | from pathlib import Path
 11 | from typing import TYPE_CHECKING, Any, Callable, Dict
 12 | from unittest.mock import AsyncMock
 13 | 
 14 | import pytest
 15 | 
 16 | from gitingest.query_parser import IngestionQuery
 17 | 
 18 | if TYPE_CHECKING:
 19 |     from pytest_mock import MockerFixture
 20 | 
 21 | WriteNotebookFunc = Callable[[str, Dict[str, Any]], Path]
 22 | 
 23 | DEMO_URL = "https://github.com/user/repo"
 24 | LOCAL_REPO_PATH = "/tmp/repo"
 25 | 
 26 | 
 27 | @pytest.fixture
 28 | def sample_query() -> IngestionQuery:
 29 |     """Provide a default ``IngestionQuery`` object for use in tests.
 30 | 
 31 |     This fixture returns a ``IngestionQuery`` pre-populated with typical fields and some default ignore patterns.
 32 | 
 33 |     Returns
 34 |     -------
 35 |     IngestionQuery
 36 |         The sample ``IngestionQuery`` object.
 37 | 
 38 |     """
 39 |     return IngestionQuery(
 40 |         user_name="test_user",
 41 |         repo_name="test_repo",
 42 |         local_path=Path("/tmp/test_repo").resolve(),
 43 |         slug="test_user/test_repo",
 44 |         id="id",
 45 |         branch="main",
 46 |         max_file_size=1_000_000,
 47 |         ignore_patterns={"*.pyc", "__pycache__", ".git"},
 48 |     )
 49 | 
 50 | 
 51 | @pytest.fixture
 52 | def temp_directory(tmp_path: Path) -> Path:
 53 |     """Create a temporary directory structure for testing repository scanning.
 54 | 
 55 |     The structure includes:
 56 |     test_repo/
 57 |     ├── file1.txt
 58 |     ├── file2.py
 59 |     ├── src/
 60 |     │   ├── subfile1.txt
 61 |     │   ├── subfile2.py
 62 |     │   └── subdir/
 63 |     │       ├── file_subdir.txt
 64 |     │       └── file_subdir.py
 65 |     ├── dir1/
 66 |     │   └── file_dir1.txt
 67 |     └── dir2/
 68 |         └── file_dir2.txt
 69 | 
 70 |     Parameters
 71 |     ----------
 72 |     tmp_path : Path
 73 |         The temporary directory path provided by the ``tmp_path`` fixture.
 74 | 
 75 |     Returns
 76 |     -------
 77 |     Path
 78 |         The path to the created ``test_repo`` directory.
 79 | 
 80 |     """
 81 |     test_dir = tmp_path / "test_repo"
 82 |     test_dir.mkdir()
 83 | 
 84 |     # Root files
 85 |     (test_dir / "file1.txt").write_text("Hello World")
 86 |     (test_dir / "file2.py").write_text("print('Hello')")
 87 | 
 88 |     # src directory and its files
 89 |     src_dir = test_dir / "src"
 90 |     src_dir.mkdir()
 91 |     (src_dir / "subfile1.txt").write_text("Hello from src")
 92 |     (src_dir / "subfile2.py").write_text("print('Hello from src')")
 93 | 
 94 |     # src/subdir and its files
 95 |     subdir = src_dir / "subdir"
 96 |     subdir.mkdir()
 97 |     (subdir / "file_subdir.txt").write_text("Hello from subdir")
 98 |     (subdir / "file_subdir.py").write_text("print('Hello from subdir')")
 99 | 
100 |     # dir1 and its file
101 |     dir1 = test_dir / "dir1"
102 |     dir1.mkdir()
103 |     (dir1 / "file_dir1.txt").write_text("Hello from dir1")
104 | 
105 |     # dir2 and its file
106 |     dir2 = test_dir / "dir2"
107 |     dir2.mkdir()
108 |     (dir2 / "file_dir2.txt").write_text("Hello from dir2")
109 | 
110 |     return test_dir
111 | 
112 | 
@pytest.fixture
def write_notebook(tmp_path: Path) -> WriteNotebookFunc:
    """Return a helper that serialises notebook data to a file inside ``tmp_path``.

    Parameters
    ----------
    tmp_path : Path
        Per-test temporary directory supplied by pytest's ``tmp_path`` fixture.

    Returns
    -------
    WriteNotebookFunc
        A callable taking a file name and a dictionary of notebook data; it writes the dictionary as JSON to
        ``tmp_path / name`` and returns that path.

    """

    def _write_notebook(name: str, content: dict[str, Any]) -> Path:
        destination = tmp_path / name
        serialized = json.dumps(content)
        destination.write_text(serialized, encoding="utf-8")
        return destination

    return _write_notebook
137 | 
138 | 
@pytest.fixture
def stub_branches(mocker: MockerFixture) -> Callable[[list[str]], None]:
    """Return a factory that makes git branch discovery report the given *branches*.

    Calling the factory patches both ``gitingest.utils.git_utils.run_command`` and
    ``gitingest.utils.git_utils.fetch_remote_branches_or_tags`` with ``AsyncMock`` objects.
    """

    def _factory(branches: list[str]) -> None:
        # Fake stdout: one ``refs/heads/<branch>`` entry per line, newline-terminated; stderr stays empty.
        ref_listing = "\n".join(f"refs/heads/{b}" for b in branches)
        fake_stdout = ref_listing.encode() + b"\n"

        mocker.patch(
            "gitingest.utils.git_utils.run_command",
            new_callable=AsyncMock,
            return_value=(fake_stdout, b""),
        )
        mocker.patch(
            "gitingest.utils.git_utils.fetch_remote_branches_or_tags",
            new_callable=AsyncMock,
            return_value=branches,
        )

    return _factory
156 | 
157 | 
@pytest.fixture
def repo_exists_true(mocker: MockerFixture) -> AsyncMock:
    """Replace ``gitingest.clone.check_repo_exists`` with a mock whose return value is ``True``.

    The mock object is returned so tests can inspect how it was called.
    """
    patched_check = mocker.patch("gitingest.clone.check_repo_exists", return_value=True)
    return patched_check
162 | 
163 | 
@pytest.fixture
def run_command_mock(mocker: MockerFixture) -> AsyncMock:
    """Swap ``gitingest.clone.run_command`` for an ``AsyncMock`` and hand the mock to the test.

    By default the mock returns a dummy process object whose ``communicate`` method yields
    ``(b"output", b"error")``, so most tests do not need to build one. Tests can reconfigure the mock
    through the fixture argument.
    """
    # Prepare the default dummy process before installing the patch.
    fake_process = AsyncMock()
    fake_process.communicate.return_value = (b"output", b"error")

    patched_run = mocker.patch("gitingest.clone.run_command", new_callable=AsyncMock)
    patched_run.return_value = fake_process
    return patched_run
179 | 


--------------------------------------------------------------------------------
/tests/query_parser/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for the query parser."""
2 | 


--------------------------------------------------------------------------------
/tests/query_parser/test_git_host_agnostic.py:
--------------------------------------------------------------------------------
 1 | """Tests to verify that the query parser is Git host agnostic.
 2 | 
 3 | These tests confirm that ``parse_query`` correctly identifies user/repo pairs and canonical URLs for GitHub, GitLab,
 4 | Bitbucket, Gitea, and Codeberg, even if the host is omitted.
 5 | """
 6 | 
 7 | from __future__ import annotations
 8 | 
 9 | import pytest
10 | 
11 | from gitingest.query_parser import parse_query
12 | from gitingest.utils.query_parser_utils import KNOWN_GIT_HOSTS
13 | 
14 | # Repository matrix: (host, user, repo)
15 | _REPOS: list[tuple[str, str, str]] = [
16 |     ("github.com", "tiangolo", "fastapi"),
17 |     ("gitlab.com", "gitlab-org", "gitlab-runner"),
18 |     ("bitbucket.org", "na-dna", "llm-knowledge-share"),
19 |     ("gitea.com", "xorm", "xorm"),
20 |     ("codeberg.org", "forgejo", "forgejo"),
21 |     ("git.rwth-aachen.de", "medialab", "19squared"),
22 |     ("gitlab.alpinelinux.org", "alpine", "apk-tools"),
23 | ]
24 | 
25 | 
# Stacked ``parametrize`` decorators yield every (repository, URL variant) combination.
@pytest.mark.parametrize(("host", "user", "repo"), _REPOS, ids=[f"{h}:{u}/{r}" for h, u, r in _REPOS])
@pytest.mark.parametrize("variant", ["full", "noscheme", "slug"])
@pytest.mark.asyncio
async def test_parse_query_without_host(
    host: str,
    user: str,
    repo: str,
    variant: str,
) -> None:
    """Check ``parse_query`` against full URLs, scheme-less URLs and bare ``user/repo`` slugs."""
    expected_url = f"https://{host}/{user}/{repo}"
    slug_input = f"{user}/{repo}"

    # Map each variant to its input string; anything unrecognised falls back to the slug form.
    inputs_by_variant = {
        "full": expected_url,
        "noscheme": f"{host}/{user}/{repo}",
    }
    url = inputs_by_variant.get(variant, slug_input)

    # A bare slug carries no host, so for hosts outside KNOWN_GIT_HOSTS the parser is expected to fail.
    slug_on_unknown_host = variant == "slug" and host not in KNOWN_GIT_HOSTS
    if slug_on_unknown_host:
        with pytest.raises(ValueError, match="Could not find a valid repository host"):
            await parse_query(url, max_file_size=50, from_web=True)
        return

    query = await parse_query(url, max_file_size=50, from_web=True)

    # Fields that vary between runs are excluded before comparing with the canonical dict.
    unpredictable_fields = {"id", "local_path", "ignore_patterns"}
    actual = query.model_dump(exclude=unpredictable_fields)

    expected = {
        "user_name": user,
        "repo_name": repo,
        "url": expected_url,
        "slug": f"{user}-{repo}",
        "subpath": "/",
        "type": None,
        "branch": None,
        "tag": None,
        "commit": None,
        "max_file_size": 50,
        "include_patterns": None,
        "include_submodules": False,
    }

    assert actual == expected
75 | 


--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
  1 | """Tests for the Gitingest CLI."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from inspect import signature
  6 | from pathlib import Path
  7 | 
  8 | import pytest
  9 | from click.testing import CliRunner, Result
 10 | 
 11 | from gitingest.__main__ import main
 12 | from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME
 13 | 
 14 | 
 15 | @pytest.mark.parametrize(
 16 |     ("cli_args", "expect_file"),
 17 |     [
 18 |         pytest.param(["./"], True, id="default-options"),
 19 |         pytest.param(
 20 |             [
 21 |                 "./",
 22 |                 "--output",
 23 |                 str(OUTPUT_FILE_NAME),
 24 |                 "--max-size",
 25 |                 str(MAX_FILE_SIZE),
 26 |                 "--exclude-pattern",
 27 |                 "tests/",
 28 |                 "--include-pattern",
 29 |                 "src/",
 30 |                 "--include-submodules",
 31 |             ],
 32 |             True,
 33 |             id="custom-options",
 34 |         ),
 35 |     ],
 36 | )
 37 | def test_cli_writes_file(
 38 |     tmp_path: Path,
 39 |     monkeypatch: pytest.MonkeyPatch,
 40 |     *,
 41 |     cli_args: list[str],
 42 |     expect_file: bool,
 43 | ) -> None:
 44 |     """Run the CLI and verify that the SARIF file is created (or not)."""
 45 |     expectes_exit_code = 0
 46 |     # Work inside an isolated temp directory
 47 |     monkeypatch.chdir(tmp_path)
 48 | 
 49 |     result = _invoke_isolated_cli_runner(cli_args)
 50 | 
 51 |     assert result.exit_code == expectes_exit_code, result.stderr
 52 | 
 53 |     # Summary line should be on STDOUT
 54 |     stdout_lines = result.stdout.splitlines()
 55 |     assert f"Analysis complete! Output written to: {OUTPUT_FILE_NAME}" in stdout_lines
 56 | 
 57 |     # File side-effect
 58 |     sarif_file = tmp_path / OUTPUT_FILE_NAME
 59 |     assert sarif_file.exists() is expect_file, f"{OUTPUT_FILE_NAME} existence did not match expectation"
 60 | 
 61 | 
 62 | def test_cli_with_stdout_output() -> None:
 63 |     """Test CLI invocation with output directed to STDOUT."""
 64 |     output_file = Path(OUTPUT_FILE_NAME)
 65 |     # Clean up any existing digest.txt file before test
 66 |     if output_file.exists():
 67 |         output_file.unlink()
 68 | 
 69 |     try:
 70 |         result = _invoke_isolated_cli_runner(["./", "--output", "-", "--exclude-pattern", "tests/"])
 71 | 
 72 |         # ─── core expectations (stdout) ────────────────────────────────────-
 73 |         assert result.exit_code == 0, f"CLI exited with code {result.exit_code}, stderr: {result.stderr}"
 74 |         assert "---" in result.stdout, "Expected file separator '---' not found in STDOUT"
 75 |         assert "src/gitingest/__main__.py" in result.stdout, (
 76 |             "Expected content (e.g., src/gitingest/__main__.py) not found in STDOUT"
 77 |         )
 78 |         assert not output_file.exists(), f"Output file {output_file} was unexpectedly created."
 79 | 
 80 |         # ─── the summary must *not* pollute STDOUT, must appear on STDERR ───
 81 |         summary = "Analysis complete! Output sent to stdout."
 82 |         stdout_lines = result.stdout.splitlines()
 83 |         stderr_lines = result.stderr.splitlines()
 84 |         assert summary not in stdout_lines, "Unexpected summary message found in STDOUT"
 85 |         assert summary in stderr_lines, "Expected summary message not found in STDERR"
 86 |         assert f"Output written to: {output_file.name}" not in stderr_lines
 87 |     finally:
 88 |         # Clean up any digest.txt file that might have been created during test
 89 |         if output_file.exists():
 90 |             output_file.unlink()
 91 | 
 92 | 
 93 | def _invoke_isolated_cli_runner(args: list[str]) -> Result:
 94 |     """Return a ``CliRunner`` that keeps ``stderr`` separate on Click 8.0-8.1."""
 95 |     kwargs = {}
 96 |     if "mix_stderr" in signature(CliRunner.__init__).parameters:
 97 |         kwargs["mix_stderr"] = False  # Click 8.0-8.1
 98 |     runner = CliRunner(**kwargs)
 99 |     return runner.invoke(main, args)
100 | 


--------------------------------------------------------------------------------
/tests/test_flow_integration.py:
--------------------------------------------------------------------------------
  1 | """Integration tests covering core functionalities, edge cases, and concurrency handling."""
  2 | 
  3 | import shutil
  4 | from concurrent.futures import ThreadPoolExecutor
  5 | from pathlib import Path
  6 | from typing import Generator
  7 | 
  8 | import pytest
  9 | from fastapi import status
 10 | from fastapi.testclient import TestClient
 11 | from pytest_mock import MockerFixture
 12 | 
 13 | from src.server.main import app
 14 | 
 15 | BASE_DIR = Path(__file__).resolve().parent.parent
 16 | TEMPLATE_DIR = BASE_DIR / "src" / "templates"
 17 | 
 18 | 
 19 | @pytest.fixture(scope="module")
 20 | def test_client() -> Generator[TestClient, None, None]:
 21 |     """Create a test client fixture."""
 22 |     with TestClient(app) as client_instance:
 23 |         client_instance.headers.update({"Host": "localhost"})
 24 |         yield client_instance
 25 | 
 26 | 
 27 | @pytest.fixture(autouse=True)
 28 | def mock_static_files(mocker: MockerFixture) -> None:
 29 |     """Mock the static file mount to avoid directory errors."""
 30 |     mock_static = mocker.patch("src.server.main.StaticFiles", autospec=True)
 31 |     mock_static.return_value = None
 32 |     return mock_static
 33 | 
 34 | 
 35 | @pytest.fixture(scope="module", autouse=True)
 36 | def cleanup_tmp_dir() -> Generator[None, None, None]:
 37 |     """Remove ``/tmp/gitingest`` after this test-module is done."""
 38 |     yield  # run tests
 39 |     temp_dir = Path("/tmp/gitingest")
 40 |     if temp_dir.exists():
 41 |         try:
 42 |             shutil.rmtree(temp_dir)
 43 |         except PermissionError as exc:
 44 |             print(f"Error cleaning up {temp_dir}: {exc}")
 45 | 
 46 | 
 47 | @pytest.mark.asyncio
 48 | async def test_remote_repository_analysis(request: pytest.FixtureRequest) -> None:
 49 |     """Test the complete flow of analyzing a remote repository."""
 50 |     client = request.getfixturevalue("test_client")
 51 |     form_data = {
 52 |         "input_text": "https://github.com/octocat/Hello-World",
 53 |         "max_file_size": "243",
 54 |         "pattern_type": "exclude",
 55 |         "pattern": "",
 56 |         "token": "",
 57 |     }
 58 | 
 59 |     response = client.post("/api/ingest", json=form_data)
 60 |     assert response.status_code == status.HTTP_200_OK, f"Form submission failed: {response.text}"
 61 | 
 62 |     # Check that response is JSON
 63 |     response_data = response.json()
 64 |     assert "content" in response_data
 65 |     assert response_data["content"]
 66 |     assert "repo_url" in response_data
 67 |     assert "summary" in response_data
 68 |     assert "tree" in response_data
 69 |     assert "content" in response_data
 70 | 
 71 | 
 72 | @pytest.mark.asyncio
 73 | async def test_invalid_repository_url(request: pytest.FixtureRequest) -> None:
 74 |     """Test handling of an invalid repository URL."""
 75 |     client = request.getfixturevalue("test_client")
 76 |     form_data = {
 77 |         "input_text": "https://github.com/nonexistent/repo",
 78 |         "max_file_size": "243",
 79 |         "pattern_type": "exclude",
 80 |         "pattern": "",
 81 |         "token": "",
 82 |     }
 83 | 
 84 |     response = client.post("/api/ingest", json=form_data)
 85 |     # Should return 400 for invalid repository
 86 |     assert response.status_code == status.HTTP_400_BAD_REQUEST, f"Request failed: {response.text}"
 87 | 
 88 |     # Check that response is JSON error
 89 |     response_data = response.json()
 90 |     assert "error" in response_data
 91 | 
 92 | 
 93 | @pytest.mark.asyncio
 94 | async def test_large_repository(request: pytest.FixtureRequest) -> None:
 95 |     """Simulate analysis of a large repository with nested folders."""
 96 |     client = request.getfixturevalue("test_client")
 97 |     # TODO: ingesting a large repo take too much time (eg: godotengine/godot repository)
 98 |     form_data = {
 99 |         "input_text": "https://github.com/octocat/hello-world",
100 |         "max_file_size": "10",
101 |         "pattern_type": "exclude",
102 |         "pattern": "",
103 |         "token": "",
104 |     }
105 | 
106 |     response = client.post("/api/ingest", json=form_data)
107 |     assert response.status_code == status.HTTP_200_OK, f"Request failed: {response.text}"
108 | 
109 |     response_data = response.json()
110 |     if response.status_code == status.HTTP_200_OK:
111 |         assert "content" in response_data
112 |         assert response_data["content"]
113 |     else:
114 |         assert "error" in response_data
115 | 
116 | 
@pytest.mark.asyncio
async def test_concurrent_requests(request: pytest.FixtureRequest) -> None:
    """Test handling of multiple concurrent requests.

    Five identical ingest requests are issued from a thread pool; each one must succeed and return
    non-empty content.
    """
    client = request.getfixturevalue("test_client")

    def make_request() -> None:
        form_data = {
            "input_text": "https://github.com/octocat/hello-world",
            "max_file_size": "243",
            "pattern_type": "exclude",
            "pattern": "",
            "token": "",
        }
        response = client.post("/api/ingest", json=form_data)
        assert response.status_code == status.HTTP_200_OK, f"Request failed: {response.text}"

        # The status is asserted to be 200 above, so only the success payload needs checking.
        response_data = response.json()
        assert "content" in response_data
        assert response_data["content"]

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(make_request) for _ in range(5)]
        for future in futures:
            # ``result()`` re-raises any assertion failure that occurred in the worker thread.
            future.result()
144 | 
145 | 
@pytest.mark.asyncio
async def test_large_file_handling(request: pytest.FixtureRequest) -> None:
    """Test handling of repositories with large files.

    The request uses a ``max_file_size`` of ``"1"`` and must still succeed with non-empty content.
    """
    client = request.getfixturevalue("test_client")
    form_data = {
        "input_text": "https://github.com/octocat/Hello-World",
        "max_file_size": "1",
        "pattern_type": "exclude",
        "pattern": "",
        "token": "",
    }

    response = client.post("/api/ingest", json=form_data)
    assert response.status_code == status.HTTP_200_OK, f"Request failed: {response.text}"

    # The status is asserted to be 200 above, so only the success payload needs checking.
    response_data = response.json()
    assert "content" in response_data
    assert response_data["content"]
167 | 
168 | 
@pytest.mark.asyncio
async def test_repository_with_patterns(request: pytest.FixtureRequest) -> None:
    """Test repository analysis with include/exclude patterns.

    Submits an ``include`` pattern of ``*.md`` and checks that the response echoes the pattern settings.
    """
    client = request.getfixturevalue("test_client")
    form_data = {
        "input_text": "https://github.com/octocat/Hello-World",
        "max_file_size": "243",
        "pattern_type": "include",
        "pattern": "*.md",
        "token": "",
    }

    response = client.post("/api/ingest", json=form_data)
    assert response.status_code == status.HTTP_200_OK, f"Request failed: {response.text}"

    # The status is asserted to be 200 above, so only the success payload needs checking.
    response_data = response.json()
    assert "content" in response_data
    assert "pattern_type" in response_data
    assert response_data["pattern_type"] == "include"
    assert "pattern" in response_data
    assert response_data["pattern"] == "*.md"
193 | 


--------------------------------------------------------------------------------
/tests/test_gitignore_feature.py:
--------------------------------------------------------------------------------
 1 | """Tests for the gitignore functionality in Gitingest."""
 2 | 
 3 | from pathlib import Path
 4 | 
 5 | import pytest
 6 | 
 7 | from gitingest.entrypoint import ingest_async
 8 | from gitingest.utils.ignore_patterns import load_ignore_patterns
 9 | 
10 | 
@pytest.fixture(name="repo_path")
def repo_fixture(tmp_path: Path) -> Path:
    """Populate ``tmp_path`` with a minimal repository for gitignore tests.

    Files written:
    - ``.gitignore`` listing ``exclude.txt``
    - ``include.txt`` (should be processed)
    - ``exclude.txt`` (should be skipped when gitignore rules are respected)
    """
    # File name -> content, in the order the files are written.
    files_to_create = {
        ".gitignore": "exclude.txt\n",
        "include.txt": "This file should be included.",
        "exclude.txt": "This file should be excluded.",
    }
    for file_name, text in files_to_create.items():
        (tmp_path / file_name).write_text(text)

    return tmp_path
33 | 
34 | 
def test_load_gitignore_patterns(tmp_path: Path) -> None:
    """Check that ``load_ignore_patterns()`` reads patterns from ``.gitignore`` and drops comment lines."""
    # Two real patterns followed by a comment line.
    gitignore_text = "exclude.txt\n*.log\n# a comment\n"
    (tmp_path / ".gitignore").write_text(gitignore_text)

    patterns = load_ignore_patterns(tmp_path, filename=".gitignore")

    # Both real patterns must have been picked up.
    assert "exclude.txt" in patterns
    assert "*.log" in patterns
    # No loaded entry may be a comment line.
    assert not any(pattern.startswith("#") for pattern in patterns)
49 | 
50 | 
@pytest.mark.asyncio
async def test_ingest_with_gitignore(repo_path: Path) -> None:
    """Integration test: ``ingest_async()`` honours ``.gitignore`` unless told otherwise.

    With the default settings the content of ``exclude.txt`` must be absent from the result; with
    ``include_gitignored=True`` the content of both files must be present.
    """
    included_text = "This file should be included."
    excluded_text = "This file should be excluded."
    source = str(repo_path)

    # Default run: gitignore rules apply, so only 'include.txt' contributes content.
    _, _, content_with_ignore = await ingest_async(source=source)
    assert excluded_text not in content_with_ignore
    assert included_text in content_with_ignore

    # Gitignore rules disabled: both files contribute content.
    _, _, content_without_ignore = await ingest_async(source=source, include_gitignored=True)
    assert excluded_text in content_without_ignore
    assert included_text in content_without_ignore
70 | 


--------------------------------------------------------------------------------