├── .github
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── PULL_REQUEST_TEMPLATE.yml
├── dependabot.yml
├── labeler.yml
└── workflows
│ ├── code_quality_control.yml
│ ├── cos_integration.yml
│ ├── docs.yml
│ ├── docs_test.yml
│ ├── label.yml
│ ├── lints.yml
│ ├── pr_request_checks.yml
│ ├── pull-request-links.yml
│ ├── pylint.yml
│ ├── python-publish.yml
│ ├── quality.yml
│ ├── ruff.yml
│ ├── run_test.yml
│ ├── stale.yml
│ ├── test.yml
│ ├── testing.yml
│ ├── unit-test.yml
│ └── welcome.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── agorabanner.png
├── docs
├── .DS_Store
├── applications
│ ├── customer_support.md
│ ├── enterprise.md
│ └── marketing_agencies.md
├── architecture.md
├── assets
│ ├── css
│ │ └── extra.css
│ └── img
│ │ ├── SwarmsLogoIcon.png
│ │ ├── swarmsbanner.png
│ │ ├── tools
│ │ ├── output.png
│ │ ├── poetry_setup.png
│ │ └── toml.png
│ │ └── zetascale.png
├── bounties.md
├── contributing.md
├── demos.md
├── design.md
├── examples
│ ├── count-tokens.md
│ ├── index.md
│ ├── load-and-query-pinecone.md
│ ├── load-query-and-chat-marqo.md
│ ├── query-webpage.md
│ ├── store-conversation-memory-in-dynamodb.md
│ ├── talk-to-a-pdf.md
│ ├── talk-to-a-webpage.md
│ ├── talk-to-redshift.md
│ └── using-text-generation-web-ui.md
├── faq.md
├── flywheel.md
├── hiring.md
├── index.md
├── metric.md
├── overrides
│ └── main.html
├── purpose.md
├── research.md
├── roadmap.md
├── stylesheets
│ └── extra.css
└── zeta
│ ├── .DS_Store
│ ├── index.md
│ ├── nn
│ ├── architecture
│ │ ├── decoder.md
│ │ └── transformer.md
│ ├── attention
│ │ ├── base.md
│ │ ├── flash2.md
│ │ ├── flash_attention.md
│ │ ├── multihead.md
│ │ └── multiquery.md
│ ├── biases
│ │ ├── alibi.md
│ │ ├── relative_bias.md
│ │ └── xpos.md
│ ├── embeddings
│ │ ├── multiway.md
│ │ ├── rope.md
│ │ └── truncated_rope.md
│ ├── modules
│ │ ├── lora.md
│ │ └── token_learner.md
│ └── utils
│ │ └── helpers.md
│ ├── tokenizers
│ ├── language_tokenizer.md
│ ├── multi_modal_tokenizer.md
│ └── sentencepiece.md
│ └── training
│ ├── nebula.md
│ ├── optimizers
│ ├── decoupled_lion.md
│ └── sophia.md
│ └── train.md
├── example.py
├── mkdocs.yml
├── package
├── __init__.py
├── main.py
└── subfolder
│ ├── __init__.py
│ └── main.py
├── pyproject.toml
├── requirements.txt
└── scripts
├── code_quality.sh
├── merge_all_prs.sh
├── test_name.sh
└── tests.sh
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [kyegomez]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: #Nothing
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a detailed report on the bug and its root cause. Conduct root cause error analysis
4 | title: "[BUG] "
5 | labels: bug
6 | assignees: kyegomez
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is and what its root cause appears to be. Test thoroughly before submitting.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Additional context**
27 | Add any other context about the problem here.
28 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: 'kyegomez'
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.yml:
--------------------------------------------------------------------------------
1 |
10 |
11 | Zeta provides you with reliable, high-performance, and fast modular building blocks for building zetascale neural nets at lightspeed with minimal code and a pythonic API.
12 |
13 | [Click here for Zeta Documentation →](zeta/)
14 |
15 |
16 | ## Examples
17 |
18 | Check out Zeta examples for building agents, data retrieval, and more.
19 |
20 | [Check out Zeta examples →](examples/)
21 |
--------------------------------------------------------------------------------
/docs/metric.md:
--------------------------------------------------------------------------------
1 | # The Golden Metric:
2 |
3 | * We need to figure out a single metric that determines whether we're accomplishing our goal with Zeta, which is to build zetascale superintelligent AI models as fast as possible with minimal code.
4 |
5 |
--------------------------------------------------------------------------------
/docs/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 |
4 |
5 | {% block announce %}
6 |
9 | {% endblock %}
--------------------------------------------------------------------------------
/docs/purpose.md:
--------------------------------------------------------------------------------
1 | # Zeta's Purpose
2 |
3 |
4 | Every once in a while, a revolutionary project comes along that changes everything.
5 |
6 | We live in a landscape cluttered with rigid frameworks, plagued by inefficiencies, where developers - our brightest minds - are bogged down by limitations.
7 |
8 | Now, imagine a world where harnessing the power of state-of-the-art models isn't just possible... it's simple. A world where efficiency doesn’t sacrifice safety, and where your ideas are bounded only by your imagination. We should be living in this world. But we aren't.
9 |
10 |
11 | Because Zeta is what's missing.
12 |
13 |
14 | The challenge? Creating a framework that's not just another tool, but a revolution.
15 |
16 | To bridge this gap, one would need to optimize at the foundational level, prioritize user experience, and introduce a design philosophy that future-proofs. It's colossal. And until now, no one's even come close.
17 |
18 |
19 | But there’s an enormous opportunity here. An opportunity that promises not just recognition but the power to redefine an industry. And, the key to unlocking this future? It's been with us all along.
20 |
21 |
22 | Insight.
23 |
24 |
25 | Introducing... Zeta.
26 |
27 |
28 | Our secret? Fluidity.
29 |
30 | It’s a philosophy that values modularity, reliability, usability, and unmatched speed.
31 |
32 | But more than that, it's a commitment to evolution, to pushing boundaries, to never settling.
33 |
34 |
35 | Why are we the best to execute this vision?
36 |
37 | Because we've been there from the start.
38 |
39 | We've seen the challenges, felt the frustrations, and now, we're poised to lead the revolution.
40 |
41 | We’ve done it before, and with Zeta, we’re doing it again.
42 |
43 |
44 | Zeta isn’t just the next step. It's a leap into the future.
45 |
46 | Zeta is the future of AI.
47 |
48 |
--------------------------------------------------------------------------------
/docs/roadmap.md:
--------------------------------------------------------------------------------
1 |
2 | **[Zeta's 3-Step Master Plan for Perfecting Multi-Modality LLMs]**
3 |
4 | ---
5 |
6 | **1. Refinement and Excellence: Perfecting the Framework**
7 | - **[Objective]**: To develop Zeta into the most sophisticated, yet intuitively simple framework for building Multi-Modality LLMs.
8 |
9 | - **[Strategies]**
10 | - **Zeta Innovation Labs**:
11 | * Create a dedicated team of experts who exclusively focus on refining the foundational modules and blocks.
12 | * Prioritize research in areas like advanced self-supervised learning, multi-modal integration, and zero-shot learning.
13 | - **Modularity Focus**:
14 | * Develop plug-and-play modules that allow developers to effortlessly incorporate various data types (text, image, video, audio) into their LLMs.
15 | * Standardize the blocks ensuring consistent performance, error-handling, and interoperability.
16 | - **Performance Optimization**:
17 | * Collaborate with hardware manufacturers to ensure that Zeta is perfectly optimized for cutting-edge GPUs, TPUs, and other specialized hardware.
18 | * Roll out regular updates to keep the framework at the forefront of performance.
19 |
20 | ---
21 |
22 | **2. User-Centric Development: Making Zeta Intuitive**
23 | - **[Objective]**: Ensure that every feature, tool, and module in Zeta aligns with the principle of making LLM creation simpler and more efficient.
24 |
25 | - **[Strategies]**
26 | - **Zeta Academy**:
27 | * Host frequent workshops and webinars targeted at educating users on harnessing the power of Zeta's multi-modality LLM features.
28 | * Create a vast library of tutorials, ranging from beginner to advanced, with real-world examples of LLM implementation.
29 | - **Interactive GUI for LLM Design**:
30 | * Develop a visual interface where users can drag-and-drop modules, visualize their LLM architecture, and see real-time performance metrics.
31 | - **Feedback Loops**:
32 | * Create a robust system to collect and implement feedback. Users should feel like they’re co-creating Zeta.
33 | * Launch a beta program where selected developers can test new features and provide insights.
34 |
35 | ---
36 |
37 | **3. Scaling and Outreach: From the Labs to the World**
38 | - **[Objective]**: Make Zeta the de facto choice for developers worldwide aiming to craft state-of-the-art Multi-Modality LLMs.
39 |
40 | - **[Strategies]**
41 | - **Zeta Ambassadors**:
42 | * Identify and collaborate with top AI researchers and practitioners globally, making them the face and voice of Zeta in their communities.
43 | - **Strategic Partnerships**:
44 | * Work closely with major tech institutions, universities, and platforms to integrate Zeta into their curriculum or platforms.
45 | * Create an API gateway for seamless integration of Zeta with other popular machine learning and data processing platforms.
46 | - **Global Challenges & Competitions**:
47 | * Organize worldwide LLM challenges, where developers use Zeta to solve real-world problems, bringing attention to both the problems and the capabilities of Zeta.
48 |
49 | ---
50 |
51 |
52 | In every tool, in every line of code, in every module of Zeta, you'll find our relentless pursuit of excellence. But remember, at its core,
53 |
54 | Zeta isn't about us,
55 |
56 | it's about you, the creator.
57 |
58 | It's about giving you the power, the simplicity, and the edge to redefine the boundaries of what's possible.
59 |
60 | With Zeta, we’re not just building a tool; we're crafting the future.
61 |
62 | A future we're eager to see through your eyes.
63 |
64 |
65 |
66 |
67 | ------
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 | **[Zeta's 3-Step Master Plan]**
92 |
93 | **1. Cultivate an Ecosystem of Innovation**
94 | - **[Objective]**: Establish an environment where creativity and innovation are paramount.
95 |
96 | - **[Strategies]**
97 | - **Education & Outreach**:
98 | * Launch a series of free online courses, workshops, and webinars to educate developers on the capabilities and advantages of Zeta.
99 | * Partner with top universities and institutions, offering them early access and integrations, fostering a new generation of developers natively trained on Zeta.
100 | - **Zeta Labs**:
101 | * Open a research lab committed to pushing the boundaries of what neural networks can achieve.
102 | * Provide grants, resources, and mentorship to promising projects and startups that choose to build with Zeta.
103 | - **Open Source Philosophy**:
104 | * Release parts of Zeta's core codebase to the public, inviting developers worldwide to contribute, refine, and expand upon the framework.
105 | * Organize hackathons and coding challenges to galvanize the community around real-world problems that Zeta can solve.
106 |
107 | ---
108 |
109 | **2. Seamless Integration & Scalability**
110 | - **[Objective]**: Make Zeta the easiest, most efficient, and most scalable framework to integrate into any project or system.
111 |
112 | - **[Strategies]**
113 | - **Developer Toolkits**:
114 | * Release a suite of tools, plugins, and libraries for all major development platforms and languages, ensuring Zeta is accessible to everyone, everywhere.
115 | - **Zeta Cloud**:
116 | * Offer a cloud solution that allows developers to run, test, and deploy their neural networks seamlessly. This ensures businesses of all sizes can scale without friction.
117 | - **Partnerships**:
118 | * Collaborate with major tech companies, ensuring Zeta's native support on platforms like AWS, Google Cloud, and Azure.
119 | * Establish alliances with hardware manufacturers, optimizing Zeta for the latest GPUs and Neural Network Processors.
120 |
121 | ---
122 |
123 | **3. Build a Community and Cultivate Trust**
124 | - **[Objective]**: Establish Zeta as more than a tool – it should be a movement, a community of forward-thinkers who believe in redefining the boundaries of neural network capabilities.
125 |
126 | - **[Strategies]**
127 | - **ZetaCon**:
128 | * Annually host a global conference (both offline and online) bringing together the brightest minds in the AI and machine learning sector. It will be a platform for networking, knowledge-sharing, and showcasing the best of what's been built using Zeta.
129 | - **Transparency Reports**:
130 | * Release regular updates about Zeta's development, challenges, successes, and roadmap.
131 | * Actively gather feedback, ensuring the community feels heard and that their insights are valued.
132 | - **Zeta Academy**:
133 | * Create a platform where developers can share their projects, tutorials, and courses about Zeta. Recognize and reward the best contributions to foster a sense of ownership and pride within the community.
134 |
135 | ---
136 |
137 | This isn't just a roadmap. It's our promise, our commitment. Because at the end of the day, it's not about the lines of code we write. It's about the lives we change, the innovations we inspire, and the future we create. And with Zeta, we believe that future is brighter than ever. Let's build it together.
138 |
139 |
140 |
--------------------------------------------------------------------------------
/docs/stylesheets/extra.css:
--------------------------------------------------------------------------------
1 | :root {
2 | --md-primary-fg-color: #8315F9;
3 | --md-accent-fg-color: #00FFCE;
4 | }
--------------------------------------------------------------------------------
/docs/zeta/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/The-Swarm-Corporation/Multi-Agent-Template-App/dbb3ebd78a39b698068b2d4eae4365450fa05dbe/docs/zeta/.DS_Store
--------------------------------------------------------------------------------
/docs/zeta/index.md:
--------------------------------------------------------------------------------
1 | The Zeta framework provides developers with the ability to create state-of-the-art models as simply and seamlessly as possible through **Modularity**, **Reliability**, **Use-Ability**, and **Speed**.
2 |
3 | Zeta not only helps developers harness the potential of LLMs and Multi-Modal Foundation Models but also enforces trust boundaries, schema validation, and tool activity-level permissions. By doing so, Zeta maximizes LLMs’ reasoning while adhering to strict policies regarding their capabilities.
4 |
5 | Zeta’s design philosophy is based on the following tenets:
6 |
7 | 1. **Use-Ability**: Utilizing Zeta should feel like going for a swim in the ocean - seamless and fluid, with pythonic methods and classes and error handling that indicates what steps to take next.
8 | 2. **Reliability**: Zeta puts every FLOP to work by harnessing ultra-reliable and high-performance designs for all functions and classes.
9 | 3. **Speed**: Zeta is like the Lamborghini of ML frameworks, with simply unparalleled speed.
10 |
11 | ## Quick Starts
12 |
13 | ### Using pip
14 |
15 | Install **zeta**
16 |
17 | ```
18 | pip3 install zeta
19 | ```
20 |
21 | ## Unleash FlashAttention
22 | With Zeta, you can unleash the best and highest-performance attention mechanisms, like `FlashAttention` and `MultiQueryAttention`. Here's an example with Flash Attention:
23 |
24 | ```python
25 | import torch
26 | from zeta import FlashAttention
27 |
28 | q = torch.randn(2, 4, 6, 8)
29 | k = torch.randn(2, 4, 10, 8)
30 | v = torch.randn(2, 4, 10, 8)
31 |
32 | attention = FlashAttention(causal=False, dropout=0.1, flash=False)
33 | output = attention(q, k, v)
34 |
35 | print(output.shape)
36 | ```
37 |
38 | ## Unleash GPT-4
39 | On top of the SOTA attention mechanisms we provide, we also offer rough implementations of some of the best neural nets ever made, like `GPT4`. Here's an example of how to use our implementation of GPT-4:
40 |
41 | ```python
42 | import torch
43 | from zeta import GPT4, GPT4MultiModal
44 |
45 | #text
46 | text = torch.randint(0, 256, (1, 1024))  # token indices; keep inputs and model on the same device
47 | img = torch.randn(1, 3, 256, 256)
48 |
49 | gpt4_language = GPT4()
50 |
51 | gpt4_language(text)
52 |
53 | #multimodal GPT4
54 |
55 | gpt4_multimodal = GPT4MultiModal()
56 | gpt4_multimodal_output = gpt4_multimodal(text, img)
57 |
58 | ```
59 |
60 |
--------------------------------------------------------------------------------
/docs/zeta/nn/architecture/decoder.md:
--------------------------------------------------------------------------------
1 | # Decoder Class Documentation
2 |
3 | Module/Class Name: Decoder
4 |
5 | ```python
6 | class Decoder(AttentionLayers):
7 | def __init__(self, **kwargs):
8 | assert 'causal' not in kwargs, 'cannot set causality on decoder'
9 | super().__init__(causal=True, **kwargs)
10 | ```
11 |
12 | ## Overview and Introduction
13 |
14 | The `Decoder` class is a component of the Zeta library designed for creating a decoder model with multiple attention layers. It extends the functionality of the `AttentionLayers` class to enable the construction of a decoder architecture. The decoder is a key component in various sequence-to-sequence tasks, such as machine translation, text generation, and more.
15 |
16 | The decoder employs multi-head self-attention mechanisms and feed-forward networks to transform input sequences into meaningful output sequences while maintaining the causal property. It is particularly suitable for autoregressive tasks, where each step depends only on previous steps in the sequence.
17 |
18 | ## Class Definition
19 |
20 | ```python
21 | class Decoder(AttentionLayers):
22 | def __init__(self, **kwargs):
23 | assert 'causal' not in kwargs, 'cannot set causality on decoder'
24 | super().__init__(causal=True, **kwargs)
25 | ```
26 |
27 | The `Decoder` class inherits from the `AttentionLayers` class and introduces the causality constraint by setting `causal=True`. It is initialized with various parameters that configure the architecture and behavior of the decoder.
28 |
29 | ## Parameters
30 |
31 | The `Decoder` class constructor accepts various parameters that control the behavior of the decoder. The most important parameters are inherited from the `AttentionLayers` class, and additional parameters specific to the decoder are introduced. Below is a summary of the parameters:
32 |
33 | - `dim` (int): Dimensionality of the model.
34 | - `depth` (int): Number of decoder layers.
35 | - `heads` (int): Number of parallel attention heads.
36 | - `cross_attend` (bool): Enable cross-attention between input and output sequences.
37 | - `sandwich_coef` (int): Coefficient for configuring sandwich normalization.
38 | - `residual_attn` (bool): Enable residual connection for self-attention layers.
39 | - `cross_residual_attn` (bool): Enable residual connection for cross-attention layers.
40 | - `layer_dropout` (float): Dropout probability applied to each layer.
41 | - ... (additional parameters inherited from `AttentionLayers`)
42 |
43 | ## Functionality and Usage
44 |
45 | The `Decoder` class extends the functionality of the `AttentionLayers` class to specifically create decoder models. It employs multi-head self-attention mechanisms and feed-forward networks to process input sequences and generate output sequences.
46 |
47 | ### Initialization
48 |
49 | To create a decoder instance, you can use the following code:
50 |
51 | ```python
52 | from zeta import Decoder
53 |
54 | decoder = Decoder(
55 | dim=512,
56 | depth=6,
57 | heads=8,
58 |     # note: causality is set automatically; passing causal=True here would trigger the assertion above
59 | cross_attend=True,
60 | residual_attn=True,
61 | layer_dropout=0.1
62 | )
63 | ```
64 |
65 | ### Forward Pass
66 |
67 | The forward pass of the decoder can be performed using the following code:
68 |
69 | ```python
70 | output = decoder(input_sequence, context=context_sequence, mask=mask_sequence, context_mask=context_mask_sequence)
71 | ```
72 |
73 | Here, `input_sequence` represents the input sequence to the decoder, `context_sequence` represents the context sequence for cross-attention (if enabled), `mask_sequence` is an optional mask to ignore certain elements in the input, and `context_mask_sequence` is an optional mask for the context sequence.
74 |
75 | ### Return Intermediates
76 |
77 | If desired, you can also obtain intermediate outputs at each layer using the `return_hiddens` parameter:
78 |
79 | ```python
80 | output, intermediates = decoder(input_sequence, context=context_sequence, mask=mask_sequence, context_mask=context_mask_sequence, return_hiddens=True)
81 | ```
82 |
83 | The `intermediates` object will contain information about intermediate hidden states and attention outputs for each layer.
84 |
85 | ## Mathematical Formula
86 |
87 | The `Decoder` class is built upon the foundation of multi-head self-attention and feed-forward networks. It can be summarized using the following mathematical formula:
88 |
89 | 1. Input Embedding: \( X \)
90 | 2. Multi-Head Self-Attention: \( A = \text{MultiHeadAttention}(X) \)
91 | 3. Feed-Forward Network: \( Y = \text{FeedForward}(A) \)
92 | 4. Residual Connection: \( Z = X + Y \)
93 |
94 | The above formula represents the basic forward pass of each layer in the decoder. The decoder iteratively applies these operations across its layers to generate meaningful output sequences while maintaining causal dependencies.
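
As a rough illustration of these per-layer operations, here is a minimal sketch in plain PyTorch. It is not the actual Zeta `AttentionLayers` implementation (which adds normalization, cross-attention, dropout, and many other options); it only mirrors the formula above.

```python
import torch
import torch.nn as nn

class ToyDecoderLayer(nn.Module):
    """One decoder layer: causal self-attention -> feed-forward -> residual."""
    def __init__(self, dim=512, heads=8):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)
        self.ff = nn.Sequential(nn.Linear(dim, dim * 4), nn.GELU(), nn.Linear(dim * 4, dim))

    def forward(self, x):
        seq_len = x.size(1)
        # causal mask: position i may only attend to positions <= i
        causal_mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()
        a, _ = self.attn(x, x, x, attn_mask=causal_mask)  # A = MultiHeadAttention(X)
        y = self.ff(a)                                    # Y = FeedForward(A)
        return x + y                                      # Z = X + Y

x = torch.randn(2, 16, 512)        # (batch, seq_len, dim)
print(ToyDecoderLayer()(x).shape)  # torch.Size([2, 16, 512])
```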
95 |
96 | ## References
97 |
98 | - [Zeta Library Documentation](https://example.com/zeta/docs)
99 | - [Attention Is All You Need](https://arxiv.org/abs/1706.03762)
100 | - [PAR: Prompted Attention](https://arxiv.org/abs/2207.04503)
101 |
102 |
103 | This documentation provides an in-depth overview of the `Decoder` class in the Zeta library. It covers its purpose, parameters, usage examples, and includes a simplified mathematical formula to illustrate its functionality.
--------------------------------------------------------------------------------
/docs/zeta/nn/architecture/transformer.md:
--------------------------------------------------------------------------------
1 | # Transformer Documentation
2 |
3 | ## Overview
4 |
5 | The `Transformer` class in the Zeta library is a versatile deep learning architecture that combines attention mechanisms with feedforward neural networks for various natural language processing tasks, such as language modeling, machine translation, and text generation. The Transformer architecture was introduced in the paper "Attention is All You Need" by Vaswani et al.
6 |
7 | The main purpose of the `Transformer` class is to provide a flexible and configurable interface for creating transformer-based models for sequence-to-sequence tasks. The class allows users to specify the number of tokens, maximum sequence length, attention layers, embeddings, and other parameters necessary for creating and training transformer models.
8 |
9 | The Transformer class supports both autoregressive and non-autoregressive training settings and includes features such as relative positional biases, rotary positional embeddings, memory tokens, and more.
10 |
11 | ## Class Signature
12 |
13 | ```python
14 | class Transformer(nn.Module):
15 | def __init__(
16 | self,
17 | *,
18 | num_tokens,
19 | max_seq_len,
20 | attn_layers,
21 | embedding_provider: BaseEmbedding,
22 | emb_dim = None,
23 | max_mem_len = 0.,
24 | shift_mem_down = 0,
25 | emb_dropout = 0.,
26 | post_emb_norm = False,
27 | num_memory_tokens = None,
28 | tie_embedding = False,
29 | logits_dim = None,
30 | use_abs_pos_emb = True,
31 | scaled_sinu_pos_emb = False,
32 | l2norm_embed = False,
33 | emb_frac_gradient = 1.
34 | )
35 | ```
36 |
37 | ## Parameters
38 |
39 | - `num_tokens` (int): The total number of tokens in the vocabulary.
40 | - `max_seq_len` (int): The maximum length of the input sequences.
41 | - `attn_layers` (AttentionLayers): An instance of the `AttentionLayers` class representing the core attention layers of the transformer.
42 | - `embedding_provider` (BaseEmbedding): An instance of the `BaseEmbedding` class providing token embeddings.
43 | - `emb_dim` (int, optional): The embedding dimension. Default is `None`, in which case `emb_dim` is set to the same dimension as the `attn_layers`.
44 | - `max_mem_len` (float, optional): Maximum memory length for memory tokens. Default is `0.0`, indicating no memory tokens.
45 | - `shift_mem_down` (int, optional): Number of positions to shift memory tokens down in each layer. Default is `0`.
46 | - `emb_dropout` (float, optional): Dropout rate applied to the embedding layer. Default is `0.0`.
47 | - `post_emb_norm` (bool, optional): Apply layer normalization to the post-embedding inputs. Default is `False`.
48 | - `num_memory_tokens` (int, optional): Number of memory tokens to use. Default is `None`, indicating no memory tokens.
49 | - `tie_embedding` (bool, optional): Tie the output projection weights with the input token embeddings. Default is `False`.
50 | - `logits_dim` (int, optional): Dimensionality of the output logits. Default is `None`, indicating that it's the same as `num_tokens`.
51 | - `use_abs_pos_emb` (bool, optional): Use absolute positional embeddings. Default is `True`.
52 | - `scaled_sinu_pos_emb` (bool, optional): Use scaled sinusoidal positional embeddings. Default is `False`.
53 | - `l2norm_embed` (bool, optional): Apply L2 normalization to the embeddings. Default is `False`.
54 | - `emb_frac_gradient` (float, optional): Fraction of the gradient that should go to the embedding. Default is `1.0`.
55 |
56 | ## Methods
57 |
58 | ### `forward`
59 |
60 | ```python
61 | def forward(
62 | self,
63 | x,
64 | return_embeddings = False,
65 | return_logits_and_embeddings = False,
66 | return_intermediates = False,
67 | mask = None,
68 | return_mems = False,
69 | return_attn = False,
70 | mems = None,
71 | pos = None,
72 | prepend_embeds = None,
73 | sum_embeds = None,
74 | **kwargs
75 | )
76 | ```
77 |
78 | This method computes the forward pass of the transformer.
79 |
80 | #### Parameters
81 |
82 | - `x` (torch.Tensor): Input tensor representing the sequence of token indices.
83 | - `return_embeddings` (bool, optional): If `True`, return only the embeddings without applying the output projection. Default is `False`.
84 | - `return_logits_and_embeddings` (bool, optional): If `True`, return both the logits and embeddings. Default is `False`.
85 | - `return_intermediates` (bool, optional): If `True`, return intermediate attention values. Default is `False`.
86 | - `mask` (torch.Tensor, optional): Attention mask indicating positions to be masked. Default is `None`.
87 | - `return_mems` (bool, optional): If `True`, return updated memory tokens. Default is `False`.
88 | - `return_attn` (bool, optional): If `True`, return attention maps. Default is `False`.
89 | - `mems` (list of torch.Tensor, optional): Memory tokens for each layer. Default is `None`.
90 | - `pos` (torch.Tensor, optional): External positional embeddings. Default is `None`.
91 | - `prepend_embeds` (torch.Tensor, optional): Prepend embeddings to the input sequence. Default is `None`.
92 | - `sum_embeds` (torch.Tensor, optional): Sum external embeddings to the input sequence. Default is `None`.
93 | - `kwargs`: Additional keyword arguments passed to the attention layers.
94 |
95 | #### Returns
96 |
97 | The method returns the output logits or embeddings based on the specified return options.
98 |
99 | ## Usage Examples
100 |
101 | Here are three usage examples of the `Transformer` class from the Zeta library:
102 |
103 | ```python
104 | from zeta.nn import Transformer
105 |
106 | # Example 1: Basic Usage
107 | transformer = Transformer(
108 | num_tokens=10000,
109 | max_seq_len=256,
110 | attn_layers=attn_layers_instance,
111 | embedding_provider=embedding_provider_instance
112 | )
113 | logits = transformer(input_tokens)
114 |
115 | # Example 2: Return Embeddings
116 | embeddings = transformer(input_tokens, return_embeddings=True)
117 |
118 | # Example 3: Return Intermediate Attention Maps
119 | logits, attn_maps = transformer(input_tokens, return_attn=True)
120 | ```
121 |
122 | In these examples, replace `attn_layers_instance` and `embedding_provider_instance` with actual instances of `AttentionLayers` and `BaseEmbedding`, respectively, and `input_tokens` with your input tensor containing token indices.
123 |
124 | ## Mathematical Formula
125 |
126 | The mathematical formula for the `Transformer` class can be represented as follows:
127 |
128 | ```
129 | Input -> Embedding -> Post-embedding Norm -> Embedding Dropout -> Project Embedding -> Attention Layers -> Layer Normalization -> To Logits/Embeddings
130 | ```
131 |
132 | In this formula, "Attention Layers" represents the core attention mechanism of the transformer, which includes self-attention and feedforward neural networks.
133 |
134 | ## References
135 |
136 | - Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., ... & Polosukhin, I. (2017). Attention is All You Need. Advances in neural information processing systems, 30.
137 | - Zeta Library: Link to the official documentation of the Zeta library.
138 |
139 |
140 |
141 |
--------------------------------------------------------------------------------
/docs/zeta/nn/attention/base.md:
--------------------------------------------------------------------------------
1 | # BaseAttention Abstract Class
2 | ============================
3 |
4 | The `BaseAttention` class is an abstract base class that defines the interface for all attention mechanisms. It includes the basic structure and methods that all attention mechanisms should have.
5 |
6 | ```python
7 | from abc import abstractmethod
8 | import torch.nn as nn
9 |
10 | class BaseAttention(nn.Module):
11 | @abstractmethod
12 | def __init__(self, dim):
13 | super().__init__()
14 | self.dim = dim
15 |
16 |
17 | @abstractmethod
18 | def forward(self, x, context=None, mask=None):
19 | pass
20 | ```
21 |
22 |
23 | ## Usage
24 | -----------------------
25 |
26 | The `FlashAttentionTwo` class extends the `BaseAttention` abstract base class and implements the specific attention mechanism.
27 |
28 | ```python
29 | class FlashAttentionTwo(BaseAttention):
30 | def __init__(
31 | self,
32 | *,
33 | dim,
34 | heads = 8,
35 | dim_head = 64,
36 | causal = False,
37 | q_bucket_size = 512,
38 | k_bucket_size = 1024,
39 | parallel = False,
40 | mixed_precision = False
41 | ):
42 |         super().__init__(dim)
43 | self.causal = causal
44 | self.parallel = parallel
45 | self.mixed_precision = mixed_precision
46 | self.q_bucket_size = q_bucket_size
47 | self.k_bucket_size = k_bucket_size
48 | # ... rest of the implementation ...
49 |
50 | def forward(
51 | self,
52 | x,
53 | context = None,
54 | mask = None,
55 | q_bucket_size = None,
56 | k_bucket_size = None,
57 | ):
58 | # ... implementation of the forward method ...
59 | ```
60 |
61 |
62 | ## Rules for Using the BaseAttention Class
63 | ---------------------------------------
64 |
65 | 1. Any class that extends the `BaseAttention` class must implement the `forward` method. This method defines how the attention mechanism operates.
66 |
67 | 2. The `__init__` method of the `BaseAttention` class takes a single parameter, `dim`. Any class that extends `BaseAttention` should pass this parameter to the `__init__` method of the base class.
68 |
69 | 3. The `forward` method of the `BaseAttention` class takes three parameters: `x`, `context`, and `mask`. Any class that extends `BaseAttention` should include these parameters in its `forward` method.
70 |
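As a quick illustration of these rules, here is a minimal toy subclass. This is only a sketch that assumes the `BaseAttention` class defined above; it is not part of the Zeta library.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleDotProductAttention(BaseAttention):
    """Toy single-head scaled dot-product attention following the BaseAttention interface."""
    def __init__(self, dim):
        super().__init__(dim)
        self.to_q = nn.Linear(dim, dim, bias=False)
        self.to_k = nn.Linear(dim, dim, bias=False)
        self.to_v = nn.Linear(dim, dim, bias=False)

    def forward(self, x, context=None, mask=None):
        source = x if context is None else context            # self- or cross-attention
        q, k, v = self.to_q(x), self.to_k(source), self.to_v(source)
        scores = q @ k.transpose(-2, -1) / (self.dim ** 0.5)  # scaled dot-product scores
        if mask is not None:                                  # assumes a boolean mask, True = keep
            scores = scores.masked_fill(~mask, float("-inf"))
        return F.softmax(scores, dim=-1) @ v

attn = SimpleDotProductAttention(dim=512)
out = attn(torch.randn(1, 10, 512))                           # (1, 10, 512)
```
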
71 | ---
72 |
73 | ## Example of Using the FlashAttentionTwo Class
74 | --------------------------------------------
75 |
76 | ```python
77 | import torch
78 | from zeta import FlashAttentionTwo
79 | # Create an instance of the FlashAttentionTwo class
80 | attention = FlashAttentionTwo(dim=512, heads=8, dim_head=64)
81 |
82 | # Create some input data
83 | x = torch.randn(1, 10, 512)
84 |
85 | # Apply the attention mechanism
86 | out = attention(x)
87 | ```
88 |
89 |
90 | In this example, we first create an instance of the `FlashAttentionTwo` class. We then create some input data `x` and apply the attention mechanism to this data by calling the `forward` method of the `attention` instance.
--------------------------------------------------------------------------------
/docs/zeta/nn/attention/flash2.md:
--------------------------------------------------------------------------------
1 | # Module Name: FlashAttentionTwo
2 |
3 | The `FlashAttentionTwo` class is a PyTorch module that implements a variant of the attention mechanism, which is a key component in many state-of-the-art models in natural language processing and other fields. This class is designed to be memory-efficient and optionally supports parallel computation and mixed precision for improved performance.
4 |
5 | ## Class Definition
6 | ----------------
7 |
8 | ```python
9 | class FlashAttentionTwo(nn.Module):
10 | def __init__(
11 | self,
12 | *,
13 | dim,
14 | heads = 8,
15 | dim_head = 64,
16 | causal = False,
17 | q_bucket_size = 512,
18 | k_bucket_size = 1024,
19 | parallel = False,
20 | mixed_precision = False
21 | ):
22 | ```
23 |
24 | ---
25 |
26 | ### Parameters
27 |
28 | - `dim` (int): The dimensionality of the input data.
29 | - `heads` (int, optional): The number of attention heads. Default is 8.
30 | - `dim_head` (int, optional): The dimensionality of each attention head. Default is 64.
31 | - `causal` (bool, optional): If True, the attention mechanism is causal. Default is False.
32 | - `q_bucket_size` (int, optional): The bucket size for the query in the attention mechanism. Default is 512.
33 | - `k_bucket_size` (int, optional): The bucket size for the key in the attention mechanism. Default is 1024.
34 | - `parallel` (bool, optional): If True, the computation is performed in parallel across multiple GPUs. Default is False.
35 | - `mixed_precision` (bool, optional): If True, the computation is performed in mixed precision for improved performance. Default is False.
36 |
37 | -----
38 |
39 | ### Methods
40 |
41 | #### `forward`
42 |
43 | ```
44 | def forward(
45 | self,
46 | x,
47 | context = None,
48 | mask = None,
49 | q_bucket_size = None,
50 | k_bucket_size = None,
51 | ):
52 | ```
53 |
54 | Performs the forward pass of the attention mechanism.
55 |
56 | ##### Parameters
57 |
58 | - `x` (Tensor): The input data.
59 | - `context` (Tensor, optional): The context for the attention mechanism. If not provided, the input data `x` is used as the context.
60 | - `mask` (Tensor, optional): An optional mask for the attention mechanism.
61 | - `q_bucket_size` (int, optional): The bucket size for the query in the attention mechanism. If not provided, the value specified during initialization is used.
62 | - `k_bucket_size` (int, optional): The bucket size for the key in the attention mechanism. If not provided, the value specified during initialization is used.
63 |
64 | ---
65 |
66 | ##### Returns
67 |
68 | - `out` (Tensor): The output of the attention mechanism.
69 |
70 |
71 | ## Usage Examples
72 | --------------
73 |
74 | ### Example 1: Basic Usage
75 |
76 | ```python
77 | import torch
78 | from zeta import FlashAttentionTwo
79 |
80 | model = FlashAttentionTwo(dim=512)
81 | x = torch.randn(1, 10, 512)
82 | out = model(x)
83 | ```
84 |
85 |
86 |
87 | ### Example 2: Using a Mask
88 |
89 | ```python
90 | import torch
91 | from zeta import FlashAttentionTwo
92 |
93 | model = FlashAttentionTwo(dim=512)
94 | x = torch.randn(1, 10, 512)
95 | mask = torch.ones(1, 10)
96 | out = model(x, mask=mask)
97 | ```
98 |
99 | ----
100 |
101 | ### Example 3: Using a Context
102 |
103 | ```python
104 | import torch
105 | from zeta import FlashAttentionTwo
106 |
107 | model = FlashAttentionTwo(dim=512)
108 | x = torch.randn(1, 10, 512)
109 | context = torch.randn(1, 10, 512)
110 | out = model(x, context=context)
111 | ```
112 |
113 |
114 | ## Mathematical Formula
115 | --------------------
116 |
117 | The attention mechanism can be described by the following formula:
118 |
119 | \[ \text{Attention}(Q, K, V) = \text{softmax}\left(\frac{QK^T}{\sqrt{d_k}}\right) V \]
120 |
121 | where Q, K, and V are the query, key, and value, respectively. The softmax function ensures that the weights sum to 1, and the dot product of the weights and the value gives the output of the attention mechanism.
122 |
123 |
124 | ### Additional Information
125 | ----------------------
126 |
127 | The `FlashAttentionTwo` class is designed to be memory-efficient and optionally supports parallel computation and mixed precision for improved performance.
128 |
129 | - The `parallel` parameter allows the computation to be performed in parallel across multiple GPUs. This can significantly speed up the computation for large models or large datasets.
130 |
131 | - The `mixed_precision` parameter allows the computation to be performed in mixed precision. This means that some operations are performed in lower precision (e.g., float16) and some in higher precision (e.g., float32). This can significantly speed up the computation and reduce memory usage on modern GPUs that support mixed precision.
132 |
133 | - The `q_bucket_size` and `k_bucket_size` parameters control the bucket size for the query and key in the attention mechanism, respectively. These parameters can be used to trade off between memory usage and computational efficiency: smaller bucket sizes are more memory-efficient but may be slower, while larger bucket sizes are faster but use more memory.
134 |
135 | ### Common Issues
136 | -------------
137 |
138 | - If you encounter out-of-memory errors, you can try reducing the `q_bucket_size` and `k_bucket_size` parameters, or enabling mixed precision computation by setting `mixed_precision=True`.
139 |
140 | - If you encounter slow computation, you can try increasing the `q_bucket_size` and `k_bucket_size` parameters, or enabling parallel computation by setting `parallel=True` (if you have multiple GPUs available).
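
For example, a memory-constrained configuration might look like the sketch below. The parameter values are illustrative only; the bucket sizes can be set at construction time or overridden per call, as shown in the `forward` signature above.

```python
import torch
from zeta import FlashAttentionTwo

# smaller buckets + mixed precision to reduce memory pressure
model = FlashAttentionTwo(dim=512, q_bucket_size=256, k_bucket_size=512, mixed_precision=True)
x = torch.randn(1, 2048, 512)
out = model(x)

# bucket sizes can also be overridden for a single forward pass
out = model(x, q_bucket_size=128, k_bucket_size=256)
```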
141 |
142 | ### References and Resources
143 | ------------------------
144 |
145 | - [Attention Is All You Need](https://arxiv.org/abs/1706.03762): This is the original paper that introduced the concept of attention in deep learning.
146 |
147 | - [PyTorch Documentation](https://pytorch.org/docs/stable/index.html): The official PyTorch documentation provides detailed information about the PyTorch library and its modules.
148 |
149 | - [Efficient Attention: Attention with Linear Complexities](https://arxiv.org/abs/1812.01243): This paper introduces the concept of bucketing in the attention mechanism to improve memory efficiency.
150 |
151 | - [Mixed Precision Training](https://arxiv.org/abs/1710.03740): This paper introduces the concept of mixed precision training, which can significantly speed up computation and reduce memory usage on modern GPUs.
152 |
153 | - [PyTorch Tutorials](https://pytorch.org/tutorials/): The official PyTorch tutorials provide many examples of how to use PyTorch for various tasks.
154 |
155 |
--------------------------------------------------------------------------------
/docs/zeta/nn/attention/flash_attention.md:
--------------------------------------------------------------------------------
1 | # FlashAttention
2 |
3 | The FlashAttention module performs efficient attention computations, specifically designed for leveraging hardware capabilities on certain NVIDIA GPUs. It offers the option to perform "flash" attention which can be computationally faster on specific GPU architectures.
4 |
5 | ---
6 |
7 | ## Class Definition:
8 |
9 | ```python
10 | class FlashAttention(nn.Module):
11 | ```
12 |
13 | ### Parameters:
14 |
15 | - `causal` (bool, optional): Determines whether to apply causal masking. Default: False.
16 | - `dropout` (float, optional): Dropout probability. Default: 0.
17 | - `flash` (bool, optional): Whether to use flash attention. Requires PyTorch version 2.0 or above. Default: True.
18 |
19 | ---
20 |
21 | ## Methods:
22 |
23 | ### `__init__(self, causal=False, dropout=0., flash=True)`
24 |
25 | Initializes the FlashAttention module.
26 |
27 | ### `get_mask(self, i, j, device)`
28 |
29 | Generates a mask for attention computation.
30 |
31 | #### Parameters:
32 | - `i` (int): Length of the query sequence.
33 | - `j` (int): Length of the key sequence.
34 | - `device` (torch.device): Device to place the mask tensor.
35 |
36 | #### Returns:
37 | - `torch.Tensor`: Mask tensor of shape `(i, j)`.
38 |
39 | ### `flash_attn(self, q, k, v, mask=None, attn_bias=None)`
40 |
41 | Performs flash attention computation.
42 |
43 | #### Parameters:
44 | - `q` (torch.Tensor): Query tensor of shape `(batch, heads, q_len, dim)`.
45 | - `k` (torch.Tensor): Key tensor of shape `(batch, heads, k_len, dim)`.
46 | - `v` (torch.Tensor): Value tensor of shape `(batch, heads, v_len, dim)`.
47 | - `mask` (torch.Tensor, optional): Mask tensor of shape `(batch, heads, q_len, k_len)`. Default: None.
48 | - `attn_bias` (torch.Tensor, optional): Attention bias tensor of shape `(batch, heads, q_len, k_len)`. Default: None.
49 |
50 | #### Returns:
51 | - `torch.Tensor`: Output tensor of shape `(batch, heads, q_len, dim)`.
52 |
53 | ### `forward(self, q, k, v, mask=None, attn_bias=None)`
54 |
55 | Performs the attention computation using einstein notation.
56 |
57 | #### Parameters:
58 | - `q` (torch.Tensor): Query tensor of shape `(batch, heads, q_len, dim)`.
59 | - `k` (torch.Tensor): Key tensor of shape `(batch, heads, k_len, dim)`.
60 | - `v` (torch.Tensor): Value tensor of shape `(batch, heads, v_len, dim)`.
61 | - `mask` (torch.Tensor, optional): Mask tensor of shape `(batch, heads, q_len, k_len)`. Default: None.
62 | - `attn_bias` (torch.Tensor, optional): Attention bias tensor of shape `(batch, heads, q_len, k_len)`. Default: None.
63 |
64 | #### Returns:
65 | - `torch.Tensor`: Attention output tensor.
66 |
67 | ---
68 |
69 | ## Usage Examples:
70 |
71 | 1. **Basic Usage**:
72 | ```python
73 | from zeta.nn import FlashAttention
74 | attn_module = FlashAttention()
75 | output = attn_module(query_tensor, key_tensor, value_tensor)
76 | ```
77 |
78 | 2. **Using Flash Attention with Masking**:
79 | ```python
80 | from zeta.nn import FlashAttention
81 | attn_module = FlashAttention(flash=True)
82 | mask = attn_module.get_mask(query_length, key_length, device)
83 | output = attn_module(query_tensor, key_tensor, value_tensor, mask=mask)
84 | ```
85 |
86 | 3. **Using Causal Flash Attention with Dropout**:
87 | ```python
88 | from zeta.nn import FlashAttention
89 | attn_module = FlashAttention(causal=True, dropout=0.1, flash=True)
90 | output = attn_module(query_tensor, key_tensor, value_tensor)
91 | ```
92 |
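4. **A Fully Self-Contained Sketch**: the example below spells out concrete tensor shapes following the `(batch, heads, seq_len, dim)` convention documented above; the shapes and parameter values are illustrative only.
```python
import torch
from zeta.nn import FlashAttention

batch, heads, q_len, k_len, dim = 2, 8, 16, 32, 64
q = torch.randn(batch, heads, q_len, dim)
k = torch.randn(batch, heads, k_len, dim)
v = torch.randn(batch, heads, k_len, dim)

# flash=False falls back to the standard einstein-notation path on any hardware
attn_module = FlashAttention(causal=False, dropout=0.0, flash=False)
output = attn_module(q, k, v)
print(output.shape)  # torch.Size([2, 8, 16, 64])
```
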
93 | ---
94 |
95 | ## Additional Tips:
96 |
97 | - The `FlashAttention` module is optimized for NVIDIA A100 GPUs. On these GPUs, using `flash=True` is recommended for faster computation.
98 | - Ensure that PyTorch version is 2.0 or above when enabling flash attention.
99 | - The mask generated using `get_mask` method is useful for attention computations where certain positions need to be masked out.
100 |
101 | ---
102 |
103 | ## References:
104 |
105 | - Original Attention Mechanism: [Attention Is All You Need](https://arxiv.org/abs/1706.03762)
--------------------------------------------------------------------------------
/docs/zeta/nn/attention/multihead.md:
--------------------------------------------------------------------------------
1 | # Multihead Attention Documentation for Zeta Library
2 |
3 | ## Introduction
4 |
5 | `MultiheadAttention` is a module in the Zeta library that provides a multi-head attention mechanism. This mechanism enables the model to focus on different parts of the input sequence simultaneously. It's widely used in models such as transformers for capturing various aspects of information in the input.
6 |
7 | ## Purpose
8 |
9 | The purpose of the `MultiheadAttention` module is to allow joint information representation from different subspaces of the input sequence. This results in capturing a richer context when modeling sequences.
10 |
11 | ## Architecture
12 |
13 | The `MultiheadAttention` class extends from the `nn.Module` base class. Internally, it uses linear transformations for keys, values, and queries (`k_proj`, `v_proj`, `q_proj`). These projections are wrapped using the `MultiwayWrapper`. It also utilizes layer normalization (`inner_attn_ln`) and optionally uses relative positional embeddings (`xpos`).
14 |
15 | ## Class Definition
16 |
17 | ```python
18 | class zeta.nn.embeddings.MultiheadAttention(nn.Module):
19 | ```
20 |
21 | ### Parameters:
22 | - `args`: General arguments passed for configuring the module.
23 | - `embed_dim` (int): Total dimension of the model.
24 | - `num_heads` (int): Number of parallel attention heads. The embed_dim will be split across num_heads.
25 | - `dropout` (float): Dropout probability. Default: 0.0.
26 | - `self_attention` (bool): Whether to apply self attention. Only one of `self_attention` or `encoder_decoder_attention` can be True. Default: False.
27 | - `encoder_decoder_attention` (bool): Whether to apply encoder-decoder attention. Only one of `self_attention` or `encoder_decoder_attention` can be True. Default: False.
28 | - `subln` (bool): If True, applies layer normalization after self attention. Default: False.
29 |
30 | ### Methods:
31 |
32 | #### `reset_parameters()`
33 | Reinitialize the parameters of the attention module.
34 |
35 | #### `forward(query, key, value, ...)`
36 | Computes the forward pass of the attention mechanism.
37 |
38 | - Parameters:
39 | - `query` (Tensor): The query tensor.
40 | - `key` (Tensor): The key tensor.
41 | - `value` (Tensor): The value tensor.
42 | - Other arguments including `incremental_state`, `key_padding_mask`, `attn_mask`, `rel_pos`, and `is_first_step`.
43 |
44 | - Returns:
45 | - `attn` (Tensor): The computed attention tensor.
46 | - `attn_weights` (Tensor): The attention weights.
47 |
48 | ### Mathematical Formulation:
49 |
50 | Given a query \( Q \), key \( K \), and value \( V \), the multihead attention mechanism is mathematically represented as:
51 |
52 | \[ \text{Attention}(Q, K, V) = \text{softmax}\left(\frac{QK^T}{\sqrt{d_k}}\right) V \]
53 |
54 | Where \( d_k \) is the dimension of the key.
55 |
56 | ## Usage Examples:
57 |
58 | ### Example 1: Basic Usage
59 |
60 | ```python
61 | from zeta.nn.embeddings import MultiheadAttention
62 | import torch
63 |
64 | args = ... # Some configuration
65 | attention = MultiheadAttention(args, embed_dim=512, num_heads=8, dropout=0.1, self_attention=True)
66 | query = torch.rand((32, 10, 512))
67 | key = torch.rand((32, 10, 512))
68 | value = torch.rand((32, 10, 512))
69 |
70 | attn, attn_weights = attention(query, key, value)
71 | ```
72 |
73 | ### Example 2: With Masking
74 |
75 | ```python
76 | from zeta.nn.embeddings import MultiheadAttention
77 | import torch
78 |
79 | args = ... # Some configuration
80 | attention = MultiheadAttention(args, embed_dim=512, num_heads=8, dropout=0.1, self_attention=True)
81 | query = torch.rand((32, 10, 512))
82 | key = torch.rand((32, 10, 512))
83 | value = torch.rand((32, 10, 512))
84 | attn_mask = torch.triu(torch.ones(10, 10), diagonal=1) * -1e9  # mask out future (strictly upper-triangular) positions
85 |
86 | attn, attn_weights = attention(query, key, value, attn_mask=attn_mask)
87 | ```
88 |
89 | ### Example 3: Encoder-Decoder Attention
90 |
91 | ```python
92 | from zeta.nn.embeddings import MultiheadAttention
93 | import torch
94 |
95 | args = ... # Some configuration
96 | attention = MultiheadAttention(args, embed_dim=512, num_heads=8, dropout=0.1, encoder_decoder_attention=True)
97 | query = torch.rand((32, 10, 512)) # Decoder query
98 | key = torch.rand((32, 20, 512)) # Encoder key
99 | value = torch.rand((32, 20, 512)) # Encoder value
100 |
101 | attn, attn_weights = attention(query, key, value)
102 | ```
103 |
104 | ## Additional Tips:
105 | - For encoder-decoder attention, make sure the dimensions of the encoder and decoder tensors match the expected input sizes.
106 | - Using masks can be helpful to prevent the attention mechanism from focusing on certain parts of the sequence, such as padding.
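
For instance, a padding mask can be built directly from the sequence lengths, as in the sketch below. This assumes `key_padding_mask` follows the common PyTorch convention of `True` marking padded positions; verify against the Zeta implementation before relying on it.

```python
import torch

batch, seq_len = 32, 10
lengths = torch.tensor([10, 7, 9] + [10] * 29)        # true length of each of the 32 sequences
positions = torch.arange(seq_len).unsqueeze(0)        # (1, seq_len)
key_padding_mask = positions >= lengths.unsqueeze(1)  # (batch, seq_len), True where padded

# attn, attn_weights = attention(query, key, value, key_padding_mask=key_padding_mask)
```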
107 |
--------------------------------------------------------------------------------
/docs/zeta/nn/attention/multiquery.md:
--------------------------------------------------------------------------------
1 | # MultiQueryAttention
2 |
3 | ## Overview and Introduction:
4 |
5 | The `MultiQueryAttention` class is a part of the Zeta library, designed to perform self-attention operations on given input data. In multi-query attention, the query is split across multiple heads while a single key and value head is shared among them, reducing memory bandwidth and cache size relative to full multi-head attention. This class allows for various implementations of attention, including Flash, Triton, and Torch. It also provides the flexibility to choose normalization type, fully connected layer type, and offers debugging verbosity.
6 |
7 | ## Class Definition:
8 |
9 | ```python
10 | class MultiQueryAttention(nn.Module):
11 | """Multi-Query self attention.
12 | Using torch or triton attention implementation enables the user to also use
13 | additive bias.
14 | """
15 | ```
16 |
17 | ### Parameters:
18 | - `d_model` (int): Dimension of the model.
19 | - `heads` (int): Number of parallel attention heads.
20 | - `attn_impl` (str, optional): Attention implementation type, can be either 'triton', 'flash', or 'torch'. Default is 'triton'.
21 | - `clip_qkv` (Optional[float]): Clipping value for query, key, and value. If specified, qkv is clamped within the range [-clip_qkv, clip_qkv].
22 | - `qk_ln` (bool, optional): If True, layer normalization is applied to query and key.
23 | - `softmax_scale` (Optional[float]): Scale for softmax. Default value is computed as 1/sqrt(head_dim).
24 | - `attn_pdrop` (float, optional): Attention dropout probability. Default is 0.0.
25 | - `norm_type` (str, optional): Normalization type, default is 'low_precision_layernorm'.
26 | - `fc_type` (str, optional): Fully connected layer type, default is 'torch'.
27 | - `verbose` (int, optional): Verbosity level, default is 0.
28 | - `device` (Optional[str]): Device to which the tensors should be moved.
29 |
30 | ## Functionality and Usage:
31 |
32 | The `MultiQueryAttention` class operates by using multiple queries to capture broader context information from given data. This is achieved through the forward method which computes the self-attention on the given inputs.
33 |
34 | ### Method: `forward`
35 | ```python
36 | def forward(
37 | self,
38 | x,
39 | past_key_value=None,
40 | bias=None,
41 | mask=None,
42 | causal=True,
43 | needs_weights=False,
44 | ):
45 | ```
46 |
47 | #### Parameters:
48 |
49 | - `x` (Tensor): Input tensor.
50 | - `past_key_value` (Optional): Past key and value for attention computation. Default is None.
51 | - `bias` (Optional): Additive bias for attention scores. Default is None.
52 | - `mask` (Optional): Key padding mask. Default is None.
53 | - `causal` (bool, optional): If True, a causal mask is applied to prevent information flow from future tokens. Default is True.
54 | - `needs_weights` (bool, optional): If True, attention weights are also returned. Default is False.
55 |
56 | #### Returns:
57 |
58 | - `context` (Tensor): Contextualized tensor after attention computation.
59 | - `attn_weights` (Tensor, Optional): Attention weights. Only returned if `needs_weights` is True.
60 | - `past_key_value` (Tensor, Optional): New past key and value.
61 |
62 | ## Usage Examples:
63 |
64 | 1. Basic Usage:
65 | ```python
66 | from zeta import MultiQueryAttention
67 | import torch
68 |
69 | # Initialize the attention module
70 | attention_layer = MultiQueryAttention(d_model=512, heads=8, attn_impl='torch')
71 |
72 | # Random input tensor
73 | x = torch.rand(16, 10, 512) # Batch of 16, sequence length 10, embedding size 512
74 | output, attn_weights, _ = attention_layer(x)
75 | ```
76 |
77 | 2. Using Past Key and Value:
78 | ```python
79 | past_key_value = (torch.rand(16, 8, 10, 64), torch.rand(16, 8, 10, 64)) # Past key and value for 8 heads
80 | output, attn_weights, new_past_key_value = attention_layer(x, past_key_value=past_key_value)
81 | ```
82 |
83 | 3. With Causal Masking and Weights:
84 | ```python
85 | output, attn_weights, _ = attention_layer(x, causal=True, needs_weights=True)
86 | ```
87 |
88 | ## Mathematical Formula:
89 |
90 | For the self-attention mechanism, the computation involves using multiple queries (\( Q \)), keys (\( K \)), and values (\( V \)):
91 |
92 | ```latex
93 | \[ \text{Attention}(Q, K, V) = \text{Softmax}\left(\frac{Q \times K^T}{\sqrt{d_k}} + \text{Bias}\right) \times V \]
94 | ```
95 | Where:
96 | - \( Q \), \( K \), and \( V \) are the queries, keys, and values respectively.
97 | - \( d_k \) is the dimension of the keys.
98 | - Bias is the optional additive bias.
99 |
100 | ## Additional Information and Tips:
101 |
102 | - It's crucial to select the correct attention implementation (`attn_impl`) based on your needs and the hardware you're running on.
103 | - The `triton` implementation might be faster than `flash` but can use more memory. Ensure that you have adequate GPU memory if using `triton`.
104 | - If using the `torch` implementation, it's advisable to check if CUDA is available for GPU acceleration.
105 | - The clipping of qkv (`clip_qkv`) can be beneficial for stability in training.
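
For example, the device check suggested above might look like the following sketch; the `device` argument is the one listed in the parameters, and the values are illustrative only.

```python
import torch
from zeta import MultiQueryAttention

device = "cuda" if torch.cuda.is_available() else "cpu"
attention_layer = MultiQueryAttention(d_model=512, heads=8, attn_impl="torch", device=device)

x = torch.rand(16, 10, 512).to(device)  # batch 16, sequence length 10, embedding size 512
output, attn_weights, _ = attention_layer(x)
```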
106 |
107 | ## References and Resources:
108 | For a deeper understanding of the self-attention mechanism and its variants, you can refer to the "Attention is All You Need" paper by Vaswani et al., 2017.
--------------------------------------------------------------------------------
/docs/zeta/nn/biases/alibi.md:
--------------------------------------------------------------------------------
1 | # AlibiPositionalBias Documentation
2 |
3 | ## Introduction
4 |
5 | The `AlibiPositionalBias` module belongs to the Zeta library and plays a crucial role in handling positional bias for multi-head attention mechanisms. Rather than relying on absolute positional embeddings, it adds a linear bias to attention scores that grows with the distance between positions, with a per-head slope derived from the number of attention heads.
6 |
7 | ## Class Definition:
8 |
9 | ```python
10 | class AlibiPositionalBias(nn.Module):
11 | ```
12 |
13 | ### Parameters:
14 | - **heads** (`int`): Number of attention heads for which the slopes need to be calculated.
15 | - **total_heads** (`int`): Total number of attention heads in the network.
16 |
17 | ### Attributes:
18 | - **slopes** (`Tensor`): Tensor containing slope values, which are computed based on the number of heads.
19 | - **bias** (`Tensor` or `None`): Tensor caching the computed positional bias values. It is `None` until the bias is first computed, or when it needs to be recomputed.
20 |
21 | ### Methods:
22 | #### `__init__(self, heads, total_heads, **kwargs) -> None`:
23 | Initializes the `AlibiPositionalBias` module.
24 |
25 | #### `get_bias(self, i, j, device) -> Tensor`:
26 | Computes the positional bias for given dimensions i and j.
27 |
28 | - **Parameters**:
29 | - **i** (`int`): One dimension of the required positional bias.
30 | - **j** (`int`): Second dimension of the required positional bias.
31 | - **device** (`torch.device`): The device on which computations are to be performed.
32 |
33 | #### `_get_slopes(heads) -> List[float]`:
34 | A static method that calculates slopes based on the number of attention heads.
35 |
36 | - **Parameters**:
37 | - **heads** (`int`): Number of attention heads.
38 |
39 | #### `forward(self, i, j) -> Tensor`:
40 | Computes or retrieves the bias tensor for given dimensions.
41 |
42 | - **Parameters**:
43 | - **i** (`int`): One dimension for the required positional bias.
44 | - **j** (`int`): Second dimension for the required positional bias.
45 |
46 | ## Mathematical Formula:
47 |
48 | Given `n` attention heads, the alibi positional bias can be represented as:
49 |
50 | \[ \text{Bias} = -\left| j_{\text{range}} \right| \times \text{slope} \]
51 |
52 | Where:
53 | - \( j_{\text{range}} \) is an array of numbers from `0` to `j-1`.
54 | - `slope` is computed based on the number of heads using `_get_slopes` method.
55 |
56 | ## Usage Examples:
57 |
58 | ### Example 1: Initialize and compute bias
59 | ```python
60 | from zeta import AlibiPositionalBias
61 | import torch
62 |
63 | bias_module = AlibiPositionalBias(heads=4, total_heads=8)
64 | bias = bias_module(10, 10)
65 | print(bias)
66 | ```
67 |
68 | ### Example 2: Retrieve stored bias
69 | ```python
70 | bias = bias_module(5, 5)
71 | print(bias)
72 | ```
73 |
74 | ### Example 3: Computing bias for different dimensions
75 | ```python
76 | bias = bias_module(8, 15)
77 | print(bias)
78 | ```
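
### Example 4: Adding the bias to attention scores (illustrative sketch)

The snippet below shows how the returned bias can be added to raw attention logits before the softmax. The output shape of `forward` is assumed here to broadcast as `(heads, i, j)`; check this against the installed version of zeta.

```python
from zeta import AlibiPositionalBias
import torch

bias_module = AlibiPositionalBias(heads=4, total_heads=4)

# Attention logits: (batch, heads, query_len, key_len)
logits = torch.randn(2, 4, 10, 10)

alibi_bias = bias_module(10, 10)               # assumed shape: (heads, 10, 10)
attn = (logits + alibi_bias).softmax(dim=-1)   # bias broadcasts over the batch
print(attn.shape)  # torch.Size([2, 4, 10, 10])
```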
79 |
80 | ## Note:
81 |
82 | - It's crucial to ensure that the `total_heads` parameter is always greater than or equal to the `heads` parameter during initialization.
83 | - The device property is internally used to determine the computation device based on the registered buffers.
84 |
85 | ## References:
86 |
87 | For a deeper understanding and applications of positional bias in attention mechanisms, one may refer to the foundational paper on Transformer architectures:
88 | - [Attention Is All You Need](https://arxiv.org/abs/1706.03762)
89 |
90 | Also, the `einops` library provides a versatile interface for tensor manipulations. More details can be found at its official [documentation](https://einops.rocks/).
--------------------------------------------------------------------------------
/docs/zeta/nn/biases/relative_bias.md:
--------------------------------------------------------------------------------
1 | # RelativePositionBias
2 |
3 | `RelativePositionBias` is a specialized PyTorch module designed to generate relative position biases, which can be vital for certain attention mechanisms in deep learning architectures. This module quantizes the distance between two positions into a certain number of buckets and then uses an embedding to get the relative position bias. This mechanism aids in the attention mechanism by providing biases based on relative positions between the query and key, rather than relying solely on their absolute positions.
4 |
5 | ## Architecture:
6 | The architecture can be visualized in three major steps:
7 | 1. **Bucketing:** Convert relative distances between two positions into bucket indices.
8 | 2. **Embedding:** Use the bucket indices to get embeddings for each pair of positions.
9 | 3. **Computing Bias:** Computes the bias values based on the embeddings.
10 |
11 | ## Purpose:
12 | In the context of attention mechanisms, especially the transformer-based architectures, the position of tokens can provide valuable information. The `RelativePositionBias` class helps introduce this information in a compact form by bucketing relative positions and then embedding them to serve as biases for the attention scores.
13 |
14 | ## Mathematical Formula:
15 | Given a relative position \( r \), the bucket index \( b \) is computed as:
16 | \[ b =
17 | \begin{cases}
18 | n + \frac{\text{num\_buckets}}{2} & \text{if } n < 0 \text{ and bidirectional is True} \\
19 | \min\left( \text{max\_exact} + \frac{\log\left(n / \text{max\_exact}\right)}{\log\left(\text{max\_distance} / \text{max\_exact}\right)} \times \left(\text{num\_buckets} - \text{max\_exact}\right),\; \text{num\_buckets} - 1 \right) & \text{otherwise}
20 | \end{cases}
21 | \]
22 | Where \( n \) is the negative of the relative position, and \( \text{max\_exact} = \text{num\_buckets} / 2 \).
23 |
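A simplified, scalar sketch of this bucketing rule is shown below. It is illustrative only: the library's `_relative_position_bucket` static method operates on tensors, and its exact signature may differ.

```python
import math

def relative_position_bucket(relative_position: int, bidirectional: bool = True,
                             num_buckets: int = 32, max_distance: int = 128) -> int:
    # n is the negative of the relative position, as in the formula above
    n = -relative_position
    offset = 0
    if bidirectional:
        num_buckets //= 2
        if n < 0:
            offset = num_buckets
        n = abs(n)
    else:
        n = max(n, 0)

    max_exact = num_buckets // 2
    if n < max_exact:
        # small distances each get their own bucket
        return offset + n
    # larger distances are bucketed logarithmically up to max_distance
    log_bucket = max_exact + int(
        math.log(n / max_exact) / math.log(max_distance / max_exact)
        * (num_buckets - max_exact)
    )
    return offset + min(log_bucket, num_buckets - 1)
```
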
24 | ## Class Definition:
25 |
26 | ```python
27 | class RelativePositionBias(nn.Module):
28 | """
29 | Compute relative position bias which can be utilized in attention mechanisms.
30 |
31 | Parameters:
32 | - bidirectional (bool): If True, considers both forward and backward relative positions. Default: True.
33 | - num_buckets (int): Number of buckets to cluster relative position distances. Default: 32.
34 | - max_distance (int): Maximum distance to be considered for bucketing. Distances beyond this will be mapped to the last bucket. Default: 128.
35 | - n_heads (int): Number of attention heads. Default: 12.
36 | """
37 | ```
38 |
39 | ### Key Methods:
40 | - **_relative_position_bucket**: This static method is responsible for converting relative positions into bucket indices.
41 | - **compute_bias**: Computes the relative position bias for given lengths of queries and keys.
42 | - **forward**: Computes and returns the relative position biases for a batch.
43 |
44 | ## Usage Examples:
45 |
46 | ```python
47 | from zeta import RelativePositionBias
48 | import torch
49 | from torch import nn
50 | # Initialize the RelativePositionBias module
51 | rel_pos_bias = RelativePositionBias()
52 |
53 | # Example 1: Compute bias for a single batch
54 | bias_matrix = rel_pos_bias(1, 10, 10)
55 |
56 | # Example 2: Utilize in conjunction with an attention mechanism
57 | # NOTE: This is a mock example, and may not represent an actual attention mechanism's complete implementation.
58 | class MockAttention(nn.Module):
59 | def __init__(self):
60 | super().__init__()
61 | self.rel_pos_bias = RelativePositionBias()
62 |
63 | def forward(self, queries, keys):
64 | bias = self.rel_pos_bias(queries.size(0), queries.size(1), keys.size(1))
65 | # Further computations with bias in the attention mechanism...
66 | return None # Placeholder
67 |
68 | # Example 3: Modify default configurations
69 | custom_rel_pos_bias = RelativePositionBias(bidirectional=False, num_buckets=64, max_distance=256, n_heads=8)
70 | ```
71 |
72 | ## Tips:
73 | 1. The choice of `num_buckets` and `max_distance` might need tuning based on the dataset and application.
74 | 2. If the architecture doesn't need bidirectional biases, set `bidirectional` to `False` to reduce computation.
75 | 3. Ensure that the device of tensors being processed and the device of the `RelativePositionBias` module are the same.
76 |
77 | ## References:
78 | - [Attention Is All You Need](https://arxiv.org/abs/1706.03762)
79 | - [Transformer Architectures](https://www.aclweb.org/anthology/D18-1422.pdf)
80 |
81 | Note: This documentation is based on the provided code and might need adjustments when integrated into the complete `zeta` library.
--------------------------------------------------------------------------------
/docs/zeta/nn/biases/xpos.md:
--------------------------------------------------------------------------------
1 | # XPOS Module Documentation
2 | -------------------------
3 |
4 | ### Architecture
5 |
6 | The XPOS module is a part of a neural network model and is implemented as a subclass of `torch.nn.Module`. It consists of several functions and a class that work together to apply rotary positional embeddings to an input tensor.
7 |
8 | ### Purpose
9 |
10 | The purpose of the XPOS module is to incorporate positional information into the input tensor of a neural network model. It achieves this by generating fixed positional embeddings and applying them to the input tensor using rotary positional encoding techniques. This allows the model to capture the sequential order and relative positions of the input elements, which can be beneficial for tasks such as natural language processing and time series analysis.
11 |
12 | ### Functions and Methods
13 |
14 | 1. `fixed_pos_embedding(x)`: Generates fixed positional embeddings for the input tensor.
15 |
16 | - Args:
17 | - `x` (torch.Tensor): Input tensor of shape `(seq_len, dim)`.
18 | - Returns:
19 | - `sin` (torch.Tensor): Sine positional embeddings of shape `(seq_len, dim)`.
20 | - `cos` (torch.Tensor): Cosine positional embeddings of shape `(seq_len, dim)`.
21 | 2. `rotate_every_two(x)`: Rearranges the elements of the input tensor by rotating every two elements.
22 |
23 | - Args:
24 | - `x` (torch.Tensor): Input tensor of shape `(batch_size, seq_len, dim)`.
25 | - Returns:
26 | - `x` (torch.Tensor): Rearranged tensor of shape `(batch_size, seq_len, dim)`.
27 | 3. `duplicate_interleave(m)`: Duplicates a matrix while interleaving the copy.
28 |
29 | - Args:
30 | - `m` (torch.Tensor): Input matrix.
31 | - Returns:
32 | - `m` (torch.Tensor): Duplicated and interleaved matrix.
33 | 4. `apply_rotary_pos_emb(x, sin, cos, scale=1)`: Applies rotary positional embeddings to the input tensor.
34 |
35 | - Args:
36 | - `x` (torch.Tensor): Input tensor of shape `(batch_size, seq_len, dim)`.
37 | - `sin` (torch.Tensor): Sine positional embeddings of shape `(seq_len, dim)`.
38 | - `cos` (torch.Tensor): Cosine positional embeddings of shape `(seq_len, dim)`.
39 | - `scale` (float): Scaling factor for the positional embeddings (default: 1).
40 | - Returns:
41 | - `x` (torch.Tensor): Tensor with applied rotary positional embeddings.
42 | 5. `XPOS(head_dim, scale_base=512)`: XPOS module class.
43 |
44 | - Args:
45 | - `head_dim` (int): Dimensionality of the input tensor.
46 | - `scale_base` (int): Base value for scaling the positional embeddings (default: 512).
47 | - Methods:
48 | - `forward(x, offset=0, downscale=False)`: Forward pass of the XPOS module.
49 | - Args:
50 | - `x` (torch.Tensor): Input tensor of shape `(batch_size, seq_len, dim)`.
51 | - `offset` (int): Offset value for positional embeddings (default: 0).
52 | - `downscale` (bool): Boolean indicating whether to downscale the positional embeddings (default: False).
53 | - Returns:
54 | - `x` (torch.Tensor): Tensor with applied rotary positional embeddings.
55 |
56 | ### Usage Examples
57 |
58 | 1. Applying XPOS module to an input tensor:
59 |
60 | ```python
61 | import torch
62 | from xpos import XPOS
63 |
64 | # Create an instance of the XPOS module
65 | xpos = XPOS(head_dim=256)
66 |
67 | # Generate a random input tensor
68 | x = torch.randn(1, 10, 256)
69 |
70 | # Apply the XPOS module to the input tensor
71 | output = xpos(x)
72 | ```
73 |
74 |
75 | 2. Applying XPOS module with offset and downscaling:
76 |
77 | ```python
78 | import torch
79 | from zeta import XPOS
80 |
81 | # Create an instance of the XPOS module
82 | xpos = XPOS(head_dim=512)
83 |
84 | # Generate a random input tensor
85 | x = torch.randn(1, 20, 512)
86 |
87 | # Apply the XPOS module to the input tensor with offset and downscaling
88 | output = xpos(x, offset=2, downscale=True)
89 | ```
90 | 3. Using the individual functions of the XPOS module:
91 |
92 | ```python
93 | import torch
94 | from zeta import fixed_pos_embedding, apply_rotary_pos_emb
95 |
96 | # Generate fixed positional embeddings
97 | scale = torch.randn(10, 256)
98 | sin, cos = fixed_pos_embedding(scale)
99 |
100 | # Apply rotary positional embeddings to an input tensor
101 | x = torch.randn(1, 10, 256)
102 | output = apply_rotary_pos_emb(x, sin, cos, scale=0.5)
103 | ```
104 |
105 | Note: The above examples assume that the `xpos.py` file
--------------------------------------------------------------------------------
/docs/zeta/nn/embeddings/multiway.md:
--------------------------------------------------------------------------------
1 | # **Documentation for `MultiwayEmbedding` in Zeta Library**
2 |
3 | **Table of Contents**
4 |
5 | 1. Overview
6 | 2. Class Definition and Parameters
7 | 3. Methods and Functionalities
8 | 4. Usage Examples
9 | 5. Additional Tips and Information
10 | 6. References
11 |
12 | ---
13 |
14 | ## 1. Overview
15 |
16 | The `MultiwayEmbedding` class in the Zeta library provides a way to apply two separate embeddings to two distinct parts of the input tensor. It splits the input tensor at the specified position and applies one embedding to the first part and another embedding to the second part. This can be particularly useful when dealing with inputs that require diverse representations or embeddings.
17 |
18 | ---
19 |
20 | ## 2. Class Definition and Parameters
21 |
22 | ```python
23 | class MultiwayEmbedding(MultiwayNetwork):
24 | """
25 | A specialized version of the MultiwayNetwork to perform multi-way embeddings on an input tensor.
26 |
27 | Parameters:
28 | - modules (List[nn.Module]): A list containing exactly two PyTorch modules. Typically these would be embedding layers.
29 | - dim (int): The dimension along which to split and concatenate the input tensor. Default is 1.
30 | """
31 |
32 | def __init__(self, modules, dim=1):
33 | super(MultiwayNetwork, self).__init__()
34 | ...
35 | ```
36 |
37 | ---
38 |
39 | ## 3. Methods and Functionalities
40 |
41 | **`forward(x, **kwargs)`**
42 | ```python
43 | def forward(self, x, **kwargs):
44 | """
45 | Forward method to apply embeddings on the split input tensor.
46 |
47 | Parameters:
48 | - x (torch.Tensor): The input tensor.
49 | - **kwargs: Additional arguments that might be needed for the embeddings.
50 |
51 | Returns:
52 | - torch.Tensor: Concatenated tensor after applying the embeddings.
53 | """
54 | ...
55 | ```
56 |
57 | ---
58 |
59 | ## 4. Usage Examples
60 |
61 | **Example 1:** Basic Usage
62 | ```python
63 | from zeta import MultiwayEmbedding
64 | import torch.nn as nn
65 | import torch
66 | emb1 = nn.Embedding(10, 5)
67 | emb2 = nn.Embedding(10, 5)
68 | multiway_emb = MultiwayEmbedding([emb1, emb2])
69 |
70 | x = torch.LongTensor([[1,2,3],[4,5,6]])
71 | output = multiway_emb(x)
72 | print(output)
73 | ```
74 |
75 | **Example 2:** Setting a Split Position
76 | ```python
77 | from zeta import MultiwayEmbedding, set_split_position
78 | import torch.nn as nn
79 | import torch
80 | emb1 = nn.Embedding(10, 5)
81 | emb2 = nn.Embedding(10, 5)
82 | multiway_emb = MultiwayEmbedding([emb1, emb2])
83 | multiway_emb.apply(set_split_position(2))
84 |
85 | x = torch.LongTensor([[1,2,3],[4,5,6]])
86 | output = multiway_emb(x)
87 | print(output)
88 | ```
89 |
90 | **Example 3:** Working with Different Embedding Dimensions
91 | ```python
92 | from zeta import MultiwayEmbedding
93 | import torch.nn as nn
94 | import torch
95 | emb1 = nn.Embedding(10, 5)
96 | emb2 = nn.Embedding(10, 7)
97 | multiway_emb = MultiwayEmbedding([emb1, emb2], dim=2)
98 |
99 | x = torch.LongTensor([[1,2,3],[4,5,6]])
100 | output = multiway_emb(x)
101 | print(output)
102 | ```
103 |
104 | ---
105 |
106 | ## 5. Additional Tips and Information
107 |
108 | - Ensure that the input tensor's dimensions align with the expected embeddings. If there's a mismatch in dimensions, a runtime error will occur.
109 | - The split position determines the point at which the tensor is divided. It's crucial to set this appropriately, especially if the embeddings have different dimensions.
110 | - Using the provided `set_split_position` utility function makes it easy to apply the split position for the embeddings.
111 |
112 | ---
113 |
114 | ## 6. References
115 |
116 | - Torch documentation: [Link to PyTorch Documentation](https://pytorch.org/docs/stable/index.html)
117 | - Agora: [Link to Agora's GitHub](#)
118 |
119 | ---
120 |
121 | **Note:** Ensure that the tensor operations align mathematically, especially if you're concatenating tensors with different dimensions. In such cases, ensure the embeddings produce tensors that can be concatenated along the specified dimension.
122 |
123 | **Mathematical Explanation:** Given an input tensor \( X \) split into \( X_1 \) and \( X_2 \), and two embeddings \( A \) and \( B \), the output is given by concatenating \( A(X_1) \) and \( B(X_2) \).
--------------------------------------------------------------------------------
/docs/zeta/nn/embeddings/rope.md:
--------------------------------------------------------------------------------
1 | # RotaryEmbedding
2 |
3 | `RotaryEmbedding` is a PyTorch module implementing the rotary embedding mechanism. It is designed to handle sequences of any length without the need for fine-tuning, and can also incorporate positional information into the embeddings.
4 |
5 | ## Class Definition
6 |
7 | ```python
8 | class RotaryEmbedding(nn.Module):
9 | def __init__(
10 | self,
11 | dim,
12 | use_xpos=False,
13 | scale_base=512,
14 | interpolation_factor=1.,
15 | base=10000,
16 | base_rescale_factor=1.,
17 | ):
18 | ...
19 | ```
20 |
21 | ### Parameters
22 |
23 | - `dim` (int): The dimensionality of the embeddings.
24 | - `use_xpos` (bool, optional): Whether to use positional information in the embeddings. Default: `False`.
25 | - `scale_base` (int, optional): Base of the scale for positional information. Default: `512`.
26 | - `interpolation_factor` (float, optional): Factor used for interpolating the embeddings. Default: `1.0`.
27 | - `base` (int, optional): Base of the frequencies used in the embeddings. Default: `10000`.
28 | - `base_rescale_factor` (float, optional): Factor used for rescaling the base of the frequencies. Default: `1.0`.
29 |
30 | ### Method: `forward`
31 |
32 | ```python
33 | def forward(self, seq_len, device):
34 | ...
35 | ```
36 |
37 | #### Parameters
38 |
39 | - `seq_len` (int): The length of the sequence.
40 | - `device` (torch.device): The device on which the computation will be performed.
41 |
42 | #### Returns
43 |
44 | - `freqs` (Tensor): The computed frequencies for the embeddings.
45 | - `scale` (Tensor): The computed scale for the embeddings.
46 |
47 | ## Functionality and Usage
48 |
49 | The `RotaryEmbedding` module computes rotary embeddings for a sequence of a given length. The embeddings are computed based on the frequency and scale of each position in the sequence. The frequency and scale are computed using the `inv_freq` and `scale` buffers registered in the module.
50 |
51 | The `forward` method computes the `freqs` and `scale` tensors based on the `seq_len` and `device` provided. The `freqs` tensor is computed by multiplying the `t` tensor, which contains the indices of the sequence, with the `inv_freq` tensor. The `scale` tensor is computed using the `scale` buffer and the `scale_base` parameter.
52 |
53 | Each of the `freqs` and `scale` tensors is then concatenated with itself along the last dimension, so that it spans the full embedding dimension, and both tensors are returned.
54 |
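The returned `freqs` (and, when `use_xpos=True`, `scale`) are typically consumed by a rotate-half style application on the queries and keys. A minimal sketch of that step, assuming the common convention rather than the exact zeta API, is:

```python
import torch

def rotate_half(x):
    # split the last dimension in half and rotate: (x1, x2) -> (-x2, x1)
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def apply_rotary(t, freqs, scale=1.0):
    # t: (batch, seq_len, dim); freqs: (seq_len, dim)
    return (t * freqs.cos() * scale) + (rotate_half(t) * freqs.sin() * scale)
```
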
55 | ### Usage Examples
56 |
57 | #### Example 1: Basic Usage
58 |
59 | ```python
60 | from zeta.nn import RotaryEmbedding
61 | import torch
62 | from torch import nn
63 |
64 | # Initialize the RotaryEmbedding module
65 | rotary_embedding = RotaryEmbedding(dim=64, use_xpos=True)
66 |
67 | # Compute the embeddings for a sequence of length 10
68 | seq_len = 10
69 | device = torch.device('cuda')
70 | freqs, scale = rotary_embedding(seq_len, device)
71 |
72 | print(freqs)
73 | print(scale)
74 | ```
75 |
76 | #### Example 2: Using a Different Scale Base
77 |
78 | ```python
79 | from zeta.nn import RotaryEmbedding
80 | import torch
81 | from torch import nn
82 |
83 | # Initialize the RotaryEmbedding module with a different scale base
84 | rotary_embedding = RotaryEmbedding(dim=64, use_xpos=True, scale_base=1024)
85 |
86 | # Compute the embeddings for a sequence of length 10
87 | seq_len = 10
88 | device = torch.device('cuda')
89 | freqs, scale = rotary_embedding(seq_len, device)
90 |
91 | print(freqs)
92 | print(scale)
93 | ```
94 |
95 | #### Example 3: Without Positional Information
96 |
97 | ```python
98 | from zeta.nn import RotaryEmbedding
99 | import torch
100 | from torch import nn
101 |
102 | # Initialize the RotaryEmbedding module without positional information
103 | rotary_embedding = RotaryEmbedding(dim=64, use_xpos=False)
104 |
105 | # Compute the embeddings for a sequence of length 10
106 | seq_len = 10
107 | device = torch.device('cuda')
108 | freqs, scale = rotary_embedding(seq_len, device)
109 |
110 | print(freqs)
111 | print(scale)
112 | ```
113 |
114 | ## Mathematical Formula
115 |
116 | The mathematical formula for computing the `freqs` tensor is:
117 |
118 | \[ \text{freqs} = t \cdot \text{inv\_freq} \]
119 |
120 | Where:
121 | - \( t \) is a tensor containing the indices of the sequence.
122 | - \( \text{inv\_freq} \) is a tensor containing the inverse frequencies.
123 |
124 | The mathematical formula for computing the `scale` tensor is:
125 |
126 | \[ \text{scale} = \text{scale}^{\frac{\text{power}}{\text{scale\_base}}} \]
127 |
128 | Where:
129 | - \( \text{power} \) is a tensor containing the power of each position in the sequence.
130 | - \( \text{scale\_base} \) is a scalar containing the base of the scale.
131 | - \( \text{scale} \) is a tensor containing the scale of each position in the sequence.
132 |
133 | ## Additional Information and Tips
134 |
135 | - The `interpolation_factor` parameter can be used to interpolate the embeddings for sequences of different lengths. A larger `interpolation_factor` will result in a smoother interpolation.
136 | - The `base_rescale_factor` parameter can be used to rescale the base of the frequencies. This can be useful for adjusting the embeddings for sequences of different lengths.
137 | - If `use_xpos` is set to `False`, the `scale` tensor will not be used, and the `freqs` tensor will be returned as is.
138 |
139 | ## References and Resources
140 |
141 | - [Paper: Link to the paper](https://arxiv.org/pdf/2308.10882.pdf)
142 | - [PyTorch Documentation](https://pytorch.org/docs/stable/index.html)
143 | - [Einops Documentation](https://einops.rocks/pytorch-examples.html)
144 |
--------------------------------------------------------------------------------
/docs/zeta/nn/embeddings/truncated_rope.md:
--------------------------------------------------------------------------------
1 | # Module/Function Name: TruncatedRotaryEmbedding
2 |
3 | The `TruncatedRotaryEmbedding` class is part of the Zeta library and is designed to implement the rotary embeddings with a truncation mechanism. The rotary embedding is a positional encoding method that aims to provide the model with information about the relative positions of the tokens in a sequence. The `TruncatedRotaryEmbedding` class extends the rotary embedding concept by incorporating a truncation mechanism, which sets the rotary embedding to zero for positions where the frequency is higher than a specified threshold.
4 |
5 | The architecture and workings of this class are inspired by the paper [link to the paper](https://arxiv.org/pdf/2308.10882.pdf).
6 |
7 | ## Parameters:
8 |
9 | - `dim` (int): Dimensionality of the embeddings.
10 | - `a` (float): Lower bound of the truncation region. Rotary embeddings with frequency lower than `a` will be set to zero.
11 | - `b` (float): Upper bound of the truncation region. Rotary embeddings with frequency higher than or equal to `b` will not be truncated.
12 | - `rho` (float): Value to which the rotary embeddings will be truncated in the region [a, b).
13 |
14 | The `dim` parameter is required to determine the dimensionality of the embeddings, while `a`, `b`, and `rho` are hyperparameters that control the truncation mechanism.
15 |
16 | ## Method:
17 |
18 | ### `forward(seq_len, device)`
19 |
20 | Computes the truncated rotary embeddings for a given sequence length.
21 |
22 | #### Parameters:
23 |
24 | - `seq_len` (int): Length of the sequence for which the rotary embeddings are to be computed.
25 | - `device` (torch.device): Device on which the computations are to be performed.
26 |
27 | #### Returns:
28 |
29 | - `result` (Tensor): A tensor containing the truncated rotary embeddings for the specified sequence length.
30 |
31 | ## Functionality and Usage:
32 |
33 | The `TruncatedRotaryEmbedding` class is used to compute the truncated rotary embeddings for a given sequence length. The rotary embeddings are computed by multiplying a tensor containing the position indices of the tokens in the sequence by the inverse frequencies. The inverse frequencies are computed based on the specified embedding dimension `dim` and are stored in the `inv_freq` buffer.
34 |
35 | The truncation mechanism is implemented by creating a `theta_star` tensor, which is used to multiply the computed `freqs`. The `theta_star` tensor is created based on the specified `a`, `b`, and `rho` parameters and the computed `freqs` tensor. For positions where the frequency is higher than or equal to `b`, the rotary embeddings are not truncated, and `theta_star` is set to the frequency at that position. For positions where the frequency is lower than `a`, the rotary embeddings are set to zero, and `theta_star` is set to zero. For positions where the frequency falls in the range [a, b), the rotary embeddings are truncated to `rho`, and `theta_star` is set to `rho`.
36 |
37 | Once the `theta_star` tensor is created, it is multiplied element-wise by the `freqs` tensor to compute the final truncated rotary embeddings.
38 |
39 | ### Usage Example:
40 |
41 | ```python
42 | from zeta.nn.embeddings.truncated_rope import TruncatedRotaryEmbedding
43 | import torch
44 |
45 | # Define the parameters
46 | dim = 64
47 | a = 0.1
48 | b = 0.9
49 | rho = 0.5
50 | seq_len = 100
51 | device = torch.device('cuda')
52 |
53 | # Create the TruncatedRotaryEmbedding module
54 | trunc_rotary_emb = TruncatedRotaryEmbedding(dim, a, b, rho)
55 |
56 | # Compute the truncated rotary embeddings for the specified sequence length
57 | rotary_embeddings = trunc_rotary_emb(seq_len, device)
58 |
59 | print(rotary_embeddings)
60 | ```
61 |
62 | In this example, the `TruncatedRotaryEmbedding` module is created with the specified `dim`, `a`, `b`, and `rho` parameters. The `forward` method is then called with the specified `seq_len` and `device` parameters to compute the truncated rotary embeddings for a sequence of length `seq_len`.
63 |
64 | ## Additional Information and Tips:
65 |
66 | - The `a`, `b`, and `rho` parameters control the truncation mechanism and may need to be tuned based on the specific application and data being used. In particular, the `a` parameter should be set to a value that effectively removes the high-frequency noise in the rotary embeddings, while the `b` parameter should be set to a value that retains the useful positional information in the rotary embeddings.
67 |
68 | - The `dim` parameter should be set to the same value as the embedding dimension used in the model.
69 |
70 | - The `device` parameter in the `forward` method should be set to the same device on which the model is being trained.
71 |
72 | ## Mathematical Formulation:
73 |
74 | The mathematical formulation of the truncated rotary embeddings can be expressed as follows:
75 |
76 | \[ \text{freqs} = t \cdot \text{inv\_freq} \]
77 |
78 | \[ \theta = \text{base}^{-2 \cdot i / \text{dim}}, \, i = 0, 2, \ldots, \text{dim}-2 \]
79 |
80 | \[ \theta^* =
81 | \begin{cases}
82 | 0, & \text{if } \theta < a \\
83 | \rho, & \text{if } a \leq \theta < b \\
84 | \theta, & \text{if } \theta \geq b
85 | \end{cases}
86 | \]
87 |
88 | \[ \text{result} = \text{freqs} \cdot \theta^* \]
89 |
90 | Where:
91 |
92 | - \( t \) is a tensor containing the position indices of the tokens in the sequence.
93 | - \( \text{inv\_freq} \) is a tensor containing the inverse frequencies computed based on the specified `dim` parameter.
94 | - \( \text{freqs} \) is a tensor containing the computed frequencies for each position in the sequence.
95 | - \( \theta \) is a tensor containing the computed theta values for each position in the sequence.
96 | - \( \theta^* \) is a tensor containing the truncated theta values for each position in the sequence.
97 | - \( \text{result} \) is the final tensor containing the truncated rotary embeddings for each position in the sequence.
98 |
99 | ## References and Resources:
100 |
101 | - Paper: [Link to the paper](https://arxiv.org/pdf/2308.10882.pdf)
102 |
103 | For further exploration and implementation details, refer to the paper linked above.
--------------------------------------------------------------------------------
/docs/zeta/nn/modules/lora.md:
--------------------------------------------------------------------------------
1 | # Lora
2 |
3 | The `Lora` class is a module of the Zeta library that provides a simple linear transformation of the input data. It is a part of the `torch.nn` module and extends the `nn.Module` class from PyTorch.
4 |
5 | ## Overview and Introduction
6 |
7 | The `Lora` class is designed to provide a scalable and efficient linear transformation operation. It is particularly useful in scenarios where the dimensionality of the input data is very high and computational efficiency is of paramount importance. The `Lora` class achieves this by breaking the weight matrix down into two lower-rank matrices `A` and `B`, which are learned during training, together with a fixed scale factor `alpha`. This results in a significant reduction in the number of parameters to be learned and, consequently, a more computationally efficient model.
8 |
9 | ## Key Concepts and Terminology
10 |
11 | - **Linear Transformation**: A linear transformation is a mathematical operation that transforms input data by multiplying it with a weight matrix. It is a fundamental operation in many machine learning models.
12 |
13 | - **Low Rank Approximation**: Low rank approximation is a technique used to approximate a matrix by another matrix of lower rank. This is often used to reduce the dimensionality of data and to make computations more efficient.
14 |
15 | - **Scale Factor**: A scale factor is a number by which a quantity is multiplied, changing the magnitude of the quantity.
16 |
17 | ## Class Definition
18 |
19 | The `Lora` class is defined as follows:
20 |
21 | ```python
22 | class Lora(nn.Module):
23 | def __init__(
24 | self,
25 | dim,
26 | dim_out,
27 | r=8,
28 | alpha=None
29 | ):
30 | super().__init__()
31 |         self.scale = alpha / r if alpha is not None else 1.0  # scale defaults to 1 when alpha is not provided
32 |
33 | self.A = nn.Parameter(torch.randn(dim, r))
34 | self.B = nn.Parameter(torch.randn(r, dim_out))
35 |
36 | @property
37 | def weight(self):
38 | return (self.A @ self.B) * self.scale
39 |
40 | def forward(self, x):
41 | return x @ self.weight
42 | ```
43 |
44 | ### Parameters
45 |
46 | - `dim` (`int`): The dimensionality of the input data. It is the number of features in the input data.
47 | - `dim_out` (`int`): The desired dimensionality of the output data. It is the number of features in the output data.
48 | - `r` (`int`, optional): The rank of the matrices `A` and `B`. It determines the size of the matrices `A` and `B`. Default is 8.
49 | - `alpha` (`float`, optional): Numerator of the scaling factor `alpha / r` applied to the low-rank update. If not provided, the scale defaults to 1.
50 |
51 | ### Methods
52 |
53 | #### `forward`
54 |
55 | The `forward` method is used to compute the forward pass of the `Lora` module.
56 |
57 | ##### Parameters
58 |
59 | - `x` (`Tensor`): The input data. It is a tensor of shape `(batch_size, dim)`.
60 |
61 | ##### Returns
62 |
63 | - `Tensor`: The transformed data. It is a tensor of shape `(batch_size, dim_out)`.
64 |
65 | ## Functionality and Usage
66 |
67 | The `Lora` class is used to perform a linear transformation of the input data. The transformation is defined by the weight matrix `W`, which is approximated by the product of two lower rank matrices `A` and `B` scaled by a factor. The `Lora` class learns the matrices `A` and `B` during the training process; the scale factor is a fixed hyperparameter.
68 |
69 | The forward pass of the `Lora` module computes the product of the input data `x` and the weight matrix `W`, which is approximated by `(A @ B) * scale`.
70 |
71 | ### Mathematical Formula
72 |
73 | The mathematical formula for the forward pass of the `Lora` module is:
74 |
75 | \[ y = xW \]
76 |
77 | Where:
78 | - \( y \) is the transformed data.
79 | - \( x \) is the input data.
80 | - \( W \) is the weight matrix, which is approximated by \( A B \times \text{scale} \).
81 |
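As a concrete illustration of the savings, a full weight matrix for `dim=128` and `dim_out=64` would require \( 128 \times 64 = 8192 \) parameters, while the default rank-8 factorization needs only \( 128 \times 8 + 8 \times 64 = 1536 \) parameters.
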
82 | ### Usage Examples
83 |
84 | Below are three examples of how to use the `Lora` class.
85 |
86 | #### Example 1: Basic Usage
87 |
88 | ```python
89 | import torch
90 | from zeta import Lora
91 |
92 | # Define the input data
93 | x = torch.randn(32, 128) # batch size of 32, and 128 features
94 |
95 | # Define the Lora module
96 | lora = Lora(dim=128, dim_out=64)
97 |
98 | # Compute the forward pass
99 | y = lora(x)
100 | ```
101 |
102 | #### Example 2: Specifying the Rank and Scale Factor
103 |
104 | ```python
105 | import torch
106 | from zeta import Lora
107 |
108 | # Define the input data
109 | x = torch.randn(32, 128) # batch size of 32, and 128 features
110 |
111 | # Define the Lora module with specified rank and scale factor
112 | lora = Lora(dim=128, dim_out=64, r=16, alpha=0.1)
113 |
114 | # Compute the forward pass
115 | y = lora(x)
116 | ```
117 |
118 | #### Example 3: Using the Lora Module in a Neural Network
119 |
120 | ```python
121 | import torch
122 | from torch import nn
123 | from zeta import Lora
124 |
125 | # Define a simple neural network with a Lora layer
126 | class Net(nn.Module):
127 | def __init__(self):
128 | super().__init__()
129 | self.lora = Lora(dim=128, dim_out=64)
130 | self.fc = nn.Linear(64, 10)
131 |
132 | def forward(self, x):
133 | x = self.lora(x)
134 | x = self.fc(x)
135 | return x
136 |
137 | # Define the input data
138 | x = torch.randn(32, 128) # batch size of 32, and 128 features
139 |
140 | # Define the model
141 | model = Net()
142 |
143 | # Compute the forward pass
144 | output = model(x)
145 | ```
146 |
147 | ## Additional Information and Tips
148 |
149 | - The `Lora` class is particularly useful in scenarios where the dimensionality of the input data is very high and computational efficiency is of paramount importance. However, it may not be suitable for all applications, as the approximation of the weight matrix may result in a loss of accuracy.
150 |
151 | - The rank `r` and the scale factor `alpha` are hyperparameters that need to be tuned for the specific application. A higher value of `r` will result in a more accurate approximation of the weight matrix, but will also increase the computational cost. Similarly, the scale factor `alpha` needs to be tuned to achieve the desired trade-off between accuracy and computational efficiency.
154 |
155 | ## References and Resources
156 |
157 | - [PyTorch nn.Module documentation](https://pytorch.org/docs/stable/generated/torch.nn.Module.html)
158 | - [Low Rank Matrix Factorization for Deep Neural Network Training with High-dimensional Output Targets](https://arxiv.org/abs/2005.08735)
159 |
160 | For further exploration and implementation details, you can refer to the above resources and the official PyTorch documentation.
--------------------------------------------------------------------------------
/docs/zeta/nn/modules/token_learner.md:
--------------------------------------------------------------------------------
1 | # Zeta Library Documentation
2 |
3 | ## Module Name: TokenLearner
4 |
5 | The `TokenLearner` is a PyTorch module designed for learning tokens from input data. It is a part of the Zeta library, a collection of modules and functions designed for efficient and flexible implementation of various deep learning tasks. The `TokenLearner` class is particularly useful for tasks such as image classification, object detection, and other applications where it is beneficial to extract tokens (representative features) from the input data.
6 |
7 | ## Introduction
8 |
9 | In various deep learning tasks, it is common to extract tokens (representative features) from the input data. These tokens are then used for downstream tasks like classification, detection, etc. The `TokenLearner` class is designed to efficiently extract tokens from the input data. It does this by utilizing a convolutional neural network (CNN) with grouped convolutions and a gating mechanism.
10 |
11 | ## Class Definition
12 |
13 | ```python
14 | class TokenLearner(nn.Module):
15 | def __init__(
16 | self,
17 | *,
18 | dim: int = None,
19 | ff_mult: int = 2,
20 | num_output_tokens: int = 8,
21 | num_layers: int = 2
22 | ):
23 | ...
24 | ```
25 |
26 | ### Parameters:
27 |
28 | - `dim` (int, optional): The dimension of the input data. Default is `None`.
29 | - `ff_mult` (int, optional): The factor by which the inner dimension of the network will be multiplied. Default is `2`.
30 | - `num_output_tokens` (int, optional): The number of tokens to be output by the network. Default is `8`.
31 | - `num_layers` (int, optional): The number of layers in the network. Default is `2`.
32 |
33 | ## Functionality and Usage
34 |
35 | The `TokenLearner` class is a PyTorch `nn.Module` that learns tokens from the input data. The input data is first packed and then processed through a series of grouped convolutions followed by a gating mechanism. The output is a set of tokens that are representative of the input data.
36 |
37 | The forward method of the `TokenLearner` class takes an input tensor `x` and performs the following operations:
38 |
39 | 1. The input tensor `x` is packed using the `pack_one` helper function.
40 | 2. The packed tensor is then rearranged and passed through a series of grouped convolutions and activation functions.
41 | 3. The output of the convolutions is then rearranged and multiplied with the input tensor.
42 | 4. The resulting tensor is then reduced to obtain the final tokens.
43 |
44 | ### Method:
45 |
46 | ```python
47 | def forward(self, x):
48 | ...
49 | ```
50 |
51 | ### Parameters:
52 |
53 | - `x` (Tensor): The input tensor of shape `(batch_size, channels, height, width)`.
54 |
55 | ### Returns:
56 |
57 | - `x` (Tensor): The output tokens of shape `(batch_size, channels, num_output_tokens)`.
58 |
59 | ## Usage Examples
60 |
61 | ### Example 1: Basic Usage
62 |
63 | ```python
64 | from zeta import TokenLearner
65 | import torch
66 |
67 | # Initialize the TokenLearner
68 | token_learner = TokenLearner(dim=64)
69 |
70 | # Generate some random input data
71 | x = torch.randn(1, 64, 32, 32)
72 |
73 | # Forward pass
74 | tokens = token_learner.forward(x)
75 |
76 | print(tokens.shape)
77 | ```
78 |
79 | In this example, a `TokenLearner` is initialized with an input dimension of 64. A random tensor of shape `(1, 64, 32, 32)` is then passed through the `TokenLearner` to obtain the tokens. The output will be a tensor of shape `(1, 64, 8)`.
80 |
81 | ### Example 2: Custom Parameters
82 |
83 | ```python
84 | from zeta import TokenLearner
85 | import torch
86 |
87 | # Initialize the TokenLearner with custom parameters
88 | token_learner = TokenLearner(dim=128, ff_mult=4, num_output_tokens=16)
89 |
90 | # Generate some random input data
91 | x = torch.randn(2, 128, 64, 64)
92 |
93 | # Forward pass
94 | tokens = token_learner.forward(x)
95 |
96 | print(tokens.shape)
97 | # Output: torch.Size([2, 128, 16])
98 | ```
99 |
100 | In this example, a `TokenLearner` is initialized with custom parameters. A random tensor of shape `(2, 128, 64, 64)` is then passed through the `TokenLearner` to obtain the tokens. The output will be a tensor of shape `(2, 128, 16)`.
101 |
102 | ### Example 3: Integration with Other PyTorch Modules
103 |
104 | ```python
105 | from zeta import TokenLearner
106 | import torch
107 | import torch.nn as nn
108 |
109 | # Initialize the TokenLearner
110 | token_learner = TokenLearner(dim=64)
111 |
112 | # Generate some random input data
113 | x = torch.randn(1, 64, 32, 32)
114 |
115 | # Define a simple model
116 | model = nn.Sequential(
117 | token_learner,
118 | nn.Flatten(),
119 | nn.Linear(64*8, 10)
120 | )
121 |
122 | # Forward pass
123 | output = model(x)
124 |
125 | print(output.shape)
126 | # Output: torch.Size([1, 10])
127 | ```
128 |
129 | In this example, the `TokenLearner` is integrated into a simple model consisting of the `TokenLearner`, a `Flatten` layer, and a `Linear` layer. A random tensor of shape `(1, 64, 32, 32)` is then passed through the model to obtain the final output. The output will be a tensor of shape `(1, 10)`.
130 |
131 | ## Mathematical Formulation
132 |
133 | The `TokenLearner` can be mathematically formulated as follows:
134 |
135 | Let `X` be the input tensor of shape `(B, C, H, W)`, where `B` is the batch size, `C` is the number of channels, `H` is the height, and `W` is the width. The `TokenLearner` first rearranges `X` to a tensor of shape `(B, G*C, H, W)`, where `G` is the number of output tokens. This is done by repeating `X` along the channel dimension `G` times.
136 |
137 | The rearranged tensor is then passed through a series of grouped convolutions and activation functions to obtain a tensor `A` of shape `(B, G, H, W)`. This tensor is then rearranged and multiplied with the input tensor `X` to obtain a tensor of shape `(B, C, G, H, W)`.
138 |
139 | The final tokens are obtained by reducing this tensor along the `H` and `W` dimensions to obtain a tensor of shape `(B, C, G)`.
140 |
141 | ## Additional Information and Tips
142 |
143 | - The `num_output_tokens` parameter controls the number of tokens that will be output by the `TokenLearner`. A larger number of output tokens will result in a more detailed representation of the input data, but will also increase the computational requirements.
144 |
145 | - The `ff_mult` parameter controls the inner dimension of the `TokenLearner`. A larger `ff_mult` will result in a larger capacity model, but will also increase the computational requirements.
146 |
147 | - The `TokenLearner` works best with input data that has a relatively small spatial dimension (e.g. 32x32 or 64x64). For larger input sizes, it may be beneficial to use a downsampling layer (e.g. `nn.MaxPool2d`) before passing the data through the `TokenLearner`.
148 |
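Following the last tip above, a minimal sketch of pairing a pooling layer with the `TokenLearner` (shapes are illustrative) is:

```python
from zeta import TokenLearner
import torch
import torch.nn as nn

# Downsample larger feature maps before learning tokens
model = nn.Sequential(
    nn.MaxPool2d(kernel_size=2),  # 128x128 -> 64x64 spatially
    TokenLearner(dim=64),
)

x = torch.randn(1, 64, 128, 128)
tokens = model(x)
print(tokens.shape)  # expected: torch.Size([1, 64, 8])
```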
149 |
--------------------------------------------------------------------------------
/docs/zeta/nn/utils/helpers.md:
--------------------------------------------------------------------------------
1 | ## Documentation
2 |
3 | ### Overview
4 |
5 | The provided module comprises utility functions and classes to streamline specific operations with Python data structures and PyTorch models. The main aspects of the module are:
6 |
7 | - Checking the existence of a value.
8 | - Implementing custom call behavior through classes.
9 | - Custom decorators for function calls.
10 | - Dictionary manipulation.
11 | - Initialization of PyTorch layer parameters.
12 |
13 | ### Functions and Classes
14 |
15 | 1. **exists(val: Any) -> bool**:
16 | Checks if the provided value is not `None`.
17 |
18 | 2. **default(val: Any, d: Any) -> Any**:
19 | Returns the value if it's not `None`; otherwise, it returns a default value.
20 |
21 | 3. **once(fn: Callable) -> Callable**:
22 | A decorator ensuring that the function is only called once.
23 |
24 | 4. **eval_decorator(fn: Callable) -> Callable**:
25 | A decorator for `torch.nn.Module` methods to switch the module to `eval` mode during the function call and revert to its original mode afterwards.
26 |
27 | 5. **cast_tuple(val: Any, depth: int) -> Tuple**:
28 | Casts a value to a tuple with a specific depth.
29 |
30 | 6. **maybe(fn: Callable) -> Callable**:
31 | A decorator that calls the function only if its first argument exists.
32 |
33 | 7. **always**:
34 | A class that always returns the specified value when called.
35 |
36 | 8. **not_equals** and **equals**:
37 | Classes that, when instantiated with a value, check if another value is (not) equal to the specified value.
38 |
39 | 9. **init_zero_(layer: nn.Module) -> None**:
40 | Initializes the weights and biases of a torch layer to zero.
41 |
42 | 10. **pick_and_pop(keys: List[str], d: Dict) -> Dict**:
43 | Extracts values from a dictionary based on provided keys.
44 |
45 | 11. **group_dict_by_key(cond: Callable, d: Dict) -> Tuple[Dict, Dict]**:
46 | Groups dictionary keys based on a given condition.
47 |
48 | 12. **string_begins_with(prefix: str, str: str) -> bool**:
49 | Checks if a string starts with a specific prefix.
50 |
51 | 13. **group_by_key_prefix(prefix: str, d: Dict) -> Tuple[Dict, Dict]**:
52 | Groups dictionary items by keys starting with a specific prefix.
53 |
54 | 14. **groupby_prefix_and_trim(prefix: str, d: Dict) -> Tuple[Dict, Dict]**:
55 | Similar to `group_by_key_prefix` but also removes the prefix from keys.
56 |
57 | ### Usage Examples
58 |
59 | 1. **Using the `once` decorator**:
60 |
61 | ```python
62 | from zeta import once
63 |
64 | @once
65 | def greet():
66 | print("Hello, World!")
67 |
68 | greet() # prints "Hello, World!"
69 | greet() # Does nothing on the second call
70 | ```
71 |
72 | 2. **Using the `eval_decorator` with PyTorch**:
73 |
74 | ```python
75 | import torch.nn as nn
76 | from zeta import eval_decorator
77 | import torch
78 | class SimpleModel(nn.Module):
79 | def __init__(self):
80 | super().__init__()
81 | self.layer = nn.Linear(10, 10)
82 |
83 | @eval_decorator
84 | def predict(self, x):
85 | return self.layer(x)
86 |
87 | model = SimpleModel()
88 | input_tensor = torch.randn(1, 10)
89 | output = model.predict(input_tensor) # Automatically switches to eval mode and back
90 | ```
91 |
92 | 3. **Dictionary Manipulation with Prefix Functions**:
93 |
94 | ```python
95 | from zeta import group_by_key_prefix
96 |
97 | sample_dict = {
98 | "user_name": "John",
99 | "user_age": 25,
100 | "order_id": 12345,
101 | "order_date": "2023-01-01"
102 | }
103 |
104 | user_data, order_data = group_by_key_prefix("user_", sample_dict)
105 | print(user_data) # {'user_name': 'John', 'user_age': 25}
106 | print(order_data) # {'order_id': 12345, 'order_date': '2023-01-01'}
107 | ```
108 |
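4. **Grouping and trimming prefixed keys** (an illustrative sketch based on the description of `groupby_prefix_and_trim` above; the exact return order is assumed):

```python
from zeta import groupby_prefix_and_trim

config = {"attn_dropout": 0.1, "attn_heads": 8, "ff_mult": 4}

attn_kwargs, remaining = groupby_prefix_and_trim("attn_", config)
print(attn_kwargs)  # {'dropout': 0.1, 'heads': 8}
print(remaining)    # {'ff_mult': 4}
```
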
109 | This module is a collection of general-purpose utility functions and classes, making many common operations more concise. It's beneficial when working with PyTorch models and various data manipulation tasks.
--------------------------------------------------------------------------------
/docs/zeta/tokenizers/language_tokenizer.md:
--------------------------------------------------------------------------------
1 | # Module Name: LanguageTokenizerGPTX
2 |
3 | The `LanguageTokenizerGPTX` is an embedding utility tailored for the "EleutherAI/gpt-neox-20b" transformer model. This class allows for seamless tokenization and decoding operations, abstracting away the underlying complexity of the chosen transformer's tokenizer.
4 |
5 | ## Introduction:
6 | Language tokenization is a crucial step in natural language processing tasks. This module provides an interface to tokenize and decode text using the GPT-Neox-20b transformer from the EleutherAI project. With the ability to manage end-of-string tokens, padding tokens, and a fixed model length, `LanguageTokenizerGPTX` serves as a convenient wrapper for the actual tokenizer from the transformers library.
7 |
8 | ## Class Definition:
9 |
10 | ```python
11 | class LanguageTokenizerGPTX:
12 | def __init__(self):
13 | ...
14 | def tokenize_texts(self, texts: str) -> torch.Tensor:
15 | ...
16 | def decode(self, texts: torch.Tensor) -> str:
17 | ...
18 | def __len__(self) -> int:
19 | ...
20 | ```
21 |
22 | ### Parameters:
23 | The class does not take any parameters upon instantiation. It uses predefined parameters internally to load the tokenizer.
24 |
25 | ### Methods:
26 |
27 | #### 1. `__init__(self) -> None`:
28 | Initializes the `LanguageTokenizerGPTX` object. This method loads the `AutoTokenizer` with predefined parameters.
29 |
30 | #### 2. `tokenize_texts(self, texts: str) -> torch.Tensor`:
31 | Tokenizes a given text or list of texts.
32 |
33 | - **texts** (str): The input text(s) to tokenize.
34 |
35 | **Returns**:
36 | - A torch Tensor of token IDs representing the input text(s).
37 |
38 | #### 3. `decode(self, texts: torch.Tensor) -> str`:
39 | Decodes a given tensor of token IDs back to text.
40 |
41 | - **texts** (torch.Tensor): The tensor of token IDs to decode.
42 |
43 | **Returns**:
44 | - A string representing the decoded text.
45 |
46 | #### 4. `__len__(self) -> int`:
47 | Provides the total number of tokens in the tokenizer's vocabulary.
48 |
49 | **Returns**:
50 | - An integer representing the total number of tokens.
51 |
52 | ## Usage Examples:
53 |
54 | ```python
55 | from zeta import LanguageTokenizerGPTX
56 | import torch
57 |
58 | # Initialize the tokenizer
59 | tokenizer = LanguageTokenizerGPTX()
60 |
61 | # Example 1: Tokenize a single text
62 | text = "Hello, world!"
63 | tokenized_text = tokenizer.tokenize_texts(text)
64 | print(tokenized_text)
65 |
66 | # Example 2: Decode a tokenized text
67 | decoded_text = tokenizer.decode(tokenized_text)
68 | print(decoded_text)
69 |
70 | # Example 3: Get the number of tokens in the tokenizer's vocabulary
71 | num_tokens = len(tokenizer)
72 | print(f"The tokenizer has {num_tokens} tokens.")
73 | ```
74 |
75 | ## Mathematical Formulation:
76 |
77 | Given a text \( t \) and a vocabulary \( V \) from the GPT-Neox-20b model, tokenization maps \( t \) to a sequence of token IDs \( T \) where each token ID \( t_i \) corresponds to a token in \( V \). Decoding reverses this process.
78 |
79 | \[ t \xrightarrow{\text{tokenize}} T \]
80 | \[ T \xrightarrow{\text{decode}} t \]
81 |
82 | ## Additional Information:
83 |
84 | The GPT-Neox-20b model is part of the EleutherAI project. It's a variant of the GPT architecture with tweaks in terms of model size and training. Utilizing such models require an understanding of tokenization and decoding, which this module aims to simplify.
85 |
86 | ## References:
87 |
88 | - [Transformers Library by Hugging Face](https://huggingface.co/transformers/)
89 | - [EleutherAI GPT-Neox](https://github.com/EleutherAI/gpt-neox)
90 |
91 | Note: Ensure you have the necessary packages and dependencies installed, particularly the transformers library from Hugging Face.
--------------------------------------------------------------------------------
/docs/zeta/tokenizers/multi_modal_tokenizer.md:
--------------------------------------------------------------------------------
1 | # **Documentation for Zeta Library's MultiModalTokenizer Class**
2 |
3 | ---
4 |
5 | ## **Introduction and Overview**
6 |
7 | The `MultiModalTokenizer` class is part of the Zeta Library, designed to provide tokenization capabilities for both text and image data. This enables more seamless integration and utilization of multimodal (text and image) data, especially when used with models that can handle such information simultaneously, like the CLIP model.
8 |
9 | **Key Features**:
10 |
11 | 1. **Multimodal Tokenization**: Combines text and image tokenization within one unified class.
12 | 2. **Integration with Hugging Face Transformers**: Utilizes the `CLIPProcessor` for image tokenization and `AutoTokenizer` for text tokenization.
13 | 3. **Special Tokens for Image Segmentation**: Uses the special tokens `<image>` and `</image>` to denote image token boundaries within text.
14 | 4. **Error Handling**: Implements comprehensive error handling and logging to ensure robustness.
15 |
16 | ---
17 |
18 | ## **Class Definition**
19 |
20 | ### **MultiModalTokenizer**
21 |
22 | ```python
23 | class MultiModalTokenizer:
24 | """
25 | A tokenizer class for the kosmos model
26 |
27 | Attributes:
28 | processor(CLIPProcessor): The processor to tokenize images.
29 | tokenizer(AutoTokenizer): The tokenizer to tokenize text.
30 |         im_idx(int): The index of the "<image>" token.
31 |         im_end_idx(int): The index of the "</image>" token.
32 | """
33 | ```
34 |
35 | #### **Parameters**:
36 |
37 | - **max_length (int, optional)**: Maximum length of the tokenized sequence. Defaults to 8192.
38 |
39 | #### **Attributes**:
40 |
41 | - **processor (CLIPProcessor)**: The processor used to tokenize images.
42 | - **tokenizer (AutoTokenizer)**: The tokenizer used to tokenize text.
43 | - **im_idx (int)**: Index of the `<image>` token.
44 | - **im_end_idx (int)**: Index of the `</image>` token.
45 |
46 | ---
47 |
48 | ## **Methods**
49 |
50 | ### **1. tokenize_texts**
51 |
52 | ```python
53 | def tokenize_texts(self, texts: str) -> Tuple[torch.Tensor, torch.Tensor]:
54 | """
55 | Tokenize given texts.
56 |
57 | Args:
58 | texts (str): The text to be tokenized.
59 |
60 | Returns:
61 | A tuple containing the tokenized texts and only the text tokens.
62 | """
63 | ```
64 |
65 | ### **2. tokenize_images**
66 |
67 | ```python
68 | def tokenize_images(self, images) -> torch.Tensor:
69 | """
70 | Tokenizes given images.
71 |
72 | Args:
73 | images: The images to be tokenized.
74 |
75 | Returns:
76 | The tokenized images.
77 | """
78 | ```
79 |
80 | ### **3. tokenize**
81 |
82 | ```python
83 | def tokenize(self, sample) -> Dict[str, torch.Tensor]:
84 | """
85 | Tokenizes given sample.
86 |
87 | Args:
88 | sample: The sample to be tokenized.
89 |
90 | Returns:
91 | A dictionary containing the tokenized text tokens, images, labels, and attention mask.
92 | """
93 | ```
94 |
95 | ---
96 |
97 | ## **Usage Examples**
98 |
99 | ### **Example 1: Tokenizing Texts**
100 |
101 | ```python
102 | from zeta import MultiModalTokenizer
103 | import torch
104 |
105 | tokenizer = MultiModalTokenizer()
106 | texts = ["Hello World", "Zeta Library is great!"]
107 | tokenized_texts, only_texts = tokenizer.tokenize_texts(texts)
108 | print(tokenized_texts)
109 | print(only_texts)
110 | ```
111 |
112 | ### **Example 2: Tokenizing Images**
113 |
114 | ```python
115 | from zeta import MultiModalTokenizer
116 | import torch
117 |
118 | tokenizer = MultiModalTokenizer()
119 | images = torch.randn(2, 3, 224, 224) # Assuming 2 random images of shape 3x224x224
120 | tokenized_images = tokenizer.tokenize_images(images)
121 | print(tokenized_images)
122 | ```
123 |
124 | ### **Example 3: Tokenizing Multimodal Data**
125 |
126 | ```python
127 | from zeta import MultiModalTokenizer
128 | import torch
129 |
130 | tokenizer = MultiModalTokenizer()
131 | sample = {
132 | "target_text": ["Hello World", "Zeta Library is great!"],
133 | "image": torch.randn(2, 3, 224, 224)
134 | }
135 | tokenized_data = tokenizer.tokenize(sample)
136 | print(tokenized_data)
137 | ```
138 |
139 | ---
140 |
141 | ## **Mathematical Overview**
142 |
143 | Given a text sequence \( T \) of length \( n \) and an image \( I \) represented by a tensor of shape \( C \times H \times W \), where \( C \) is the number of channels, \( H \) is the height, and \( W \) is the width:
144 |
145 | 1. The tokenized text, \( T' \), is represented as:
146 | \[ T' = [\text{<s>}, \text{<image>}, \text{</image>}, T_{1}, T_{2}, ..., T_{n}, \text{</s>}] \]
147 |
148 | 2. The tokenized image, \( I' \), is processed using the CLIP processor to obtain a tensor representation.
149 |
150 | 3. When both text and image data are tokenized using the `tokenize` method, the output contains both \( T' \) and \( I' \) with their respective attention masks.
151 |
152 | ---
153 |
154 | ## **Additional Tips**
155 |
156 | - Ensure you have the required model weights and configurations for the specified pretrained models ("laion/CLIP-ViT-L-14-laion2B-s32B-b82K" and "EleutherAI/gpt-neox-20b") downloaded or accessible from the Hugging Face Model Hub.
157 |
158 | - Handle potential tokenization errors gracefully using try-except blocks, as demonstrated in the provided methods.
159 |
160 | ---
161 |
162 | ## **References and Resources**
163 |
164 | 1. CLIP: Connecting Text and Images - OpenAI: [Link](https://openai.com/blog/clip/)
165 | 2. Hugging Face's Transformers library: [Link](https://huggingface.co/transformers/)
166 | 3. Documentation on Special Tokens in Transformers: [Link](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.add_special_tokens)
167 |
168 | ---
--------------------------------------------------------------------------------
/docs/zeta/tokenizers/sentencepiece.md:
--------------------------------------------------------------------------------
1 | # SentencePieceTokenizer
2 |
3 | `SentencePieceTokenizer` is a class for tokenizing and detokenizing text using a pre-trained SentencePiece model. SentencePiece is an unsupervised text tokenizer and detokenizer intended mainly for neural-network-based text generation tasks, where the vocabulary size is fixed before model training. This class is part of the zeta library, a collection of utility functions and classes for natural language processing tasks.
4 |
5 | ## Introduction
6 |
7 | Tokenization is a crucial step in many natural language processing tasks. It involves splitting a piece of text into smaller units, called tokens. These tokens can be as small as characters or as large as words. The `SentencePieceTokenizer` class provides an efficient and easy-to-use way to tokenize and detokenize text using a SentencePiece model.
8 |
9 | The SentencePiece model is trained to find the best tokenization by dynamically adjusting the size and boundaries of tokens. SentencePiece implements subword units (e.g., byte-pair encoding (BPE) and the unigram language model) with the extension of direct training from raw sentences, allowing a purely end-to-end system that does not depend on language-specific pre- or post-processing.
10 |
11 | ## Class Definition
12 |
13 | ```python
14 | class SentencePieceTokenizer:
15 | def __init__(self, model_path: str):
16 | ...
17 | ```
18 |
19 | ### Parameters:
20 |
21 | - `model_path (str)`: The path to the pre-trained SentencePiece model, a file with a `.model` extension.
22 |
23 | ### Attributes:
24 |
25 | - `n_words (int)`: The vocabulary size of the SentencePiece model.
26 | - `bos_id (int)`: The token ID for the beginning of sentence token.
27 | - `eos_id (int)`: The token ID for the end of sentence token.
28 | - `pad_id (int)`: The token ID for the padding token.
29 | - `prefix_id (int, optional)`: The token ID for the prefix token.
30 | - `middle_id (int, optional)`: The token ID for the middle token.
31 | - `suffix_id (int, optional)`: The token ID for the suffix token.
32 | - `eot_id (int, optional)`: The token ID for the end of text token.
33 |
34 | ## Methods
35 |
36 | ### `encode`
37 |
38 | ```python
39 | def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
40 | ...
41 | ```
42 |
43 | Encodes a string into a list of integer token IDs.
44 |
45 | #### Parameters:
46 |
47 | - `s (str)`: The string to be encoded.
48 | - `bos (bool)`: Whether to add the beginning of sentence token at the start.
49 | - `eos (bool)`: Whether to add the end of sentence token at the end.
50 |
51 | #### Returns:
52 |
53 | - `List[int]`: A list of integer token IDs.
54 |
55 | ### `decode`
56 |
57 | ```python
58 | def decode(self, t: List[int]) -> str:
59 | ...
60 | ```
61 |
62 | Decodes a list of integer token IDs into a string.
63 |
64 | #### Parameters:
65 |
66 | - `t (List[int])`: A list of integer token IDs to be decoded.
67 |
68 | #### Returns:
69 |
70 | - `str`: The decoded string.
71 |
72 | ### `encode_infilling`
73 |
74 | ```python
75 | def encode_infilling(self, s: str) -> List[int]:
76 | ...
77 | ```
78 |
79 | Encodes a string without an implicit leading space.
80 |
81 | #### Parameters:
82 |
83 | - `s (str)`: The string to be encoded.
84 |
85 | #### Returns:
86 |
87 | - `List[int]`: A list of integer token IDs.
88 |
89 | ### `decode_infilling`
90 |
91 | ```python
92 | def decode_infilling(self, t: List[int]) -> str:
93 | ...
94 | ```
95 |
96 | Decodes a list of integer token IDs into a string without an implicit leading space.
97 |
98 | #### Parameters:
99 |
100 | - `t (List[int])`: A list of integer token IDs to be decoded.
101 |
102 | #### Returns:
103 |
104 | - `str`: The decoded string.
105 |
106 | ## Usage Examples
107 |
108 | ### Example 1:
109 |
110 | ```python
111 | from zeta import SentencePieceTokenizer
112 |
113 | tokenizer = SentencePieceTokenizer(model_path='path/to/your/model.model')
114 | text = "Hello, world!"
115 | tokens = tokenizer.encode(text, bos=True, eos=True)
116 | print(tokens)
117 | # [2, 284, 16, 250, 13, 849, 4, 3]
118 |
119 | decoded_text = tokenizer.decode(tokens)
120 | print(decoded_text)
121 | # "Hello, world!"
122 | ```
123 |
124 | ### Example 2:
125 |
126 | ```python
127 | from zeta import SentencePieceTokenizer
128 |
129 | tokenizer = SentencePieceTokenizer(model_path='path/to/your/model.model')
130 | text = "Hello, world!"
131 | tokens = tokenizer.encode_infilling(text)
132 | print(tokens)
133 | # [284, 16, 250, 13, 849, 4]
134 |
135 | decoded_text = tokenizer.decode_infilling(tokens)
136 | print(decoded_text)
137 | # "Hello, world!"
138 | ```
139 |
140 | ### Example 3:
141 |
142 | ```python
143 | from zeta import SentencePieceTokenizer
144 |
145 | tokenizer = SentencePieceTokenizer(model_path='path/to/your/model.model')
146 | tokens = [2, 284, 16, 250, 13, 849, 4, 3]
147 | decoded_text = tokenizer.decode(tokens)
148 | print(decoded_text)
149 | # "Hello, world!"
150 | ```
151 |
152 | ## Additional Information
153 |
154 | - Make sure that the model file specified in `model_path` exists.
155 | - The special infilling tokens (prefix, middle, suffix, and end-of-text) are optional and may not be present in all SentencePiece models; see the optional `prefix_id`, `middle_id`, `suffix_id`, and `eot_id` attributes above.
156 |
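157 | If you want to check what a given model file provides, the `sentencepiece` package can be queried directly. This is a minimal sketch assuming a trained model exists at `path/to/your/model.model`; the printed IDs correspond to the attributes listed above, with `-1` indicating that the model does not define that token.
158 | 
159 | ```python
160 | import sentencepiece as spm
161 | 
162 | sp = spm.SentencePieceProcessor(model_file="path/to/your/model.model")
163 | print(sp.vocab_size())                        # corresponds to n_words
164 | print(sp.bos_id(), sp.eos_id(), sp.pad_id())  # -1 if a token is undefined
165 | print(sp.encode("Hello, world!", out_type=int))
166 | ```
167 | 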
157 | ## References and Resources
158 |
159 | - [SentencePiece GitHub Repository](https://github.com/google/sentencepiece)
160 | - [SentencePiece: A simple and language independent subword tokenizer and detokenizer for Neural Text Generation](https://arxiv.org/abs/1808.06226)
161 |
162 | ## Mathematical Formulation
163 |
164 | The SentencePiece model uses the following mathematical formula for tokenization:
165 |
166 | \[P(w) = \prod_{i=1}^{n} P(w_i | w_1, ..., w_{i-1})\]
167 |
168 | Where:
169 | - \(P(w)\) is the probability of the word \(w\).
170 | - \(n\) is the number of subwords in the word \(w\).
171 | - \(w_i\) is the \(i\)-th subword of \(w\).
172 |
173 | The model is trained to maximize the likelihood of the training data, and the subwords are chosen to minimize the perplexity of the training data.
--------------------------------------------------------------------------------
/docs/zeta/training/nebula.md:
--------------------------------------------------------------------------------
1 | # Nebula
2 |
3 | The `Nebula` class is a custom loss function that dynamically determines the most suitable loss function for a given dataset based on characteristics of the data, such as sparsity, correlation, the range of values, and user input. It is part of the `zeta` library and builds on a `LossFunction` base class, delegating the actual loss computation to standard PyTorch loss functions.
4 |
5 | ## Introduction
6 |
7 | The purpose of the `Nebula` class is to help determine and cache the most suitable loss function for a given dataset without requiring the user to manually select one. This can be particularly useful in scenarios where the user is unsure of the most appropriate loss function to use or in automated systems where the type of problem (classification or regression) is not known a priori.
8 |
9 | The `Nebula` class considers various characteristics of the data, such as whether the target values are integers, the sparsity of the target values, the correlation between predictions and target values, and any user or domain knowledge provided, to determine whether the problem is a classification or regression problem and subsequently select an appropriate loss function.
10 |
11 | ## Class Definition
12 |
13 | ```python
14 | class Nebula(LossFunction):
15 | def __init__(self, domain_knowledge=None, user_input=None):
16 | ...
17 | ```
18 |
19 | ### Parameters
20 |
21 | - `domain_knowledge` (str, optional): Domain knowledge about the problem. It can be either "classification" or "regression". Default is `None`.
22 | - `user_input` (str, optional): User input about the problem type. It can be either "classification" or "regression". Default is `None`.
23 |
24 | ### Attributes
25 |
26 | - `loss_function`: The determined loss function.
27 | - `domain_knowledge`: Domain knowledge provided during initialization.
28 | - `user_input`: User input provided during initialization.
29 | - `loss_function_cache`: A cache for storing the determined loss function for a dataset.
30 | - `unique_values_cache`: A cache for storing the unique values in the target variable `y_true`.
31 | - `class_balance_cache`: A cache for storing the class balance in the target variable `y_true`.
32 | - `logger`: A logger for logging information during the determination of the loss function.
33 |
34 | ## Functionality and Usage
35 |
36 | The `Nebula` class is used to dynamically determine the most suitable loss function for a given dataset and cache the determined loss function for future use. The class analyzes the unique values, class balance, sparsity, and correlation of the target variable `y_true` and the predicted variable `y_pred` to determine whether the problem is a classification or regression problem and select an appropriate loss function.
37 |
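38 | To make the decision process concrete, the snippet below sketches a simplified version of this kind of heuristic. It illustrates the general idea only, not Nebula's actual selection logic, and the threshold is arbitrary.
39 | 
40 | ```python
41 | # Simplified illustration: choose a loss function from target characteristics.
42 | import torch
43 | import torch.nn.functional as F
44 | 
45 | def pick_loss(y_true: torch.Tensor):
46 |     yt = y_true.float()
47 |     # integer-valued targets with few distinct values look like class labels
48 |     looks_discrete = bool(torch.all(yt == yt.round())) and yt.unique().numel() <= 20
49 |     return F.cross_entropy if looks_discrete else F.mse_loss
50 | 
51 | y_pred = torch.randn(10, 5)
52 | y_true = torch.randint(0, 5, (10,))
53 | loss = pick_loss(y_true)(y_pred, y_true)
54 | print(loss)
55 | ```
56 | 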
38 | ### Method: `determine_loss_function`
39 |
40 | ```python
41 | def determine_loss_function(self, y_pred, y_true):
42 | ...
43 | ```
44 |
45 | This method determines the most suitable loss function based on the characteristics of `y_pred` and `y_true`.
46 |
47 | #### Parameters
48 |
49 | - `y_pred` (Tensor): The predicted values.
50 | - `y_true` (Tensor): The ground truth values.
51 |
52 | ### Method: `__call__`
53 |
54 | ```python
55 | def __call__(self, y_pred, y_true):
56 | ...
57 | ```
58 |
59 | This method computes the loss using the determined loss function.
60 |
61 | #### Parameters
62 |
63 | - `y_pred` (Tensor): The predicted values.
64 | - `y_true` (Tensor): The ground truth values.
65 |
66 | #### Returns
67 |
68 | - `Tensor`: The computed loss.
69 |
70 | ### Usage Examples
71 |
72 | #### Example 1: Basic Usage
73 |
74 | ```python
75 | from zeta import Nebula
76 | import torch
77 |
78 | # Initialize Nebula
79 | nebula = Nebula()
80 |
81 | # Generate some example data
82 | y_pred = torch.randn(10, 5)
83 | y_true = torch.randint(0, 5, (10,))
84 |
85 | # Compute the loss
86 | loss = nebula(y_pred, y_true)
87 |
88 | print(loss)
89 | ```
90 |
91 | #### Example 2: Providing Domain Knowledge
92 |
93 | ```python
94 | from zeta import Nebula
95 | import torch
96 |
97 | # Initialize Nebula with domain knowledge
98 | nebula = Nebula(domain_knowledge="classification")
99 |
100 | # Generate some example data
101 | y_pred = torch.randn(10, 5)
102 | y_true = torch.randint(0, 5, (10,))
103 |
104 | # Compute the loss
105 | loss = nebula(y_pred, y_true)
106 |
107 | print(loss)
108 | ```
109 |
110 | #### Example 3: Providing User Input
111 |
112 | ```python
113 | from zeta import Nebula
114 | import torch
115 |
116 | # Initialize Nebula with user input
117 | nebula = Nebula(user_input="regression")
118 |
119 | # Generate some example data
120 | y_pred = torch.randn(10, 1)
121 | y_true = torch.randn(10, 1)
122 |
123 | # Compute the loss
124 | loss = nebula(y_pred, y_true)
125 |
126 | print(loss)
127 | ```
128 |
129 | ## Mathematical Formula
130 |
131 | The `Nebula` class does not have a specific mathematical formula as it dynamically determines the most suitable loss function based on the characteristics of the data. However, the determined loss function will have its own mathematical formula, which can be found in the PyTorch documentation or the `zeta` library documentation.
132 |
133 | ## Additional Information and Tips
134 |
135 | - The `Nebula` class caches the determined loss function, unique values, and class balance for a given dataset to avoid recomputing them in the future.
136 | - If both `domain_knowledge` and `user_input` are provided, `domain_knowledge` will take precedence over `user_input`.
137 | - The `Nebula` class uses the `logging` module to log information during the determination of the loss function. You can customize the logging settings by modifying the `logger` attribute.
138 |
139 |
--------------------------------------------------------------------------------
/docs/zeta/training/optimizers/decoupled_lion.md:
--------------------------------------------------------------------------------
1 | # DecoupledLionW Optimizer
2 |
3 | ## Overview and Introduction
4 |
5 | `DecoupledLionW` is a PyTorch optimizer designed to improve training performance and convergence for deep learning models. It extends the Lion optimizer, combining a momentum-based, sign-driven update rule with decoupled weight decay.
6 | 
7 | The weight decay is applied separately from the gradient-based update, as in AdamW. This decoupling helps prevent overfitting, improves generalization, and supports faster, smoother convergence.
8 |
9 | ### Key Concepts:
10 |
11 | - **Weight Decay:** Reduces the magnitude of the model's weights, preventing overfitting and improving generalization.
12 | - **Momentum Update:** An interpolation between the current gradient and the previous momentum state, allowing for faster convergence and smoother optimization.
13 | - **Momentum Decay:** Gradually reduces the momentum term over time, preventing it from becoming too large and destabilizing the optimization process.
14 |
15 | ## Class Definition
16 |
17 | ```python
18 | class DecoupledLionW(Optimizer):
19 | def __init__(
20 | self,
21 | params,
22 | lr: float = 1e-4,
23 | betas: Tuple[float, float] = (0.9, 0.99),
24 | weight_decay: float = 0.0,
25 | ):
26 | ```
27 |
28 | ### Parameters
29 |
30 | - `params` (iterable): Iterable of parameters to optimize or dictionaries defining parameter groups.
31 | - `lr` (float, optional): Learning rate. Default: 1e-4.
32 | - `betas` (Tuple[float, float], optional): Coefficients controlling the momentum interpolation and the momentum decay, respectively. Default: (0.9, 0.99).
33 | - `weight_decay` (float, optional): Weight decay (L2 penalty). Default: 0.
34 |
35 | ### Attributes
36 |
37 | - `metric_functions`: A dictionary of lambda functions to compute various metrics like L2 norm of moments, parameters, updates, and gradients, as well as cosine similarity between updates and gradients.
38 |
39 | ## Functionality and Usage
40 |
41 | ### `lionw` Method
42 |
43 | This static method is responsible for applying the weight decay, momentum update, and momentum decay.
44 |
45 | ```python
46 | @staticmethod
47 | def lionw(p, grad, exp_avg, lr, initial_lr, wd, beta1, beta2) -> None:
48 | ```
49 |
50 | #### Parameters
51 |
52 | - `p` (Tensor): Parameter tensor.
53 | - `grad` (Tensor): Gradient tensor.
54 | - `exp_avg` (Tensor): Exponential moving average of gradient values.
55 | - `lr` (float): Learning rate.
56 | - `initial_lr` (float): Initial learning rate.
57 | - `wd` (float): Weight decay.
58 | - `beta1` (float): Coefficient for interpolating between the momentum and the current gradient when forming the update direction.
59 | - `beta2` (float): Coefficient for the momentum decay applied after the update.
60 |
61 | ### `step` Method
62 |
63 | Performs a single optimization step.
64 |
65 | ```python
66 | @torch.no_grad()
67 | def step(self, closure: Optional[Callable] = None):
68 | ```
69 |
70 | #### Parameters
71 |
72 | - `closure` (callable, optional): A closure that reevaluates the model and returns the loss.
73 |
74 | #### Returns
75 |
76 | - `loss` (float, optional): The loss value if `closure` is provided. None otherwise.
77 |
78 | ### `pre_reduce_metrics` Method
79 |
80 | This method preprocesses the metrics before reduction across nodes.
81 |
82 | ```python
83 | def pre_reduce_metrics(self, optimizer_metrics):
84 | ```
85 |
86 | #### Parameters
87 |
88 | - `optimizer_metrics` (dict): A dictionary containing the optimizer metrics.
89 |
90 | #### Returns
91 |
92 | - `optimizer_metrics` (dict): The pre-processed optimizer metrics.
93 |
94 | ### `report_per_parameter_metrics` Method
95 |
96 | This method reports the per-parameter metrics.
97 |
98 | ```python
99 | def report_per_parameter_metrics(self, param: torch.Tensor, name: str, optimizer_metrics: dict):
100 | ```
101 |
102 | #### Parameters
103 |
104 | - `param` (Tensor): Parameter tensor.
105 | - `name` (str): Name of the parameter.
106 | - `optimizer_metrics` (dict): A dictionary containing the optimizer metrics.
107 |
108 | #### Returns
109 |
110 | - `optimizer_metrics` (dict): The optimizer metrics with the reported per-parameter metrics.
111 |
112 | ## Usage Examples
113 |
114 | ```python
115 | from zeta import DecoupledLionW
116 | import torch
117 |
118 | # Define model parameters
119 | params = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
120 |
121 | # Define optimizer
122 | optimizer = DecoupledLionW([params], lr=0.1, betas=(0.9, 0.999), weight_decay=0.01)  # params passed as an iterable
123 |
124 | # Define loss function
125 | loss_fn = torch.nn.MSELoss()
126 |
127 | # Forward pass
128 | output = params * 2.0  # placeholder computation standing in for a model
129 | target = torch.tensor([0.0, 1.0, 2.0])
130 | loss = loss_fn(output, target)
131 |
132 | # Backward pass
133 | loss.backward()
134 |
135 | # Optimization step
136 | optimizer.step()
137 | ```
138 |
139 | ## Mathematical Formula
140 |
141 | The update rule of the optimizer can be represented by the following formula:
142 |
143 | \[ p \leftarrow p - \alpha \cdot \mathrm{sign}(\beta_1 \cdot m + (1-\beta_1) \cdot g) - \eta \cdot wd \cdot p \]
144 |
145 | Where:
146 |
147 | - \( p \) is the parameter.
148 | - \( \alpha \) is the learning rate.
149 | - \( \beta_1 \) is the exponential decay rate for the first moment estimates.
150 | - \( m \) is the momentum (exponential moving average of gradient values).
151 | - \( g \) is the gradient.
152 | - \( \eta \) is the decay factor.
153 | - \( wd \) is the weight decay.
154 |
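155 | Under these definitions, a single parameter update can be sketched in plain PyTorch as follows. This is an illustrative reading of the formula above (with \( \eta \) read as the ratio of the current to the initial learning rate, an assumption, since the document calls it only "the decay factor"); it is not the library's `lionw` implementation.
156 | 
157 | ```python
158 | # Sketch of one decoupled Lion update; argument names mirror the lionw signature.
159 | import torch
160 | 
161 | def lionw_step(p, grad, exp_avg, lr, initial_lr, wd, beta1, beta2):
162 |     p.data.mul_(1 - (lr / initial_lr) * wd)                         # decoupled weight decay
163 |     update = exp_avg.mul(beta1).add(grad, alpha=1 - beta1).sign_()  # sign of interpolated momentum
164 |     p.data.add_(update, alpha=-lr)                                  # parameter step
165 |     exp_avg.mul_(beta2).add_(grad, alpha=1 - beta2)                 # momentum decay
166 | 
167 | p, g, m = torch.randn(3), torch.randn(3), torch.zeros(3)
168 | lionw_step(p, g, m, lr=1e-4, initial_lr=1e-4, wd=0.01, beta1=0.9, beta2=0.99)
169 | print(p)
170 | ```
171 | 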
155 | ## Additional Information and Tips
156 |
157 | - A high value of `weight_decay` can lead to a large reduction in the model's weights on every step. Be sure to choose a value appropriate for your specific use case.
158 | - The optimizer supports both single-node and multi-node distributed training, enabling efficient training on parallel computing environments.
159 |
--------------------------------------------------------------------------------
/docs/zeta/training/optimizers/sophia.md:
--------------------------------------------------------------------------------
1 | # SophiaG Optimizer for Zeta Library
2 |
3 | ## Overview
4 |
5 | The SophiaG optimizer is designed to adaptively change learning rates during training, offering a combination of momentum-based acceleration and second-order Hessian-based adaptive learning rates. This optimizer is particularly useful for training deep neural networks and optimizing complex, non-convex loss functions. Key features include:
6 |
7 | 1. **Momentum**: Utilizes exponentially moving averages of gradients.
8 | 2. **Adaptive Learning Rate**: Adjusts the learning rate based on the second-order Hessian information.
9 | 3. **Regularization**: Applies weight decay to avoid overfitting.
10 | 4. **Optional Settings**: Allows for maximizing the loss function, customizable settings for capturable and dynamic parameters.
11 |
12 | ## Class Definition
13 |
14 | ```python
15 | class SophiaG(Optimizer):
16 | def __init__(self, params, lr=1e-4, betas=(0.965, 0.99), rho=0.04,
17 | weight_decay=1e-1, *, maximize: bool = False,
18 | capturable: bool = False, dynamic: bool = False):
19 | ```
20 |
21 | ### Parameters:
22 |
23 | - `params` (iterable): Iterable of parameters to optimize.
24 | - `lr` (float, default=1e-4): Learning rate.
25 | - `betas` (Tuple[float, float], default=(0.965, 0.99)): Coefficients used for computing running averages of gradient and Hessian.
26 | - `rho` (float, default=0.04): Damping factor for Hessian-based updates.
27 | - `weight_decay` (float, default=1e-1): Weight decay factor.
28 | - `maximize` (bool, default=False): Whether to maximize the loss function.
29 | - `capturable` (bool, default=False): Enable/Disable special capturing features.
30 | - `dynamic` (bool, default=False): Enable/Disable dynamic adjustments of the optimizer.
31 |
32 | ## Usage and Functionality
33 |
34 | ### 1. Initialization
35 |
36 | Upon initialization, the optimizer validates its hyperparameters and stores them as the defaults for its parameter groups.
37 |
38 | ```python
39 | from zeta import SophiaG
40 |
41 | optimizer = SophiaG(model.parameters(), lr=0.01, betas=(0.9, 0.999), weight_decay=1e-4)
42 | ```
43 |
44 | ### 2. Step Forward
45 |
46 | The `.step()` method updates the model parameters. It is decorated with `@torch.no_grad()` so that no computation graph is built during the parameter update.
47 |
48 | ```python
49 | loss = criterion(output, target)
50 | loss.backward()
51 | optimizer.step()
52 | ```
53 |
54 | ### 3. Update Hessian and Exponential Average
55 |
56 | The optimizer has internal methods to update the Hessian and Exponential Moving Average (EMA) of the gradients, controlled by `betas`.
57 |
58 | ### 4. SophiaG Function
59 |
60 | The core SophiaG function updates the parameters based on the gradient (`grad`), moving average (`exp_avg`), and Hessian (`hessian`). It uses the following update formula:
61 |
62 | \[ \text{param} = \text{param} - \text{lr} \times \left( \text{beta}_1 \times \text{exp_avg} + \frac{(1-\text{beta}_1) \times \text{grad}}{( \text{beta}_2 \times \text{hessian} + (1-\text{beta}_2) )^{\rho}} \right) \]
63 |
64 | ## Usage Examples
65 |
66 | ### 1. Basic Usage:
67 |
68 | ```python
69 | from zeta import SophiaG
70 | import torch
71 | import torch.nn as nn
72 |
73 | model = nn.Linear(10, 1)
74 | optimizer = SophiaG(model.parameters(), lr=0.01)
75 | ```
76 |
77 | ### 2. Customizing Betas and Learning Rate:
78 |
79 | ```python
80 | from zeta import SophiaG
81 | import torch
82 |
83 | optimizer = SophiaG(model.parameters(), lr=0.001, betas=(0.9, 0.999))
84 | ```
85 |
86 | ### 3. Using with Weight Decay:
87 |
88 | ```python
89 | from zeta import SophiaG
90 |
91 | optimizer = SophiaG(model.parameters(), lr=0.01, weight_decay=1e-4)
92 | ```
93 |
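94 | ### 4. Full Training Step:
95 | 
96 | The example below runs one complete optimization step on a toy regression problem. The model, data, and loss are placeholders for illustration; only the `SophiaG` usage follows the API described above.
97 | 
98 | ```python
99 | from zeta import SophiaG
100 | import torch
101 | import torch.nn as nn
102 | 
103 | model = nn.Linear(10, 1)
104 | optimizer = SophiaG(model.parameters(), lr=1e-3, weight_decay=1e-4)
105 | criterion = nn.MSELoss()
106 | 
107 | x = torch.randn(32, 10)  # toy inputs
108 | y = torch.randn(32, 1)   # toy targets
109 | 
110 | optimizer.zero_grad()
111 | loss = criterion(model(x), y)
112 | loss.backward()
113 | optimizer.step()
114 | print(loss.item())
115 | ```
116 | 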
94 | ## Additional Information and Tips
95 |
96 | - Make sure that the parameters passed are compatible with the model you are using.
97 | - To maximize the loss function (useful in adversarial training), set `maximize=True`.
98 |
99 | ## Common Issues
100 |
101 | - If sparse gradients are involved, the SophiaG optimizer is not applicable.
102 |
103 | ## References and Resources
104 |
105 | - [An Overview of Gradient Descent Optimization Algorithms](https://arxiv.org/abs/1609.04747)
106 | - [Zeta Documentation](https://zeta.apac.ai)
107 |
108 | For further questions or issues, visit our [GitHub repository](https://github.com/kyegomez/zeta).
109 |
--------------------------------------------------------------------------------
/docs/zeta/training/train.md:
--------------------------------------------------------------------------------
1 | # Documentation for `Trainer` Module from Zeta Library
2 |
3 | ---
4 |
5 | ## Introduction
6 |
7 | The `Trainer` module from the Zeta library provides an easy-to-use, flexible, and scalable approach to training deep learning models. By abstracting away many of the lower-level details of training, including distributed training, gradient accumulation, and model checkpointing, `Trainer` allows developers to focus on the high-level aspects of model development and experimentation.
8 |
9 | This module also integrates seamlessly with the HuggingFace `Accelerator` to enable mixed precision training, GPU acceleration, and distributed training across multiple nodes or GPUs.
10 |
11 | ---
12 |
13 | ## `Trainer` Function Definition
14 |
15 | ```python
16 | def Trainer(
17 | gradient_accumulate_every: int = None,
18 | batch_size: int = None,
19 | seq_len: int = None,
20 | entity_name: str = None,
21 | model = None,
22 | use_fsdp: bool = False,
23 | use_activation_checkpointing: bool = False,
24 | learning_rate = None,
25 | seed = None,
26 | use_pretokenized: bool = False,
27 | resume_from_checkpoint = None,
28 | checkpointing_steps = None,
29 | output_dir = None,
30 | weight_decay = None,
31 | use_deepspeed = None
32 | ):
33 | ```
34 |
35 | ### Parameters
36 |
37 | - `gradient_accumulate_every` (`int`, optional): Specifies how often to accumulate gradients. Default: `None`.
38 | - `batch_size` (`int`, optional): Specifies the batch size for training. Default: `None`.
39 | - `seq_len` (`int`, optional): Sequence length for model inputs. Default: `None`.
40 | - `entity_name` (`str`, optional): Name of the entity for logging purposes. Default: `None`.
41 | - `model`: The model to train. Default: `None`.
42 | - `use_fsdp` (`bool`, optional): Whether or not to use Fully Sharded Data Parallelism (FSDP). Default: `False`.
43 | - `use_activation_checkpointing` (`bool`, optional): Use activation checkpointing to save memory during training. Default: `False`.
44 | - `learning_rate`: The learning rate for training. Default: `None`.
45 | - `seed`: Random seed for reproducibility. Default: `None`.
46 | - `use_pretokenized` (`bool`, optional): Whether to use pre-tokenized data. Default: `False`.
47 | - `resume_from_checkpoint`: Path to a checkpoint to resume training from. Default: `None`.
48 | - `checkpointing_steps`: How often to save model checkpoints. Default: `None`.
49 | - `output_dir`: Directory to save final trained model and checkpoints. Default: `None`.
50 | - `weight_decay`: Weight decay value for regularization. Default: `None`.
51 | - `use_deepspeed`: Whether to use deepspeed for training optimization. Default: `None`.
52 |
53 | ---
54 |
55 | ## Functionality and Usage
56 |
57 | The primary function of the `Trainer` module is to handle the training process, including data loading, optimization, and model updates. It leverages HuggingFace's `Accelerator` to provide accelerated training on GPUs and distributed environments.
58 |
59 | Here are the primary steps:
60 |
61 | 1. Initialization of the `Accelerator` for GPU training and gradient accumulation.
62 | 2. Model and optimizer initialization.
63 | 3. Loading datasets and setting up data loaders.
64 | 4. Training loop with gradient accumulation and model checkpointing.
65 | 5. Save the final trained model.
66 |
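67 | For intuition, these steps roughly correspond to the condensed sketch below, written directly against Hugging Face's `Accelerator`. It is illustrative only; `Trainer` wraps these steps (plus checkpointing, FSDP, and dataset handling) for you, and the model, data, and loss here are placeholders.
68 | 
69 | ```python
70 | # Condensed sketch of the training workflow that Trainer automates.
71 | import torch
72 | from accelerate import Accelerator
73 | 
74 | accelerator = Accelerator(gradient_accumulation_steps=2)    # step 1
75 | model = torch.nn.Linear(128, 128)                           # step 2: model ...
76 | optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)  # ... and optimizer
77 | dataloader = torch.utils.data.DataLoader(torch.randn(64, 128), batch_size=8)  # step 3
78 | 
79 | model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
80 | 
81 | for batch in dataloader:                                    # step 4: training loop
82 |     with accelerator.accumulate(model):
83 |         loss = model(batch).pow(2).mean()                   # placeholder loss
84 |         accelerator.backward(loss)
85 |         optimizer.step()
86 |         optimizer.zero_grad()
87 | 
88 | accelerator.save(model.state_dict(), "model.pt")            # step 5
89 | ```
90 | 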
67 | ### Code Examples
68 |
69 | **1. Basic Usage**
70 |
71 | ```python
72 | from zeta import Trainer
73 |
74 | model = ... # Your model definition here
75 | Trainer(
76 | gradient_accumulate_every=2,
77 | batch_size=32,
78 | seq_len=128,
79 | model=model,
80 | learning_rate=0.001,
81 | seed=42,
82 | output_dir='./models/'
83 | )
84 | ```
85 |
86 | **2. Resuming Training from a Checkpoint**
87 |
88 | ```python
89 | from zeta import Trainer
90 |
91 | model = ... # Your model definition here
92 | Trainer(
93 | gradient_accumulate_every=2,
94 | batch_size=32,
95 | seq_len=128,
96 | model=model,
97 | learning_rate=0.001,
98 | seed=42,
99 | resume_from_checkpoint='./models/checkpoint.pt',
100 | output_dir='./models/'
101 | )
102 | ```
103 |
104 | **3. Using FSDP and Activation Checkpointing**
105 |
106 | ```python
107 | from zeta import Trainer
108 |
109 | model = ... # Your model definition here
110 | Trainer(
111 | gradient_accumulate_every=2,
112 | batch_size=32,
113 | seq_len=128,
114 | model=model,
115 | use_fsdp=True,
116 | use_activation_checkpointing=True,
117 | learning_rate=0.001,
118 | seed=42,
119 | output_dir='./models/'
120 | )
121 | ```
122 |
123 | ---
124 |
125 | ## Mathematical Description
126 |
127 | Given a dataset \( D \) consisting of data points \( \{ (x_1, y_1), (x_2, y_2), ... (x_N, y_N) \} \), the trainer aims to minimize the loss function \( L \) with respect to model parameters \( \theta \):
128 |
129 | \[ \theta^* = \arg\min_{\theta} \frac{1}{N} \sum_{i=1}^{N} L(f(x_i; \theta), y_i) \]
130 |
131 |
132 |
133 | where \( f \) is the model's prediction function.
134 |
135 | ---
136 |
137 | ## Conclusions
138 |
139 | The `Trainer` module from the Zeta library streamlines the training process by abstracting away many complexities, making it a valuable tool for developers at all experience levels. Whether you are training a simple model or a complex architecture in a distributed environment, the `Trainer` module offers the flexibility and ease of use to get your models trained efficiently.
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/The-Swarm-Corporation/Multi-Agent-Template-App/dbb3ebd78a39b698068b2d4eae4365450fa05dbe/example.py
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: Package Docs
2 | plugins:
3 | - glightbox
4 | - search
5 | copyright: "© APAC Corp, Inc."
6 | extra_css:
7 | - docs/assets/css/extra.css
8 | extra:
9 | # analytics:
10 | # provider: google
11 | # property: G-QM8EDPSCB6
12 | social:
13 | - icon: fontawesome/solid/house
14 | link: assets/img/ZetaLogoIcon.png
15 | - icon: fontawesome/brands/discord
16 | link: https://discord.gg/qUtxnK2NMf
17 | - icon: fontawesome/brands/github
18 | link: https://github.com/kyegomez/Zeta/
19 | - icon: fontawesome/brands/python
20 | link: https://pypi.org/project/Zeta/
21 | theme:
22 | name: material
23 | custom_dir: docs/overrides
24 | logo: assets/img/ZetaLogoIcon.png
25 | palette:
26 | # Palette toggle for light mode
27 | - scheme: default
28 | primary: 'custom'
29 | toggle:
30 | icon: material/brightness-7
31 | name: Switch to dark mode
32 | # Palette toggle for dark mode
33 | - scheme: slate
34 | primary: 'custom'
35 | accent: light blue
36 | toggle:
37 | icon: material/brightness-4
38 | name: Switch to light mode
39 | features:
40 | - content.code.copy
41 | - content.code.annotate
42 | - navigation.tabs
43 | - navigation.sections
44 | - navigation.expand
45 | - navigation.top
46 | - announce.dismiss
47 | font:
48 | text: Roboto
49 | code: Roboto Mono
50 |
51 | extra_css:
52 | - stylesheets/extra.css
53 |
54 | markdown_extensions:
55 | - pymdownx.highlight:
56 | anchor_linenums: true
57 | line_spans: __span
58 | pygments_lang_class: true
59 | - admonition
60 | - pymdownx.inlinehilite
61 | - pymdownx.snippets
62 | - pymdownx.superfences
63 | - pymdownx.details
64 | - pymdownx.tabbed
65 | - tables
66 | - def_list
67 | - footnotes
68 |
69 |
70 | nav:
71 | - Home:
72 | - Overview: "index.md"
73 | - Contributing: "contributing.md"
74 | - FAQ: "faq.md"
75 | - Purpose: "purpose.md"
76 | - Roadmap: "roadmap.md"
77 | - Design: "design.md"
78 | - Flywheel: "flywheel.md"
79 | - Bounties: "bounties.md"
80 | - Metric: "metric.md"
81 | - Distribution: "distribution"
82 | - Research: "research.md"
83 | - Demos: "demos.md"
84 | - Architecture: "architecture.md"
85 | - Checklist: "checklist.md"
86 | - Hiring: "hiring.md"
87 | - Zeta:
88 | - Overview: "zeta/index.md"
89 | - zeta.nn:
90 | - zeta.nn.biases:
91 | - Xpos: "zeta/nn/biases/xpos.md"
92 | - RelativePositionBias: "zeta/nn/biases/relative_bias.md"
93 | - AlibiPositionalBias: "zeta/nn/biases/alibi.md"
94 | - zeta.nn.embeddings:
95 | - MultiWay: "zeta/nn/embeddings/multiway.md"
96 | - RotaryEmbeddings: "zeta/nn/embeddings/rope.md"
97 | - TruncatedRotaryEmbedding: "zeta/nn/embeddings/truncated_rope.md"
98 | - zeta.nn.modules:
99 | - Lora: "zeta/nn/modules/lora.md"
100 | - TokenLearner: "zeta/nn/modules/token_learner.md"
101 | - zeta.nn.attention:
102 | - FlashAttention: "zeta/nn/attention/flash_attention.md"
103 | - MultiQueryAttention: "zeta/nn/attention/multiquery.md"
104 | - MultiheadAttention: "zeta/nn/attention/multihead.md"
105 | - FlashAttentionTwo: "zeta/nn/attention/flash2.md"
106 | - BaseAttention: "zeta/nn/attention/base.md"
107 | - zeta.nn.architecture:
108 | - Decoder: "zeta/nn/architecture/decoder.md"
109 | - Transformer: "zeta/nn/architecture/transformer.md"
110 | - zeta.training:
111 | - train: "zeta/training/train.md"
112 | - zeta.training.loss:
113 | - Nebula: "zeta/training/nebula.md"
114 | - zeta.training.optimizers:
115 | - DecoupledLionW: "zeta/training/optimizers/decoupled_lion.md"
116 | - SophiaG: "zeta/training/optimizers/sophia.md"
117 | - zeta.tokenizers:
118 | - MultiModalTokenizer: "zeta/tokenizers/multi_modal_tokenizer.md"
119 | - LanguageTokenizerGPTX: "zeta/tokenizers/language_tokenizer.md"
120 | - SentencePieceTokenizer: "zeta/tokenizers/sentencepiece.md"
121 | - Examples:
122 | - Overview: "examples/index.md"
123 | - FlashAttention: "examples/nn/attentions/flash.md"
124 |
--------------------------------------------------------------------------------
/package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/The-Swarm-Corporation/Multi-Agent-Template-App/dbb3ebd78a39b698068b2d4eae4365450fa05dbe/package/__init__.py
--------------------------------------------------------------------------------
/package/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/The-Swarm-Corporation/Multi-Agent-Template-App/dbb3ebd78a39b698068b2d4eae4365450fa05dbe/package/main.py
--------------------------------------------------------------------------------
/package/subfolder/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/The-Swarm-Corporation/Multi-Agent-Template-App/dbb3ebd78a39b698068b2d4eae4365450fa05dbe/package/subfolder/__init__.py
--------------------------------------------------------------------------------
/package/subfolder/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/The-Swarm-Corporation/Multi-Agent-Template-App/dbb3ebd78a39b698068b2d4eae4365450fa05dbe/package/subfolder/main.py
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["poetry-core>=1.0.0"]
3 | build-backend = "poetry.core.masonry.api"
4 |
5 | [tool.poetry]
6 | name = "paper"
7 | version = "0.0.1"
8 | description = "Paper - Pytorch"
9 | license = "MIT"
10 | authors = ["Kye Gomez "]
11 | homepage = "https://github.com/kyegomez/paper"
12 | documentation = "https://github.com/kyegomez/paper" # Add this if you have documentation.
13 | readme = "README.md" # Assuming you have a README.md
14 | repository = "https://github.com/kyegomez/paper"
15 | keywords = ["artificial intelligence", "deep learning", "optimizers", "Prompt Engineering"]
16 | classifiers = [
17 | "Development Status :: 4 - Beta",
18 | "Intended Audience :: Developers",
19 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
20 | "License :: OSI Approved :: MIT License",
21 | "Programming Language :: Python :: 3.9"
22 | ]
23 |
24 | [tool.poetry.dependencies]
25 | python = "^3.10"
26 | swarms = "*"
27 | pydantic = "*"
28 | fastapi = "*"
29 |
30 | [tool.poetry.group.lint.dependencies]
31 | ruff = "^0.6.2"
32 | types-toml = "^0.10.8.1"
33 | types-redis = "^4.3.21.6"
34 | types-pytz = "^2024.1.0.20240417"
35 | black = "^24.4.2"
36 | types-chardet = "^5.0.4.6"
37 | mypy-protobuf = "^3.0.0"
38 |
39 |
40 | [tool.autopep8]
41 | max_line_length = 80
42 | ignore = "E501,W6" # or ["E501", "W6"]
43 | in-place = true
44 | recursive = true
45 | aggressive = 3
46 |
47 |
48 | [tool.ruff]
49 | line-length = 70
50 |
51 | [tool.black]
52 | line-length = 70
53 | target-version = ['py38']
54 | preview = true
55 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | zetascale
3 | swarms
4 |
--------------------------------------------------------------------------------
/scripts/code_quality.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Navigate to the directory containing the 'package' folder
4 | # cd /path/to/your/code/directory
5 |
6 | # Run autopep8 with max aggressiveness (-aaa) and in-place modification (-i)
7 | # on all Python files (*.py) under the 'package' directory.
8 | autopep8 --in-place --aggressive --aggressive --recursive --experimental --list-fixes package/
9 |
10 | # Run black with default settings, since black does not have an aggressiveness level.
11 | # Black will format all Python files it finds in the 'package' directory.
12 | black --preview package/
13 |
14 | # Run ruff on the 'package' directory.
15 | # Add any additional flags if needed according to your version of ruff.
16 | ruff check package/ --fix --unsafe-fixes
17 |
18 | # YAPF
19 | yapf --recursive --in-place --verbose --style=google --parallel package
20 |
--------------------------------------------------------------------------------
/scripts/merge_all_prs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Check if we are inside a Git repository
4 | if ! git rev-parse --git-dir > /dev/null 2>&1; then
5 | echo "Error: Must be run inside a Git repository."
6 | exit 1
7 | fi
8 |
9 | # Fetch all open pull requests
10 | echo "Fetching open PRs..."
11 | prs=$(gh pr list --state open --json number --jq '.[].number')
12 |
13 | # Check if there are PRs to merge
14 | if [ -z "$prs" ]; then
15 | echo "No open PRs to merge."
16 | exit 0
17 | fi
18 |
19 | echo "Found PRs: $prs"
20 |
21 | # Loop through each pull request number and merge it
22 | for pr in $prs; do
23 | echo "Attempting to merge PR #$pr"
24 | merge_output=$(gh pr merge $pr --auto --merge)
25 | merge_status=$?
26 | if [ $merge_status -ne 0 ]; then
27 | echo "Failed to merge PR #$pr. Error: $merge_output"
28 | else
29 | echo "Successfully merged PR #$pr"
30 | fi
31 | done
32 |
33 | echo "Processing complete."
34 |
--------------------------------------------------------------------------------
/scripts/test_name.sh:
--------------------------------------------------------------------------------
1 | find ./tests -name "*.py" -type f | while read file
2 | do
3 | filename=$(basename "$file")
4 | dir=$(dirname "$file")
5 | if [[ $filename != test_* ]]; then
6 | mv "$file" "$dir/test_$filename"
7 | fi
8 | done
--------------------------------------------------------------------------------
/scripts/tests.sh:
--------------------------------------------------------------------------------
1 | find ./tests -name '*.py' -exec pytest {} \;
--------------------------------------------------------------------------------