├── fts ├── trainer │ ├── __init__.py │ └── base.py ├── utils │ ├── __init__.py │ └── main.py ├── inference │ ├── __init__.py │ ├── base.py │ ├── gptq.py │ └── hf_model.py ├── processing │ ├── __init__.py │ ├── base.py │ └── build_dataset.py ├── __init__.py └── finetuner.py ├── docs ├── applications │ ├── enterprise.md │ ├── customer_support.md │ └── marketing_agencies.md ├── .DS_Store ├── assets │ ├── img │ │ ├── ft-logo.png │ │ └── tools │ │ │ ├── toml.png │ │ │ ├── output.png │ │ │ └── poetry_setup.png │ └── css │ │ └── extra.css ├── demos.md ├── stylesheets │ └── extra.css ├── architecture.md ├── metric.md ├── overrides │ └── main.html ├── index.md ├── purpose.md ├── hiring.md ├── faq.md ├── ft │ ├── gptq_inference.md │ ├── index.md │ ├── inference.md │ └── finetuner.md ├── design.md ├── contributing.md ├── bounties.md ├── roadmap.md └── flywheel.md ├── .DS_Store ├── images ├── ft-logo.png └── agorabanner.png ├── inference.py ├── .pre-commit-config.yaml ├── requirements.txt ├── .readthedocs.yml ├── example.py ├── .github ├── workflows │ ├── pull-request-links.yml │ ├── docs.yml │ ├── welcome.yml │ ├── label.yml │ ├── pylint.yml │ ├── python-publish.yml │ ├── stale.yml │ ├── unit-test.yml │ ├── publish.yml │ └── test.yml ├── dependabot.yml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── FUNDING.yml └── PULL_REQUEST_TEMPLATE.yml ├── pyproject.toml ├── Makefile ├── playground └── llama2_english.py ├── mkdocs.yml ├── .gitignore ├── README.md └── LICENSE /fts/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fts/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fts/inference/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /fts/processing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/applications/enterprise.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/.DS_Store -------------------------------------------------------------------------------- /docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/.DS_Store -------------------------------------------------------------------------------- /images/ft-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/images/ft-logo.png -------------------------------------------------------------------------------- /images/agorabanner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/images/agorabanner.png -------------------------------------------------------------------------------- /docs/assets/img/ft-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/assets/img/ft-logo.png -------------------------------------------------------------------------------- /docs/assets/img/tools/toml.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/assets/img/tools/toml.png -------------------------------------------------------------------------------- /docs/assets/img/tools/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/assets/img/tools/output.png -------------------------------------------------------------------------------- /docs/demos.md: -------------------------------------------------------------------------------- 1 | # Demo Ideas 2 | 3 | * GPT-4 4 | * Andromeda 5 | * Kosmos 6 | * LongNet 7 | * Text to video diffusion 8 | * Nebula 9 | -------------------------------------------------------------------------------- /docs/assets/img/tools/poetry_setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/assets/img/tools/poetry_setup.png -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --md-primary-fg-color: #8315F9; 3 | --md-accent-fg-color: #00FFCE; 4 | } -------------------------------------------------------------------------------- /docs/assets/css/extra.css: -------------------------------------------------------------------------------- 1 | .md-typeset__table { 2 | min-width: 100%; 3 | } 4 | 5 | .md-typeset table:not([class]) { 6 | display: table; 7 | } -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | from fts import Inference 2 | 3 | model = Inference( 4 | model_id="georgesung/llama2_7b_chat_uncensored", 5 | quantized=True 6 | ) 7 | 8 | model.run("What is your name") 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | - repo: https://github.com/astral-sh/ruff-pre-commit 2 | # Ruff version. 3 | rev: v0.0.286 4 | hooks: 5 | - id: ruff 6 | args: [--fix, --exit-non-zero-on-fix] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | bitsandbytes 3 | accelerate 4 | datasets 5 | rich 6 | tensorboard 7 | wandb 8 | tokenizers 9 | optimum 10 | 11 | 12 | mkdocs 13 | mkdocs-material 14 | mkdocs-glightbox 15 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | * Simple file structure 3 | * Fluid API 4 | * Useful error handling that provides potential solutions and root cause error understanding 5 | * nn, tokenizers, models, training 6 | * -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.11" 7 | 8 | mkdocs: 9 | configuration: mkdocs.yml 10 | 11 | python: 12 | install: 13 | - requirements: requirements.txt -------------------------------------------------------------------------------- /docs/metric.md: -------------------------------------------------------------------------------- 1 | # The Golden Metric: 2 | 3 | * We need to figure out a single metric that determines if we're accomplishing our goal with zeta which is to build zetascale superintelligent AI models as fast as possible with minimal code. 
4 | 5 | -------------------------------------------------------------------------------- /fts/__init__.py: -------------------------------------------------------------------------------- 1 | from fts.finetuner import FineTuner 2 | from fts.inference.hf_model import Inference 3 | 4 | from fts.processing.base import Preprocessor, DefaultPreprocessor 5 | from fts.trainer.base import TrainerConfiguration, DefaultTrainerConfig 6 | 7 | from fts.processing.build_dataset import BuildDataset 8 | -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | 4 | 5 | {% block announce %} 6 |
7 | Star and contribute to Zeta on GitHub! 8 |
def print_trainable_parameters(model):
    """Print how many of ``model``'s parameters are trainable.

    The printed line reports the trainable element count, the total element
    count, and the trainable share as a percentage. A model that yields no
    parameters reports ``0.0`` instead of raising ``ZeroDivisionError``.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs, where each ``param`` provides
            ``numel()`` and a ``requires_grad`` flag.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # Guard the ratio: an empty model would otherwise divide by zero.
    percentage = 100 * trainable_params / all_param if all_param else 0.0

    print(
        f"Trainable params: {trainable_params} || all params {all_param} || trainable: {percentage}"
    )
18 | pr-message: "Hello there, thank you for opening an PR ! 🙏🏻 The team was notified and they will get back to you asap." -------------------------------------------------------------------------------- /.github/workflows/label.yml: -------------------------------------------------------------------------------- 1 | # This workflow will triage pull requests and apply a label based on the 2 | # paths that are modified in the pull request. 3 | # 4 | # To use this workflow, you will need to set up a .github/labeler.yml 5 | # file with configuration. For more information, see: 6 | # https://github.com/actions/labeler 7 | 8 | name: Labeler 9 | on: [pull_request_target] 10 | 11 | jobs: 12 | label: 13 | 14 | runs-on: ubuntu-latest 15 | permissions: 16 | contents: read 17 | pull-requests: write 18 | 19 | steps: 20 | - uses: actions/labeler@v5 21 | with: 22 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 23 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8", "3.9", "3.10"] 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v5 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pylint 21 | - name: Analysing the code with pylint 22 | run: | 23 | pylint $(git ls-files '*.py') 24 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "ft-suite" 3 | version = "0.1.7" 4 | description = "A fine-tuning suite based on Transformers and LoRA." 
5 | authors = ["Kye Gomez "] 6 | license = "MIT" 7 | packages = [ 8 | { include = "fts" }, 9 | { include = "fts/**/*.py" }, 10 | ] 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.7" 14 | torch = "*" 15 | transformers = "*" 16 | datasets = "*" 17 | peft = "*" 18 | accelerate = "*" 19 | optimum = "*" 20 | bitsandbytes = "*" 21 | 22 | 23 | [tool.poetry.dev-dependencies] 24 | pytest = "^5.2" 25 | 26 | [build-system] 27 | requires = ["poetry-core>=1.0.0"] 28 | build-backend = "poetry.core.masonry.api" 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: 'kyegomez' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [kyegomez] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: #Nothing 14 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Upload Python Package 3 | 4 | on: 5 | release: 6 | types: [published] 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | deploy: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.x' 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | pip install build 26 | - name: Build package 27 | run: python -m build 28 | - name: Publish package 29 | uses: pypa/gh-action-pypi-publish@2f6f737ca5f74c637829c0f5c3acd0e29ea5e8bf 30 | with: 31 | user: __token__ 32 | password: ${{ secrets.PYPI_API_TOKEN }} -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: 
from abc import ABC, abstractmethod


class InferenceHandler(ABC):
    """Interface for objects that turn a prompt into generated text."""

    @abstractmethod
    def run(
        self,
        prompt_text=None,
        model=None,
        tokenizer=None,
        device=None,
        max_length=None,
    ):
        """Produce text for ``prompt_text``; concrete handlers must override this."""


class DefaultInferenceHandler(InferenceHandler):
    """Handler that encodes the prompt, runs the model, and decodes the output."""

    def run(
        self,
        prompt_text,
        model,
        tokenizer,
        device,
        max_length,
    ):
        """Run ``model`` on ``prompt_text`` and return the decoded text.

        Args:
            prompt_text: Prompt handed to ``tokenizer.encode``.
            model: Object whose ``run(inputs, max_length=..., do_sample=True)``
                returns an indexable result; element ``0`` is decoded.
            tokenizer: Object providing ``encode(text, return_tensors="pt")``
                and ``decode(ids, skip_special_tokens=True)``.
            device: Target passed to ``.to(...)`` on the encoded inputs.
            max_length: Forwarded to ``model.run`` as ``max_length``.

        Returns:
            Whatever ``tokenizer.decode`` returns for the first output.
        """
        # The handler holds no state, so the device must come from the
        # argument; reading ``self.device`` raised AttributeError on every call.
        inputs = tokenizer.encode(prompt_text, return_tensors="pt").to(device)
        # NOTE(review): Hugging Face models expose ``generate``, not ``run`` --
        # confirm which model type callers actually pass in here.
        outputs = model.run(inputs, max_length=max_length, do_sample=True)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
for a specified amount of time. 2 | # 3 | # You can adjust the behavior by modifying this file. 4 | # For more information, see: 5 | # https://github.com/actions/stale 6 | name: Mark stale issues and pull requests 7 | 8 | on: 9 | schedule: 10 | - cron: '26 12 * * *' 11 | 12 | jobs: 13 | stale: 14 | 15 | runs-on: ubuntu-latest 16 | permissions: 17 | issues: write 18 | pull-requests: write 19 | 20 | steps: 21 | - uses: actions/stale@v9 22 | with: 23 | repo-token: ${{ secrets.GITHUB_TOKEN }} 24 | stale-issue-message: 'Stale issue message' 25 | stale-pr-message: 'Stale pull request message' 26 | stale-issue-label: 'no-issue-activity' 27 | stale-pr-label: 'no-pr-activity' -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: style check_code_quality 2 | 3 | export PYTHONPATH = . 4 | check_dirs := src 5 | 6 | style: 7 | black $(check_dirs) 8 | isort --profile black $(check_dirs) 9 | 10 | check_code_quality: 11 | black --check $(check_dirs) 12 | isort --check-only --profile black $(check_dirs) 13 | # stop the build if there are Python syntax errors or undefined names 14 | flake8 $(check_dirs) --count --select=E9,F63,F7,F82 --show-source --statistics 15 | # exit-zero treats all errors as warnings. 
E203 for black, E501 for docstring, W503 for line breaks before logical operators 16 | flake8 $(check_dirs) --count --max-line-length=88 --exit-zero --ignore=D --extend-ignore=E203,E501,W503 --statistics 17 | 18 | publish: 19 | python setup.py sdist bdist_wheel 20 | twine upload -r testpypi dist/* -u ${PYPI_USERNAME} -p ${PYPI_TEST_PASSWORD} --verbose 21 | twine check dist/* 22 | twine upload dist/* -u ${PYPI_USERNAME} -p ${PYPI_PASSWORD} --verbose -------------------------------------------------------------------------------- /.github/workflows/unit-test.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | - name: Setup Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.10' 22 | 23 | - name: Install dependencies 24 | run: pip install -r requirements.txt 25 | 26 | - name: Run Python unit tests 27 | run: python3 -m unittest tests/finetuning_suite 28 | 29 | - name: Verify that the Docker image for the action builds 30 | run: docker build . --file Dockerfile 31 | 32 | - name: Integration test 1 33 | uses: ./ 34 | with: 35 | input-one: something 36 | input-two: true 37 | 38 | - name: Integration test 2 39 | uses: ./ 40 | with: 41 | input-one: something else 42 | input-two: false 43 | 44 | - name: Verify integration test results 45 | run: python3 -m unittest unittesting/finetuning_suite 46 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.yml: -------------------------------------------------------------------------------- 1 |