├── fts
├── trainer
│ ├── __init__.py
│ └── base.py
├── utils
│ ├── __init__.py
│ └── main.py
├── inference
│ ├── __init__.py
│ ├── base.py
│ ├── gptq.py
│ └── hf_model.py
├── processing
│ ├── __init__.py
│ ├── base.py
│ └── build_dataset.py
├── __init__.py
└── finetuner.py
├── docs
├── applications
│ ├── enterprise.md
│ ├── customer_support.md
│ └── marketing_agencies.md
├── .DS_Store
├── assets
│ ├── img
│ │ ├── ft-logo.png
│ │ └── tools
│ │ │ ├── toml.png
│ │ │ ├── output.png
│ │ │ └── poetry_setup.png
│ └── css
│ │ └── extra.css
├── demos.md
├── stylesheets
│ └── extra.css
├── architecture.md
├── metric.md
├── overrides
│ └── main.html
├── index.md
├── purpose.md
├── hiring.md
├── faq.md
├── ft
│ ├── gptq_inference.md
│ ├── index.md
│ ├── inference.md
│ └── finetuner.md
├── design.md
├── contributing.md
├── bounties.md
├── roadmap.md
└── flywheel.md
├── .DS_Store
├── images
├── ft-logo.png
└── agorabanner.png
├── inference.py
├── .pre-commit-config.yaml
├── requirements.txt
├── .readthedocs.yml
├── example.py
├── .github
├── workflows
│ ├── pull-request-links.yml
│ ├── docs.yml
│ ├── welcome.yml
│ ├── label.yml
│ ├── pylint.yml
│ ├── python-publish.yml
│ ├── stale.yml
│ ├── unit-test.yml
│ ├── publish.yml
│ └── test.yml
├── dependabot.yml
├── ISSUE_TEMPLATE
│ ├── feature_request.md
│ └── bug_report.md
├── FUNDING.yml
└── PULL_REQUEST_TEMPLATE.yml
├── pyproject.toml
├── Makefile
├── playground
└── llama2_english.py
├── mkdocs.yml
├── .gitignore
├── README.md
└── LICENSE
/fts/trainer/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fts/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fts/inference/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fts/processing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/applications/enterprise.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/.DS_Store
--------------------------------------------------------------------------------
/docs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/.DS_Store
--------------------------------------------------------------------------------
/images/ft-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/images/ft-logo.png
--------------------------------------------------------------------------------
/images/agorabanner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/images/agorabanner.png
--------------------------------------------------------------------------------
/docs/assets/img/ft-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/assets/img/ft-logo.png
--------------------------------------------------------------------------------
/docs/assets/img/tools/toml.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/assets/img/tools/toml.png
--------------------------------------------------------------------------------
/docs/assets/img/tools/output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/assets/img/tools/output.png
--------------------------------------------------------------------------------
/docs/demos.md:
--------------------------------------------------------------------------------
1 | # Demo Ideas
2 |
3 | * GPT-4
4 | * Andromeda
5 | * Kosmos
6 | * LongNet
7 | * Text to video diffusion
8 | * Nebula
9 |
--------------------------------------------------------------------------------
/docs/assets/img/tools/poetry_setup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/Finetuning-Suite/HEAD/docs/assets/img/tools/poetry_setup.png
--------------------------------------------------------------------------------
/docs/stylesheets/extra.css:
--------------------------------------------------------------------------------
1 | :root {
2 | --md-primary-fg-color: #8315F9;
3 | --md-accent-fg-color: #00FFCE;
4 | }
--------------------------------------------------------------------------------
/docs/assets/css/extra.css:
--------------------------------------------------------------------------------
1 | .md-typeset__table {
2 | min-width: 100%;
3 | }
4 |
5 | .md-typeset table:not([class]) {
6 | display: table;
7 | }
--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
# Example script: build an ``Inference`` object for a chat model and send it
# a single prompt.
from fts import Inference

model = Inference(model_id="georgesung/llama2_7b_chat_uncensored", quantized=True)

# The value returned by ``run`` is neither stored nor printed here.
model.run("What is your name")
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | - repo: https://github.com/astral-sh/ruff-pre-commit
2 | # Ruff version.
3 | rev: v0.0.286
4 | hooks:
5 | - id: ruff
6 | args: [--fix, --exit-non-zero-on-fix]
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers
2 | bitsandbytes
3 | accelerate
4 | datasets
5 | rich
6 | tensorboard
7 | wandb
8 | tokenizers
9 | optimum
10 |
11 |
12 | mkdocs
13 | mkdocs-material
14 | mkdocs-glightbox
15 |
--------------------------------------------------------------------------------
/docs/architecture.md:
--------------------------------------------------------------------------------
1 | # Architecture
2 | * Simple file structure
3 | * Fluid API
4 | * Useful error handling that provides potential solutions and root cause error understanding
5 | * nn, tokenizers, models, training
6 | *
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-22.04
5 | tools:
6 | python: "3.11"
7 |
8 | mkdocs:
9 | configuration: mkdocs.yml
10 |
11 | python:
12 | install:
13 | - requirements: requirements.txt
--------------------------------------------------------------------------------
/docs/metric.md:
--------------------------------------------------------------------------------
1 | # The Golden Metric:
2 |
3 | * We need to figure out a single metric that determines if we're accomplishing our goal with zeta which is to build zetascale superintelligent AI models as fast as possible with minimal code.
4 |
5 |
--------------------------------------------------------------------------------
/fts/__init__.py:
--------------------------------------------------------------------------------
1 | from fts.finetuner import FineTuner
2 | from fts.inference.hf_model import Inference
3 |
4 | from fts.processing.base import Preprocessor, DefaultPreprocessor
5 | from fts.trainer.base import TrainerConfiguration, DefaultTrainerConfig
6 |
7 | from fts.processing.build_dataset import BuildDataset
8 |
--------------------------------------------------------------------------------
/docs/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 |
4 |
5 | {% block announce %}
6 |
9 | {% endblock %}
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
"""Example script: build a ``FineTuner`` for flan-t5-xxl on the samsum dataset."""
from fts import FineTuner

model_id = "google/flan-t5-xxl"
dataset_name = "samsum"

finetune = FineTuner(
    model_id=model_id,
    # Reuse the variable defined above instead of repeating the literal.
    dataset_name=dataset_name,
    max_length=150,
    lora_r=16,
    lora_alpha=32,
    quantize=True,
)

# NOTE(review): this statement only looks up the ``train`` attribute; unless
# ``train`` is a property, no training is started. It probably needs to be
# called with whatever arguments ``FineTuner.train`` expects -- confirm
# against fts/finetuner.py.
finetune.train
--------------------------------------------------------------------------------
/.github/workflows/pull-request-links.yml:
--------------------------------------------------------------------------------
1 | name: readthedocs/actions
2 | on:
3 | pull_request_target:
4 | types:
5 | - opened
6 | paths:
7 | - "docs/**"
8 |
9 | permissions:
10 | pull-requests: write
11 |
12 | jobs:
13 | pull-request-links:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: readthedocs/actions/preview@v1
17 | with:
18 | project-slug: ft
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates
2 |
3 | version: 2
4 | updates:
5 | - package-ecosystem: "github-actions"
6 | directory: "/"
7 | schedule:
8 | interval: "weekly"
9 |
10 | - package-ecosystem: "pip"
11 | directory: "/"
12 | schedule:
13 | interval: "weekly"
14 |
15 |
--------------------------------------------------------------------------------
/fts/utils/main.py:
--------------------------------------------------------------------------------
1 |
2 |
def print_trainable_parameters(model):
    """Print how many of ``model``'s parameters are trainable.

    Iterates ``model.named_parameters()``, summing ``numel()`` over every
    parameter and, separately, over those whose ``requires_grad`` is truthy,
    then prints both totals plus the trainable share as a percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs, where each ``param`` provides
            ``numel()`` and ``requires_grad``.

    Returns:
        None. The summary is written to standard output.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # A model without any parameters would otherwise divide by zero.
    percent = 100 * trainable_params / all_param if all_param else 0.0

    print(
        f"Trainable params: {trainable_params} || all params {all_param} || trainable: {percent}"
    )
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: Docs WorkFlow
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | - main
8 | - develop
9 | jobs:
10 | deploy:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v3
14 | - uses: actions/setup-python@v5
15 | with:
16 | python-version: 3.x
17 | - run: pip install mkdocs-material
18 | - run: pip install "mkdocstrings[python]"
19 | - run: mkdocs gh-deploy --force
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # Finetuning Suite Docs
2 |
3 | Welcome to Finetuning Suite's Documentation!
4 |
5 | Finetuning Suite is a modular framework that enables seamless, reliable, and fluid finetuning and inference.
6 |
7 | ## Finetuning Suite
8 |
9 |
10 |
11 | Finetuning Suite is a modular framework that enables seamless, reliable, and fluid finetuning and inference.
12 | [Click here for Finetuning Suite Documentation →](ft/)
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.github/workflows/welcome.yml:
--------------------------------------------------------------------------------
1 | name: Welcome WorkFlow
2 |
3 | on:
4 | issues:
5 | types: [opened]
6 | pull_request_target:
7 | types: [opened]
8 |
9 | jobs:
10 | build:
11 | name: 👋 Welcome
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/first-interaction@v1.3.0
15 | with:
16 | repo-token: ${{ secrets.GITHUB_TOKEN }}
17 | issue-message: "Hello there, thank you for opening an Issue ! 🙏🏻 The team was notified and they will get back to you asap."
18 | pr-message: "Hello there, thank you for opening a PR ! 🙏🏻 The team was notified and they will get back to you asap."
--------------------------------------------------------------------------------
/.github/workflows/label.yml:
--------------------------------------------------------------------------------
1 | # This workflow will triage pull requests and apply a label based on the
2 | # paths that are modified in the pull request.
3 | #
4 | # To use this workflow, you will need to set up a .github/labeler.yml
5 | # file with configuration. For more information, see:
6 | # https://github.com/actions/labeler
7 |
8 | name: Labeler
9 | on: [pull_request_target]
10 |
11 | jobs:
12 | label:
13 |
14 | runs-on: ubuntu-latest
15 | permissions:
16 | contents: read
17 | pull-requests: write
18 |
19 | steps:
20 | - uses: actions/labeler@v5
21 | with:
22 | repo-token: "${{ secrets.GITHUB_TOKEN }}"
23 |
--------------------------------------------------------------------------------
/.github/workflows/pylint.yml:
--------------------------------------------------------------------------------
1 | name: Pylint
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 | strategy:
9 | matrix:
10 | python-version: ["3.8", "3.9", "3.10"]
11 | steps:
12 | - uses: actions/checkout@v3
13 | - name: Set up Python ${{ matrix.python-version }}
14 | uses: actions/setup-python@v5
15 | with:
16 | python-version: ${{ matrix.python-version }}
17 | - name: Install dependencies
18 | run: |
19 | python -m pip install --upgrade pip
20 | pip install pylint
21 | - name: Analysing the code with pylint
22 | run: |
23 | pylint $(git ls-files '*.py')
24 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "ft-suite"
3 | version = "0.1.7"
4 | description = "A fine-tuning suite based on Transformers and LoRA."
5 | authors = ["Kye Gomez "]
6 | license = "MIT"
7 | packages = [
8 | { include = "fts" },
9 | { include = "fts/**/*.py" },
10 | ]
11 |
12 | [tool.poetry.dependencies]
13 | python = "^3.7"
14 | torch = "*"
15 | transformers = "*"
16 | datasets = "*"
17 | peft = "*"
18 | accelerate = "*"
19 | optimum = "*"
20 | bitsandbytes = "*"
21 |
22 |
23 | [tool.poetry.dev-dependencies]
24 | pytest = "^5.2"
25 |
26 | [build-system]
27 | requires = ["poetry-core>=1.0.0"]
28 | build-backend = "poetry.core.masonry.api"
29 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: 'kyegomez'
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [kyegomez]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: #Nothing
14 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 |
2 | name: Upload Python Package
3 |
4 | on:
5 | release:
6 | types: [published]
7 |
8 | permissions:
9 | contents: read
10 |
11 | jobs:
12 | deploy:
13 |
14 | runs-on: ubuntu-latest
15 |
16 | steps:
17 | - uses: actions/checkout@v3
18 | - name: Set up Python
19 | uses: actions/setup-python@v5
20 | with:
21 | python-version: '3.x'
22 | - name: Install dependencies
23 | run: |
24 | python -m pip install --upgrade pip
25 | pip install build
26 | - name: Build package
27 | run: python -m build
28 | - name: Publish package
29 | uses: pypa/gh-action-pypi-publish@2f6f737ca5f74c637829c0f5c3acd0e29ea5e8bf
30 | with:
31 | user: __token__
32 | password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a detailed report on the bug and its root cause. Conduct root cause error analysis
4 | title: "[BUG] "
5 | labels: bug
6 | assignees: kyegomez
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is and what the main root cause error is. Test very thoroughly before submitting.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Additional context**
27 | Add any other context about the problem here.
28 |
--------------------------------------------------------------------------------
/fts/inference/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
class InferenceHandler(ABC):
    """Abstract interface for objects that run a prompt through a model."""

    @abstractmethod
    def run(
        self,
        prompt_text=None,
        model=None,
        tokenizer=None,
        device=None,
        max_length=None,
    ):
        """Run inference for ``prompt_text``; concrete handlers must override."""


class DefaultInferenceHandler(InferenceHandler):
    """Handler that encodes the prompt, calls the model, and decodes the first output."""

    def run(
        self,
        prompt_text,
        model,
        tokenizer,
        device,
        max_length,
    ):
        """Encode ``prompt_text``, run ``model`` on it, and decode the result.

        Args:
            prompt_text: Text passed to ``tokenizer.encode``.
            model: Object whose ``run(inputs, max_length=..., do_sample=True)``
                returns an indexable collection of outputs.
            tokenizer: Object providing ``encode(text, return_tensors="pt")``
                and ``decode(ids, skip_special_tokens=True)``.
            device: Target passed to the encoded inputs' ``.to(...)``. When
                ``None`` the inputs are left where the tokenizer put them.
            max_length: Forwarded to ``model.run`` as ``max_length``.

        Returns:
            The result of decoding the first element of the model output.
        """
        inputs = tokenizer.encode(prompt_text, return_tensors="pt")
        # Use the ``device`` argument: this class never sets ``self.device``,
        # so reading it raised AttributeError on every call.
        if device is not None:
            inputs = inputs.to(device)
        # NOTE(review): Hugging Face models expose ``generate`` rather than
        # ``run`` -- confirm which interface the ``model`` passed here has.
        outputs = model.run(inputs, max_length=max_length, do_sample=True)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time.
2 | #
3 | # You can adjust the behavior by modifying this file.
4 | # For more information, see:
5 | # https://github.com/actions/stale
6 | name: Mark stale issues and pull requests
7 |
8 | on:
9 | schedule:
10 | - cron: '26 12 * * *'
11 |
12 | jobs:
13 | stale:
14 |
15 | runs-on: ubuntu-latest
16 | permissions:
17 | issues: write
18 | pull-requests: write
19 |
20 | steps:
21 | - uses: actions/stale@v9
22 | with:
23 | repo-token: ${{ secrets.GITHUB_TOKEN }}
24 | stale-issue-message: 'Stale issue message'
25 | stale-pr-message: 'Stale pull request message'
26 | stale-issue-label: 'no-issue-activity'
27 | stale-pr-label: 'no-pr-activity'
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: style check_code_quality
2 |
3 | export PYTHONPATH = .
4 | check_dirs := src
5 |
6 | style:
7 | black $(check_dirs)
8 | isort --profile black $(check_dirs)
9 |
10 | check_code_quality:
11 | black --check $(check_dirs)
12 | isort --check-only --profile black $(check_dirs)
13 | # stop the build if there are Python syntax errors or undefined names
14 | flake8 $(check_dirs) --count --select=E9,F63,F7,F82 --show-source --statistics
15 | # exit-zero treats all errors as warnings. E203 for black, E501 for docstring, W503 for line breaks before logical operators
16 | flake8 $(check_dirs) --count --max-line-length=88 --exit-zero --ignore=D --extend-ignore=E203,E501,W503 --statistics
17 |
18 | publish:
19 | python setup.py sdist bdist_wheel
20 | twine upload -r testpypi dist/* -u ${PYPI_USERNAME} -p ${PYPI_TEST_PASSWORD} --verbose
21 | twine check dist/*
22 | twine upload dist/* -u ${PYPI_USERNAME} -p ${PYPI_PASSWORD} --verbose
--------------------------------------------------------------------------------
/.github/workflows/unit-test.yml:
--------------------------------------------------------------------------------
1 | name: build
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 |
11 | build:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v3
17 |
18 | - name: Setup Python
19 | uses: actions/setup-python@v5
20 | with:
21 | python-version: '3.10'
22 |
23 | - name: Install dependencies
24 | run: pip install -r requirements.txt
25 |
26 | - name: Run Python unit tests
27 | run: python3 -m unittest tests/finetuning_suite
28 |
29 | - name: Verify that the Docker image for the action builds
30 | run: docker build . --file Dockerfile
31 |
32 | - name: Integration test 1
33 | uses: ./
34 | with:
35 | input-one: something
36 | input-two: true
37 |
38 | - name: Integration test 2
39 | uses: ./
40 | with:
41 | input-one: something else
42 | input-two: false
43 |
44 | - name: Verify integration test results
45 | run: python3 -m unittest unittesting/finetuning_suite
46 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.yml:
--------------------------------------------------------------------------------
1 |