├── .DS_Store
├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── data
    ├── demo_data.jsonl
    ├── function_calling_demo.jsonl
    └── test_data_loading.py
├── dataset.py
├── demo.py
├── full_automation.py
├── merge.py
├── requirements.txt
├── training_args.yaml
└── utils
    ├── constants.py
    ├── flock_api.py
    ├── gpu_utils.py
    └── tool_utils.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FLock-io/testnet-training-node-quickstart/bdeffb0a7a664d89b8fc3eec501c2b9c79a8125c/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v5.0.0
 4 |     hooks:
 5 |       - id: check-yaml
 6 |       - id: end-of-file-fixer
 7 |       - id: trailing-whitespace
 8 |   - repo: https://github.com/astral-sh/ruff-pre-commit
 9 |     rev: v0.11.0
10 |     hooks:
11 |       - id: ruff
12 |         args: [--fix]
13 |       - id: ruff-format
14 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04
 2 | 
 3 | RUN apt-get update && apt-get install -y \
 4 |     python3-dev \
 5 |     python3-pip \
 6 |     && ln -s /usr/bin/python3 /usr/bin/python \
 7 |     && rm -rf /var/lib/apt/lists/*
 8 | 
 9 | WORKDIR /app
10 | 
11 | # Copy only requirements first to leverage Docker cache
12 | COPY requirements.txt .
13 | RUN pip3 install --no-cache-dir -r requirements.txt
14 | 
15 | # Copy the rest of the application code
16 | COPY . .
17 | 
18 | CMD ["python", "full_automation.py"]
19 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # testnet-training-node-quickstart
 2 | 
 3 | This repository contains demo scripts for you to fine-tune supported language models for train.flock.io.
 4 | 
 5 | ## Quickstart
 6 | 
 7 | ### Install Dependencies
 8 | 
 9 | To set up your environment, run the following commands:
10 | 
11 | ```bash
12 | conda create -n training-node python==3.10
13 | conda activate training-node
14 | pip install -r requirements.txt
15 | ```
16 | 
17 | ### File Structure
18 | 
19 | - [`dataset.py`](dataset.py) - Contains the logic to process the raw data from `demo_data.jsonl`.
20 | - [`demo_data.jsonl`](data/demo_data.jsonl) - Follows the ShareGPT format. The training data you receive from the `fed-ledger` is in exactly the same format.
21 | - [`merge.py`](merge.py) - Contains the utility function for merging LoRA weights. If you are training with LoRA, please ensure you merge the adapter before uploading to your Hugging Face repository.
22 | - [`demo.py`](demo.py) - A training script that implements LoRA fine-tuning for supported language models.
23 | - [`full_automation.py`](full_automation.py) - A script that automates the entire workflow: getting a task, downloading the training data, fine-tuning supported models on the training data, merging weights, uploading to your Hugging Face model repo, and submitting the task to fed-ledger.
24 | - [`training_args.yaml`](training_args.yaml) - A YAML file that defines the training hyperparameters for fine-tuning. A detailed explanation of the LoRA config can be found here: [LoRA Fine-tuning & Hyperparameters Explained](https://www.entrypointai.com/blog/lora-fine-tuning/)
25 | 
26 | ### Full Automation
27 | 
28 | Simply run
29 | 
30 | ```bash
31 | TASK_ID=<task-id> FLOCK_API_KEY="<your-flock-api-key-stakes-as-node-for-the-task>" HF_TOKEN="<your-hf-token>" CUDA_VISIBLE_DEVICES=0 HF_USERNAME="your-hf-user-name" python full_automation.py
32 | ```
33 | 
34 | The above command will automatically train and submit multiple LLMs that are smaller than the maximum parameter limit for the given task. **Ensure that each model version is committed to the same repository to maintain version history.** Refer to the [Hugging Face documentation on commits](https://huggingface.co/docs/hub/repositories-getting-started#pushing-files) for guidance on how to manage commits effectively.
35 | 
36 | #### Bypass certain models
37 | 
38 | If you want to bypass certain models, simply comment out the corresponding model config in [`training_args.yaml`](training_args.yaml).
39 | 
40 | ---
41 | 
42 | ### Play with demo.py
43 | 
44 | #### Start the Training
45 | 
46 | Execute the following command to start the training:
47 | 
48 | ```bash
49 | HF_TOKEN="hf_yourhftoken" CUDA_VISIBLE_DEVICES=0 python demo.py
50 | ```
51 | 
52 | This command initiates fine-tuning on the demo dataset, saves the fine-tuned model, merges the adapter to the base model, and saves the final model.
53 | 
54 | #### Upload the model folder to your HuggingFace repo
55 | 
56 | To maintain a clean version history and simplify change tracking, **commit your model changes to the existing repository** rather than creating a new one for each base model. For comprehensive guidance on uploading models and managing commits, consult the Hugging Face documentation on [models uploading](https://huggingface.co/docs/hub/en/models-uploading) and the [guide on commits](https://huggingface.co/docs/hub/repositories-getting-started#pushing-files).
57 | 
58 | #### Getting the task id
59 | 
60 | Before you submit the model script for a task, you will first need to stake on the task as a node.
61 | 
62 | - Go to the [FLock Training Platform](https://train.flock.io/stake)
63 | - Select the **Training Node** tab and stake on the task you want to submit the model to.
64 | - The task ID is the ID of the task you want to submit the model to.
65 | 
66 | #### Submit the model
67 | 
68 | ```bash
69 | 
70 | curl --location 'https://fed-ledger-prod.flock.io/api/v1/tasks/submit-result' \
71 | --header 'flock-api-key: <your-api-key-with-staking-on-this-task-as-node>' \
72 | --header 'Content-Type: application/json' \
73 | --data '{
74 |     "task_id": 29,
75 |     "data":{
76 |         "hg_repo_id": "Qwen/Qwen1.5-1.8B-Chat",
77 |         "base_model": "qwen1.5",
78 |         "gpu_type": "<GPU-used-for-training>",
79 |         "revision": "<huggingface-model-commit-hash>"
80 |     }
81 | }'
82 | ```
83 | 
84 | - `task_id`: The ID of the task to which you are submitting a model.
85 | - `hg_repo_id`: The Hugging Face repository where the model is stored, typically in the format `username/repository-name`.
86 | - `base_model`: The base model used for training. A list of all supported models can be found [here](https://github.com/FLock-io/llm-loss-validator/blob/main/src/core/constant.py).
87 | - `gpu_type`: The type of GPU used for training the model.
88 | - `revision`: The commit hash from the Hugging Face repository. This uniquely identifies the version of the model that was trained and submitted, allowing for precise tracking of changes and updates.
89 | 


--------------------------------------------------------------------------------
/data/demo_data.jsonl:
--------------------------------------------------------------------------------
1 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What best practices should I follow when designing a website?"}, {"role": "assistant", "content": "Here are some best practices to follow when designing a website:\n\n1. Keep it simple and easy to navigate: Make sure your website is easy to navigate and that visitors can find what they are looking for quickly.\n\n2. Use a responsive design: Ensure that your website is optimized for all devices and screen sizes, including mobile phones and tablets.\n\n3. Use high-quality images and graphics: Use high-quality images and graphics to make your website visually appealing.\n\n4. Use colors and fonts consistently: Choose colors and fonts that match your brand and use them consistently throughout your website.\n\n5. Make it easy to read: Use clear and concise language, and choose a font size and style that is easy to read.\n\n6. Use white space effectively: Use white space to make your website look less cluttered and easier to read.\n\n7. Ensure fast loading speeds: Optimize your website's loading speed to ensure that visitors don't have to wait long to access your content.\n\n8. Use SEO best practices: Optimize your website for search engines by using keywords, meta-descriptions, and other SEO best practices.\n\n9. Test your website: Regularly test your website to ensure that it is functioning properly and that any issues are addressed quickly.\n\nBy following these best practices, you can create a website that is user-friendly, visually appealing, and easy to navigate, and that effectively communicates your brand's message to your target audience."}, {"role": "user", "content": "How can I optimize my website's loading speed?"}, {"role": "assistant", "content": "Here are some ways to optimize your website's loading speed:\n\n1. 
Minimize HTTP requests: Minimize the number of HTTP requests by reducing the number of files on your website, combining files, and using CSS sprites.\n\n2. Optimize images: Optimize your images by compressing them, reducing their size, and using the right file format.\n\n3. Use a content delivery network (CDN): Use a CDN to reduce the load time of your website by serving content from the nearest server to the user.\n\n4. Enable browser caching: Enable browser caching to store static files on the user's computer, which reduces the time it takes to load the website on subsequent visits.\n\n5. Minimize plugins and scripts: Minimize the number of plugins and scripts used on your website to reduce their impact on loading speed.\n\n6. Reduce server response time: Optimize your server response time by using a fast hosting provider, reducing the number of requests, and using caching and content compression.\n\n7. Use a fast theme or template: Use a fast theme or template that is well-optimized for speed.\n\n8. Minimize the use of redirects: Minimize the use of redirects, as they can slow down the loading speed of your website.\n\nBy following these tips, you can optimize your website's loading speed and create a better user experience for your visitors."}, {"role": "user", "content": "How can I make sure my website is optimized for search engines?"}, {"role": "assistant", "content": "Here are some ways to optimize your website for search engines:\n\n1. Conduct keyword research: Conduct keyword research to identify the keywords and phrases that your target audience is using to search for your products or services.\n\n2. Use keywords in your content: Use your target keywords in your content, including in your titles, headings, meta descriptions, and body copy.\n\n3. Create high-quality content: Create high-quality, informative content that provides value to your target audience and encourages them to share it.\n\n4. 
Use meta descriptions and title tags: Use meta descriptions and title tags to provide a brief summary of your content and make it easier for search engines to understand what your website is about.\n\n5. Use header tags: Use header tags (H1, H2, H3, etc.) to organize your content and make it easier for search engines to understand the hierarchy of your content.\n\n6. Optimize images: Optimize your images by using descriptive filenames, alt tags, and captions that include your target keywords.\n\n7. Build high-quality backlinks: Build high-quality backlinks from reputable websites that are relevant to your niche.\n\n8. Use social media: Use social media to promote your content and build your online presence, which can help improve your search engine rankings.\n\n9. Make sure your website is mobile-friendly: Ensure that your website is optimized for mobile devices, as mobile-friendly websites tend to rank higher in search engine results.\n\nBy following these tips, you can optimize your website for search engines and improve your visibility in search engine results pages (SERPs)."}, {"role": "user", "content": "How can I improve my website's accessibility for people with disabilities?"}, {"role": "assistant", "content": "Here are some ways to improve your website's accessibility for people with disabilities:\n\n1. Use descriptive and meaningful text for links: Use descriptive and meaningful text for links to help users understand where the link goes and what it does.\n\n2. Provide alternative text for images: Provide alternative text for images to help users with visual impairments understand the content of the image.\n\n3. Use headings and subheadings: Use headings and subheadings to structure your content and make it easier for screen readers to read.\n\n4. Use color with care: Use color with care and ensure that color is not the only way to convey information or distinguish between different elements on your website.\n\n5. 
Provide transcripts for audio and video content: Provide transcripts for audio and video content to help users with hearing impairments understand the content.\n\n6. Use clear and concise language: Use clear and concise language to make your content easy to understand for users with cognitive disabilities.\n\n7. Ensure the website is keyboard accessible: Ensure that your website is keyboard accessible to help users with motor disabilities navigate your website.\n\n8. Make sure your website is responsive: Ensure that your website is responsive and can be easily accessed on different devices and screen sizes.\n\n9. Use accessibility tools: Use accessibility tools such as screen readers, screen magnifiers, and other assistive technologies to test your website's accessibility.\n\nBy following these tips, you can make your website more accessible to people with disabilities and provide a better user experience for all users."}]}
2 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What steps should I take to reduce taxes on my income?"}, {"role": "assistant", "content": "There are several steps you can take to reduce taxes on your income:\n\n1. Contribute to a retirement account: Contributions to a traditional IRA or a 401(k) can reduce your taxable income and save you money on taxes.\n\n2. Itemize your deductions: If you have significant expenses such as mortgage interest, medical expenses, or charitable contributions, itemizing your deductions may reduce your taxable income.\n\n3. Take advantage of tax credits: Tax credits, such as the Earned Income Tax Credit or the Child Tax Credit, can reduce your tax bill dollar for dollar.\n\n4. Invest in tax-free or tax-deferred accounts: Investments in tax-free or tax-deferred accounts such as municipal bonds, Roth IRAs, or 529 plans can reduce your taxable income.\n\n5. Consider income splitting: If you are married, consider splitting income with your spouse to reduce your overall tax bill.\n\nIt\u2019s important to consult with a tax professional or financial advisor to determine the best strategy for your specific situation."}, {"role": "user", "content": "Can you give me an example of a tax credit that can help reduce my taxes?"}, {"role": "assistant", "content": "Sure! One example of a tax credit that can help reduce your taxes is the Child Tax Credit. This is a credit of up to $2,000 per child under the age of 17 that can be claimed on your tax return. To qualify for the full credit, your income must be below a certain threshold (currently $400,000 for married filing jointly, or $200,000 for all other filers).\n\nIf your income is above the threshold, you may still be eligible for a partial credit. 
The credit is partially refundable, meaning that if the credit exceeds your tax liability, you may receive a refund for the difference up to $1,400 per child.\n\nTo claim the Child Tax Credit, you must have a qualifying child who meets certain criteria, such as being a U.S. citizen, living with you for more than half the year, and being related to you.\n\nIt\u2019s important to note that tax laws can change, so it\u2019s always a good idea to check with a tax professional or use tax preparation software to ensure you are claiming all the credits and deductions you are eligible for."}, {"role": "user", "content": "Can you explain the difference between a traditional IRA and a Roth IRA?"}, {"role": "assistant", "content": "Yes, I can explain the difference between a traditional IRA and a Roth IRA.\n\nA traditional IRA is a retirement account in which you can make pre-tax contributions, meaning you can deduct your contribution from your taxable income in the year you make it. The money in your traditional IRA grows tax-deferred, which means you won\u2019t pay taxes on the earnings until you withdraw the money in retirement. When you withdraw money from a traditional IRA, it is taxed as ordinary income.\n\nA Roth IRA, on the other hand, is a retirement account in which you make after-tax contributions, meaning you don\u2019t get a tax deduction for your contribution in the year you make it. However, the money in your Roth IRA grows tax-free, which means you won\u2019t pay taxes on the earnings when you withdraw the money in retirement. 
Additionally, qualified withdrawals from a Roth IRA are tax-free.\n\nThe difference between the two types of accounts is primarily when you pay taxes: with a traditional IRA, you pay taxes on the money when you withdraw it in retirement, while with a Roth IRA, you pay taxes on the money when you contribute to the account.\n\nIt\u2019s important to note that both types of accounts have contribution limits and other rules, so it\u2019s a good idea to consult with a financial advisor or tax professional to determine which type of account is best for your individual situation."}, {"role": "user", "content": "What is the contribution limit for a Roth IRA in 2021?"}, {"role": "assistant", "content": "The contribution limit for a Roth IRA in 2021 is $6,000 for individuals under age 50, and $7,000 for those age 50 and above. This means that you can contribute up to $6,000 or $7,000 (depending on your age) per year to your Roth IRA, as long as your income falls below certain limits.\n\nIt\u2019s important to note that contribution limits may change from year to year, so it\u2019s always a good idea to check the current year\u2019s limits before making any contributions. Additionally, there are income limits that determine whether you are eligible to contribute to a Roth IRA, and those limits can also change from year to year.\n\nIf you have any questions about Roth IRA contributions or eligibility, it\u2019s a good idea to consult with a financial advisor or tax professional."}, {"role": "user", "content": "Can you explain the difference between a 401(k) and an IRA?"}, {"role": "assistant", "content": "Yes, I can explain the difference between a 401(k) and an IRA.\n\nA 401(k) is an employer-sponsored retirement plan that allows employees to contribute a portion of their pre-tax salary to the plan. Employers may also offer matching contributions up to a certain amount. 
The money in a 401(k) grows tax-deferred, meaning you won\u2019t pay taxes on the earnings until you withdraw the money in retirement. Contributions to a 401(k) are subject to annual limits, which are set by the IRS.\n\nAn IRA, or individual retirement account, is a retirement account that you can set up on your own, outside of an employer-sponsored plan. IRAs come in two main types: traditional and Roth. A traditional IRA allows you to make pre-tax contributions, meaning you can deduct your contribution from your taxable income in the year you make it. The money in a traditional IRA grows tax-deferred, and you\u2019ll pay taxes on the money when you withdraw it in retirement. With a Roth IRA, you make after-tax contributions, meaning you don\u2019t get a tax deduction for your contribution in the year you make it. The money in a Roth IRA grows tax-free, and qualified withdrawals from a Roth IRA are tax-free.\n\nThe main difference between a 401(k) and an IRA is that a 401(k) is an employer-sponsored plan, while an IRA is an individual retirement account that you set up on your own. Additionally, 401(k) plans may have higher contribution limits than IRAs, and employers may offer matching contributions or other incentives to encourage employees to participate in the plan.\n\nIt\u2019s a good idea to consult with a financial advisor or tax professional to determine which type of account is best for your individual situation."}]}
3 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What strategies should I use to focus better while studying?"}, {"role": "assistant", "content": "Here are some strategies that can help you focus better while studying:\n\n1. Create a conducive environment: Find a quiet, distraction-free location to study. Make sure the lighting is appropriate, and the temperature is comfortable.\n\n2. Set a study schedule: Create a study schedule that works for you and stick to it. This helps you stay focused and avoids procrastination.\n\n3. Take breaks: Taking short breaks between study sessions can help you stay focused and retain information better.\n\n4. Eliminate distractions: Turn off your phone, log out from social media, and avoid any other distractions that can prevent you from concentrating.\n\n5. Use active learning techniques: Use active learning techniques such as note-taking, summarizing, and teaching others to help you stay engaged.\n\n6. Stay motivated: Set goals for yourself and reward yourself when you achieve them. This can help you stay motivated and focused.\n\n7. Get enough sleep and exercise: A healthy lifestyle can help you stay alert and focused while studying. Make sure you get enough sleep and exercise regularly.\n\nRemember, every individual is different, and what works for one person may not work for another. Find what works best for you and stick to it."}, {"role": "user", "content": "Can you give me some examples of active learning techniques that I can use while studying?"}, {"role": "assistant", "content": "Sure! Here are some examples of active learning techniques that you can use while studying:\n\n1. Summarizing: Summarize the main ideas in your own words. This helps you retain the information better.\n\n2. Note-taking: Take notes while you\u2019re reading or listening to lectures. This helps you focus your attention and remember the key points.\n\n3. 
Teaching: Explain the concepts you\u2019ve learned to someone else. This helps you understand the material better and identify areas where you need further clarification.\n\n4. Practice questions: Complete practice questions or quizzes to test your understanding of the material.\n\n5. Mnemonics: Use mnemonics (memory aids) to help you remember important information. For example, using acronyms or visual associations.\n\n6. Group study: Study with a group of peers and share your knowledge with each other. This helps you learn from different perspectives and keeps you motivated.\n\n7. Mind mapping: Create visual diagrams or mind maps to organize information and understand the relationships between different concepts.\n\nRemember, using a combination of different active learning techniques can help you to better understand and retain information."}, {"role": "user", "content": "How can I incorporate active learning techniques while studying for math?"}, {"role": "assistant", "content": "Math is a subject that requires a lot of practice, so incorporating active learning techniques can be very helpful in mastering the concepts. Here are some ways you can use active learning techniques while studying for math:\n\n1. Practice problems: Math requires a lot of practice to master the concepts. Solve practice problems and work through examples to reinforce your understanding of the material.\n\n2. Explain concepts: Explain the math concepts you\u2019ve learned to someone else. This helps you solidify your understanding and identify areas where you need further clarification.\n\n3. Create flashcards: Create flashcards for formulas, equations, and definitions. Use them to quiz yourself and help you memorize the material.\n\n4. Write out explanations: Write out explanations of the steps you took to solve a problem. This helps you identify areas where you may have made mistakes and reinforce your understanding of the material.\n\n5. 
Work in groups: Study with a group of peers and work through problems together. This helps you learn from different perspectives and keeps you motivated.\n\n6. Use online resources: There are many online resources available for math, including interactive tutorials and videos. Use them to reinforce your understanding of the material.\n\nRemember, math can be challenging, but incorporating active learning techniques can help you to better understand and retain the material."}, {"role": "user", "content": "How can I manage my time effectively while studying for multiple exams?"}, {"role": "assistant", "content": "Managing your time effectively while studying for multiple exams can be challenging, but here are some tips that can help:\n\n1. Create a study schedule: Make a schedule that outlines your study sessions for each subject. Consider your other commitments and allocate sufficient time for each subject.\n\n2. Prioritize the most important exams: Determine which exams are the most important and allocate more study time to these subjects.\n\n3. Break down the material: Break down the material into smaller, manageable chunks. This makes it easier to study and makes the task less overwhelming.\n\n4. Use study aids: Use study aids such as flashcards or summary notes to help you study more efficiently.\n\n5. Avoid multitasking: Avoid multitasking and focus on one subject at a time. This helps you to concentrate better and retain information more effectively.\n\n6. Take breaks: Taking regular breaks between study sessions can help you to stay focused and avoid burnout.\n\n7. Review regularly: Regularly review your notes and study materials to reinforce your understanding of the material.\n\n8. Stay organized: Keep your study materials organized and easily accessible. This saves time and helps you to focus on studying rather than searching for materials.\n\nRemember, effective time management is crucial when studying for multiple exams. 
By creating a study schedule, breaking down the material, prioritizing tasks, and staying organized, you can manage your time effectively and achieve better results."}]}
4 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "How can I most efficiently manage my time?"}, {"role": "assistant", "content": "Here are some tips to manage your time more efficiently:\n\n1. Make a schedule: Create a schedule of your daily tasks and prioritize them based on importance.\n\n2. Use time blocks: Group similar tasks together and allocate specific time blocks for each task.\n\n3. Take breaks: Take short breaks in between tasks to help you stay focused and avoid burnout.\n\n4. Eliminate distractions: Remove any distractions that could interfere with your work, such as social media notifications or emails.\n\n5. Delegate tasks: If possible, delegate tasks to others to free up your time for more important tasks.\n\n6. Use technology: Use productivity apps or tools to help you manage your time, such as time-tracking apps, task management tools, or calendar apps.\n\n7. Be flexible: Be open to adjusting your schedule as needed and be willing to adapt to changes in your workload or priorities.\n\nRemember, effective time management requires discipline and practice, but by implementing these tips, you can increase your productivity and achieve your goals more efficiently."}, {"role": "user", "content": "Can you recommend any time-tracking apps?"}, {"role": "assistant", "content": "Sure, here are some popular time-tracking apps you can use:\n\n1. Toggl: Toggl is a simple and easy-to-use time-tracking app that allows you to track how much time you spend on each task or project.\n\n2. RescueTime: RescueTime is a time-tracking app that runs in the background and tracks how much time you spend on various websites and applications.\n\n3. Harvest: Harvest is a time-tracking app that also allows you to create and send invoices based on the time you've tracked.\n\n4. Clockify: Clockify is a free time-tracking app that lets you track time on multiple projects and tasks, and generate reports based on your time data.\n\n5. 
Timely: Timely is a time-tracking app that uses AI to automatically track your time and create a visual timeline of your workday.\n\n6. Everhour: Everhour is a time-tracking app that integrates with popular project management tools like Asana, Trello, and Basecamp.\n\nThese apps can help you track your time more accurately, improve your productivity, and help you stay on top of your work."}, {"role": "user", "content": "Can you explain how Clockify generates reports based on time data?"}, {"role": "assistant", "content": "Certainly! Clockify generates reports based on the time data you input into the app. Once you've tracked your time for different tasks or projects, you can generate reports that provide detailed insights into your work habits and productivity. Here are some of the features of Clockify's reporting system:\n\n1. Customizable reports: Clockify allows you to customize your reports based on a variety of criteria, such as date range, project, client, or task.\n\n2. Summary reports: Clockify can generate summary reports that provide an overview of your time data, including your total hours worked, billable hours, and non-billable hours.\n\n3. Detailed reports: You can also generate detailed reports that show a breakdown of your time data by day, week, month, or year.\n\n4. Exportable reports: Clockify allows you to export your reports in a variety of formats, including PDF, CSV, and Excel.\n\n5. 
Dashboard: Clockify has a dashboard that provides a real-time overview of your time data, including your current and upcoming tasks, as well as your progress towards your goals.\n\nOverall, Clockify's reporting system can provide valuable insights into your work habits and productivity, helping you make better decisions about how to manage your time and resources."}, {"role": "user", "content": "Can Clockify be integrated with other project management tools besides Asana, Trello, and Basecamp?"}, {"role": "assistant", "content": "Yes, Clockify can be integrated with other project management tools as well. In addition to Asana, Trello, and Basecamp, Clockify also integrates with many other popular project management tools, including:\n\n1. Jira: Jira is a popular project management tool used by many software development teams. With Clockify's Jira integration, you can track your time directly from Jira issues.\n\n2. ClickUp: ClickUp is a flexible task management tool that can be customized to fit any workflow. Clockify's ClickUp integration allows you to track time on individual tasks or projects.\n\n3. Monday.com: Monday.com is a visual project management tool that allows teams to collaborate and manage their work from a single platform. Clockify's Monday.com integration allows you to track time on tasks and projects directly from the Monday.com interface.\n\n4. Todoist: Todoist is a popular task management app that can be used to manage personal to-do lists or team projects. Clockify's Todoist integration allows you to track time on individual tasks or projects.\n\n5. MeisterTask: MeisterTask is a project management tool that allows teams to collaborate and manage their work from a single platform. 
Clockify's MeisterTask integration allows you to track time on individual tasks or projects directly from the MeisterTask interface.\n\nThere are many other project management tools that Clockify integrates with, so you can choose the one that works best for your team and your workflow."}]}
5 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What tips do you have for balancing work and home life?"}, {"role": "assistant", "content": "Here are a few tips for balancing work and home life:\n\n1. Set clear boundaries: Establish clear boundaries between work and home life. Try to stick to a regular work schedule and avoid working during your personal time.\n\n2. Prioritize your tasks: Prioritize your tasks to make sure you are accomplishing the most important things first. This can help you avoid wasting time on less important tasks and free up more time for your personal life.\n\n3. Take breaks: Taking regular breaks can help you stay focused and avoid burnout. Use your breaks to relax, exercise, or spend time with loved ones.\n\n4. Use technology to your advantage: Use technology to help you manage your work and personal life. For example, use a calendar app to schedule your work and personal commitments, or use a task management app to keep track of your to-do list.\n\n5. Learn to say no: Saying no to work commitments that don't align with your priorities can free up more time for your personal life.\n\nRemember that achieving a perfect balance between work and home life is not always possible. But by implementing these tips, you can create a healthy balance that works for you."}, {"role": "user", "content": "How can I develop better time management skills?"}, {"role": "assistant", "content": "Developing better time management skills can be challenging, but it is important for achieving your personal and professional goals. Here are some tips to help you improve your time management skills:\n\n1. Track your time: Start by tracking your time for a week or two to get a sense of how you are currently spending your time. This will help you identify any time-wasting activities and prioritize your tasks.\n\n2. Set goals: Set clear and specific goals for what you want to achieve in your personal and professional life. 
This will help you prioritize your tasks and stay focused on what really matters.\n\n3. Prioritize your tasks: Identify the most important tasks and prioritize them based on their urgency and importance. Use a to-do list or a task management app to keep track of your tasks.\n\n4. Use time management tools: There are many time management tools available, such as calendars, timers, and task lists. Use these tools to help you stay organized and on track.\n\n5. Learn to say no: Saying no to non-essential tasks or commitments can help you free up more time for the things that matter most.\n\n6. Take breaks: Taking regular breaks can help you stay focused and avoid burnout. Use your breaks to relax, exercise, or spend time with loved ones.\n\nRemember that developing better time management skills takes time and practice. Be patient with yourself and stay committed to your goals."}, {"role": "user", "content": "Can you recommend any particular time management tools that would be helpful?"}, {"role": "assistant", "content": "Sure! Here are some time management tools that you may find helpful:\n\n1. Calendar apps: Calendar apps like Google Calendar and Apple Calendar can help you schedule your appointments and tasks, set reminders, and keep track of your schedule.\n\n2. Task management apps: Task management apps like Trello, Asana, and Todoist can help you create to-do lists, set deadlines for your tasks, and prioritize your work.\n\n3. Pomodoro timers: Pomodoro timers like Focus@Will and Tomato Timer can help you stay focused by breaking your work into small, manageable chunks of time.\n\n4. Time-tracking apps: Time-tracking apps like RescueTime and Toggl can help you monitor how much time you spend on different tasks and activities, and identify areas where you can improve your productivity.\n\n5. Note-taking apps: Note-taking apps like Evernote and OneNote can help you jot down ideas, take notes during meetings, and keep track of important information.\n\n6. 
Distraction-blocker apps: Distraction-blocker apps like Freedom and Cold Turkey can help you stay focused by blocking distracting websites and apps during set times of day.\n\nRemember, the key to effective time management is finding a tool or set of tools that work best for you, and then using them consistently to help you stay focused and productive."}, {"role": "user", "content": "How can I improve my work efficiency?"}, {"role": "assistant", "content": "Improving your work efficiency can help you be more productive, complete your tasks more quickly, and reduce your stress levels. Here are some tips to help you improve your work efficiency:\n\n1. Set clear goals: Set clear and specific goals for what you want to achieve and break them down into smaller, actionable steps.\n\n2. Eliminate distractions: Eliminate distractions as much as possible by turning off notifications, closing unnecessary tabs or apps, and finding a quiet space to work.\n\n3. Prioritize your tasks: Identify the most important tasks and prioritize them based on their urgency and importance.\n\n4. Use time management tools: Use tools like calendars, timers, and task lists to help you stay organized and on track.\n\n5. Automate tasks: Automate tasks like email management and scheduling appointments to free up more time for important work.\n\n6. Delegate tasks: Delegate tasks where possible to colleagues or employees to help you focus on your most important responsibilities.\n\n7. Take breaks: Taking regular breaks can help you stay focused and avoid burnout. Use your breaks to relax, exercise, or spend time with loved ones.\n\nRemember, improving your work efficiency takes time and practice. Be patient with yourself and stay committed to your goals."}]}
6 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What best practices should I follow when designing a website?"}, {"role": "assistant", "content": "Here are some best practices to follow when designing a website:\n\n1. Keep it simple and easy to navigate: Make sure your website is easy to navigate and that visitors can find what they are looking for quickly.\n\n2. Use a responsive design: Ensure that your website is optimized for all devices and screen sizes, including mobile phones and tablets.\n\n3. Use high-quality images and graphics: Use high-quality images and graphics to make your website visually appealing.\n\n4. Use colors and fonts consistently: Choose colors and fonts that match your brand and use them consistently throughout your website.\n\n5. Make it easy to read: Use clear and concise language, and choose a font size and style that is easy to read.\n\n6. Use white space effectively: Use white space to make your website look less cluttered and easier to read.\n\n7. Ensure fast loading speeds: Optimize your website's loading speed to ensure that visitors don't have to wait long to access your content.\n\n8. Use SEO best practices: Optimize your website for search engines by using keywords, meta-descriptions, and other SEO best practices.\n\n9. Test your website: Regularly test your website to ensure that it is functioning properly and that any issues are addressed quickly.\n\nBy following these best practices, you can create a website that is user-friendly, visually appealing, and easy to navigate, and that effectively communicates your brand's message to your target audience."}, {"role": "user", "content": "How can I optimize my website's loading speed?"}, {"role": "assistant", "content": "Here are some ways to optimize your website's loading speed:\n\n1. 
Minimize HTTP requests: Minimize the number of HTTP requests by reducing the number of files on your website, combining files, and using CSS sprites.\n\n2. Optimize images: Optimize your images by compressing them, reducing their size, and using the right file format.\n\n3. Use a content delivery network (CDN): Use a CDN to reduce the load time of your website by serving content from the nearest server to the user.\n\n4. Enable browser caching: Enable browser caching to store static files on the user's computer, which reduces the time it takes to load the website on subsequent visits.\n\n5. Minimize plugins and scripts: Minimize the number of plugins and scripts used on your website to reduce their impact on loading speed.\n\n6. Reduce server response time: Optimize your server response time by using a fast hosting provider, reducing the number of requests, and using caching and content compression.\n\n7. Use a fast theme or template: Use a fast theme or template that is well-optimized for speed.\n\n8. Minimize the use of redirects: Minimize the use of redirects, as they can slow down the loading speed of your website.\n\nBy following these tips, you can optimize your website's loading speed and create a better user experience for your visitors."}, {"role": "user", "content": "How can I make sure my website is optimized for search engines?"}, {"role": "assistant", "content": "Here are some ways to optimize your website for search engines:\n\n1. Conduct keyword research: Conduct keyword research to identify the keywords and phrases that your target audience is using to search for your products or services.\n\n2. Use keywords in your content: Use your target keywords in your content, including in your titles, headings, meta descriptions, and body copy.\n\n3. Create high-quality content: Create high-quality, informative content that provides value to your target audience and encourages them to share it.\n\n4. 
Use meta descriptions and title tags: Use meta descriptions and title tags to provide a brief summary of your content and make it easier for search engines to understand what your website is about.\n\n5. Use header tags: Use header tags (H1, H2, H3, etc.) to organize your content and make it easier for search engines to understand the hierarchy of your content.\n\n6. Optimize images: Optimize your images by using descriptive filenames, alt tags, and captions that include your target keywords.\n\n7. Build high-quality backlinks: Build high-quality backlinks from reputable websites that are relevant to your niche.\n\n8. Use social media: Use social media to promote your content and build your online presence, which can help improve your search engine rankings.\n\n9. Make sure your website is mobile-friendly: Ensure that your website is optimized for mobile devices, as mobile-friendly websites tend to rank higher in search engine results.\n\nBy following these tips, you can optimize your website for search engines and improve your visibility in search engine results pages (SERPs)."}, {"role": "user", "content": "How can I improve my website's accessibility for people with disabilities?"}, {"role": "assistant", "content": "Here are some ways to improve your website's accessibility for people with disabilities:\n\n1. Use descriptive and meaningful text for links: Use descriptive and meaningful text for links to help users understand where the link goes and what it does.\n\n2. Provide alternative text for images: Provide alternative text for images to help users with visual impairments understand the content of the image.\n\n3. Use headings and subheadings: Use headings and subheadings to structure your content and make it easier for screen readers to read.\n\n4. Use color with care: Use color with care and ensure that color is not the only way to convey information or distinguish between different elements on your website.\n\n5. 
Provide transcripts for audio and video content: Provide transcripts for audio and video content to help users with hearing impairments understand the content.\n\n6. Use clear and concise language: Use clear and concise language to make your content easy to understand for users with cognitive disabilities.\n\n7. Ensure the website is keyboard accessible: Ensure that your website is keyboard accessible to help users with motor disabilities navigate your website.\n\n8. Make sure your website is responsive: Ensure that your website is responsive and can be easily accessed on different devices and screen sizes.\n\n9. Use accessibility tools: Use accessibility tools such as screen readers, screen magnifiers, and other assistive technologies to test your website's accessibility.\n\nBy following these tips, you can make your website more accessible to people with disabilities and provide a better user experience for all users."}]}
7 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What steps should I take to reduce taxes on my income?"}, {"role": "assistant", "content": "There are several steps you can take to reduce taxes on your income:\n\n1. Contribute to a retirement account: Contributions to a traditional IRA or a 401(k) can reduce your taxable income and save you money on taxes.\n\n2. Itemize your deductions: If you have significant expenses such as mortgage interest, medical expenses, or charitable contributions, itemizing your deductions may reduce your taxable income.\n\n3. Take advantage of tax credits: Tax credits, such as the Earned Income Tax Credit or the Child Tax Credit, can reduce your tax bill dollar for dollar.\n\n4. Invest in tax-free or tax-deferred accounts: Investments in tax-free or tax-deferred accounts such as municipal bonds, Roth IRAs, or 529 plans can reduce your taxable income.\n\n5. Consider income splitting: If you are married, consider splitting income with your spouse to reduce your overall tax bill.\n\nIt\u2019s important to consult with a tax professional or financial advisor to determine the best strategy for your specific situation."}, {"role": "user", "content": "Can you give me an example of a tax credit that can help reduce my taxes?"}, {"role": "assistant", "content": "Sure! One example of a tax credit that can help reduce your taxes is the Child Tax Credit. This is a credit of up to $2,000 per child under the age of 17 that can be claimed on your tax return. To qualify for the full credit, your income must be below a certain threshold (currently $400,000 for married filing jointly, or $200,000 for all other filers).\n\nIf your income is above the threshold, you may still be eligible for a partial credit. 
The credit is partially refundable, meaning that if the credit exceeds your tax liability, you may receive a refund for the difference up to $1,400 per child.\n\nTo claim the Child Tax Credit, you must have a qualifying child who meets certain criteria, such as being a U.S. citizen, living with you for more than half the year, and being related to you.\n\nIt\u2019s important to note that tax laws can change, so it\u2019s always a good idea to check with a tax professional or use tax preparation software to ensure you are claiming all the credits and deductions you are eligible for."}, {"role": "user", "content": "Can you explain the difference between a traditional IRA and a Roth IRA?"}, {"role": "assistant", "content": "Yes, I can explain the difference between a traditional IRA and a Roth IRA.\n\nA traditional IRA is a retirement account in which you can make pre-tax contributions, meaning you can deduct your contribution from your taxable income in the year you make it. The money in your traditional IRA grows tax-deferred, which means you won\u2019t pay taxes on the earnings until you withdraw the money in retirement. When you withdraw money from a traditional IRA, it is taxed as ordinary income.\n\nA Roth IRA, on the other hand, is a retirement account in which you make after-tax contributions, meaning you don\u2019t get a tax deduction for your contribution in the year you make it. However, the money in your Roth IRA grows tax-free, which means you won\u2019t pay taxes on the earnings when you withdraw the money in retirement. 
Additionally, qualified withdrawals from a Roth IRA are tax-free.\n\nThe difference between the two types of accounts is primarily when you pay taxes: with a traditional IRA, you pay taxes on the money when you withdraw it in retirement, while with a Roth IRA, you pay taxes on the money when you contribute to the account.\n\nIt\u2019s important to note that both types of accounts have contribution limits and other rules, so it\u2019s a good idea to consult with a financial advisor or tax professional to determine which type of account is best for your individual situation."}, {"role": "user", "content": "What is the contribution limit for a Roth IRA in 2021?"}, {"role": "assistant", "content": "The contribution limit for a Roth IRA in 2021 is $6,000 for individuals under age 50, and $7,000 for those age 50 and above. This means that you can contribute up to $6,000 or $7,000 (depending on your age) per year to your Roth IRA, as long as your income falls below certain limits.\n\nIt\u2019s important to note that contribution limits may change from year to year, so it\u2019s always a good idea to check the current year\u2019s limits before making any contributions. Additionally, there are income limits that determine whether you are eligible to contribute to a Roth IRA, and those limits can also change from year to year.\n\nIf you have any questions about Roth IRA contributions or eligibility, it\u2019s a good idea to consult with a financial advisor or tax professional."}, {"role": "user", "content": "Can you explain the difference between a 401(k) and an IRA?"}, {"role": "assistant", "content": "Yes, I can explain the difference between a 401(k) and an IRA.\n\nA 401(k) is an employer-sponsored retirement plan that allows employees to contribute a portion of their pre-tax salary to the plan. Employers may also offer matching contributions up to a certain amount. 
The money in a 401(k) grows tax-deferred, meaning you won\u2019t pay taxes on the earnings until you withdraw the money in retirement. Contributions to a 401(k) are subject to annual limits, which are set by the IRS.\n\nAn IRA, or individual retirement account, is a retirement account that you can set up on your own, outside of an employer-sponsored plan. IRAs come in two main types: traditional and Roth. A traditional IRA allows you to make pre-tax contributions, meaning you can deduct your contribution from your taxable income in the year you make it. The money in a traditional IRA grows tax-deferred, and you\u2019ll pay taxes on the money when you withdraw it in retirement. With a Roth IRA, you make after-tax contributions, meaning you don\u2019t get a tax deduction for your contribution in the year you make it. The money in a Roth IRA grows tax-free, and qualified withdrawals from a Roth IRA are tax-free.\n\nThe main difference between a 401(k) and an IRA is that a 401(k) is an employer-sponsored plan, while an IRA is an individual retirement account that you set up on your own. Additionally, 401(k) plans may have higher contribution limits than IRAs, and employers may offer matching contributions or other incentives to encourage employees to participate in the plan.\n\nIt\u2019s a good idea to consult with a financial advisor or tax professional to determine which type of account is best for your individual situation."}]}
8 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What strategies should I use to focus better while studying?"}, {"role": "assistant", "content": "Here are some strategies that can help you focus better while studying:\n\n1. Create a conducive environment: Find a quiet, distraction-free location to study. Make sure the lighting is appropriate, and the temperature is comfortable.\n\n2. Set a study schedule: Create a study schedule that works for you and stick to it. This helps you stay focused and avoids procrastination.\n\n3. Take breaks: Taking short breaks between study sessions can help you stay focused and retain information better.\n\n4. Eliminate distractions: Turn off your phone, log out from social media, and avoid any other distractions that can prevent you from concentrating.\n\n5. Use active learning techniques: Use active learning techniques such as note-taking, summarizing, and teaching others to help you stay engaged.\n\n6. Stay motivated: Set goals for yourself and reward yourself when you achieve them. This can help you stay motivated and focused.\n\n7. Get enough sleep and exercise: A healthy lifestyle can help you stay alert and focused while studying. Make sure you get enough sleep and exercise regularly.\n\nRemember, every individual is different, and what works for one person may not work for another. Find what works best for you and stick to it."}, {"role": "user", "content": "Can you give me some examples of active learning techniques that I can use while studying?"}, {"role": "assistant", "content": "Sure! Here are some examples of active learning techniques that you can use while studying:\n\n1. Summarizing: Summarize the main ideas in your own words. This helps you retain the information better.\n\n2. Note-taking: Take notes while you\u2019re reading or listening to lectures. This helps you focus your attention and remember the key points.\n\n3. 
Teaching: Explain the concepts you\u2019ve learned to someone else. This helps you understand the material better and identify areas where you need further clarification.\n\n4. Practice questions: Complete practice questions or quizzes to test your understanding of the material.\n\n5. Mnemonics: Use mnemonics (memory aids) to help you remember important information. For example, using acronyms or visual associations.\n\n6. Group study: Study with a group of peers and share your knowledge with each other. This helps you learn from different perspectives and keeps you motivated.\n\n7. Mind mapping: Create visual diagrams or mind maps to organize information and understand the relationships between different concepts.\n\nRemember, using a combination of different active learning techniques can help you to better understand and retain information."}, {"role": "user", "content": "How can I incorporate active learning techniques while studying for math?"}, {"role": "assistant", "content": "Math is a subject that requires a lot of practice, so incorporating active learning techniques can be very helpful in mastering the concepts. Here are some ways you can use active learning techniques while studying for math:\n\n1. Practice problems: Math requires a lot of practice to master the concepts. Solve practice problems and work through examples to reinforce your understanding of the material.\n\n2. Explain concepts: Explain the math concepts you\u2019ve learned to someone else. This helps you solidify your understanding and identify areas where you need further clarification.\n\n3. Create flashcards: Create flashcards for formulas, equations, and definitions. Use them to quiz yourself and help you memorize the material.\n\n4. Write out explanations: Write out explanations of the steps you took to solve a problem. This helps you identify areas where you may have made mistakes and reinforce your understanding of the material.\n\n5. 
Work in groups: Study with a group of peers and work through problems together. This helps you learn from different perspectives and keeps you motivated.\n\n6. Use online resources: There are many online resources available for math, including interactive tutorials and videos. Use them to reinforce your understanding of the material.\n\nRemember, math can be challenging, but incorporating active learning techniques can help you to better understand and retain the material."}, {"role": "user", "content": "How can I manage my time effectively while studying for multiple exams?"}, {"role": "assistant", "content": "Managing your time effectively while studying for multiple exams can be challenging, but here are some tips that can help:\n\n1. Create a study schedule: Make a schedule that outlines your study sessions for each subject. Consider your other commitments and allocate sufficient time for each subject.\n\n2. Prioritize the most important exams: Determine which exams are the most important and allocate more study time to these subjects.\n\n3. Break down the material: Break down the material into smaller, manageable chunks. This makes it easier to study and makes the task less overwhelming.\n\n4. Use study aids: Use study aids such as flashcards or summary notes to help you study more efficiently.\n\n5. Avoid multitasking: Avoid multitasking and focus on one subject at a time. This helps you to concentrate better and retain information more effectively.\n\n6. Take breaks: Taking regular breaks between study sessions can help you to stay focused and avoid burnout.\n\n7. Review regularly: Regularly review your notes and study materials to reinforce your understanding of the material.\n\n8. Stay organized: Keep your study materials organized and easily accessible. This saves time and helps you to focus on studying rather than searching for materials.\n\nRemember, effective time management is crucial when studying for multiple exams. 
By creating a study schedule, breaking down the material, prioritizing tasks, and staying organized, you can manage your time effectively and achieve better results."}]}
9 | 


--------------------------------------------------------------------------------
/data/test_data_loading.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import json
 3 | 
 4 | 
 5 | @pytest.fixture
 6 | def load_data():
 7 |     with open("function_calling_demo.jsonl", "r", encoding="utf8") as f:
 8 |         data_list = f.readlines()
 9 |     return data_list
10 | 
11 | 
def test_data_loading(load_data):
    """Check that every record of the demo dataset has the expected shape.

    Each line must be a JSON object whose ``conversations`` entry is a list
    containing at least one ``user`` and one ``assistant`` turn. Failure
    messages carry the 1-based line number of the offending record.
    """
    # Verify data was loaded correctly
    assert load_data is not None, "Data should be loaded"
    assert len(load_data) > 0, "Data list should not be empty"

    # Verify each line is a valid JSON with required fields
    for line_no, line in enumerate(load_data, start=1):
        # Only the parse itself can raise JSONDecodeError, so keep the try
        # body minimal and let the assertions below fail on their own.
        try:
            data = json.loads(line)
        except json.JSONDecodeError as exc:
            pytest.fail(
                f"Each line in the data file should be valid JSON "
                f"(line {line_no}: {exc})"
            )

        assert isinstance(data, dict), (
            f"Line {line_no}: Each line should be a JSON object (dictionary)"
        )

        # Check for "conversations" key
        assert "conversations" in data, (
            f"Line {line_no}: 'conversations' key is missing in the JSON object"
        )
        conversations = data["conversations"]
        assert isinstance(conversations, list), (
            f"Line {line_no}: 'conversations' should be a list"
        )

        # Check for 'user' and 'assistant' roles within "conversations"
        roles = {conv["role"] for conv in conversations if "role" in conv}
        assert "user" in roles, (
            f"Line {line_no}: Role 'user' is missing in conversations"
        )
        assert "assistant" in roles, (
            f"Line {line_no}: Role 'assistant' is missing in conversations"
        )
40 | 


--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from typing import Any, Dict, List
  3 | 
  4 | import torch
  5 | from loguru import logger
  6 | from torch.utils.data import Dataset
  7 | from utils.tool_utils import function_formatter
  8 | 
  9 | 
 10 | class SFTDataset(Dataset):
 11 |     def __init__(self, file, tokenizer, max_seq_length, template):
 12 |         self.tokenizer = tokenizer
 13 |         self.system_format = template["system_format"]
 14 |         self.user_format = template["user_format"]
 15 |         self.assistant_format = template["assistant_format"]
 16 |         self.tool_format = template["tool_format"]
 17 |         self.function_format = template["function_format"]
 18 |         self.observation_format = template["observation_format"]
 19 | 
 20 |         self.max_seq_length = max_seq_length
 21 |         logger.info("Loading data: {}".format(file))
 22 |         with open(file, "r", encoding="utf8") as f:
 23 |             data_list = f.readlines()
 24 |         logger.info("There are {} data in dataset".format(len(data_list)))
 25 |         self.data_list = data_list
 26 | 
 27 |     def __len__(self):
 28 |         return len(self.data_list)
 29 | 
 30 |     def __getitem__(self, index):
 31 |         data = self.data_list[index]
 32 |         data = json.loads(data)
 33 |         input_ids, target_mask = [], []
 34 | 
 35 |         # setting system information
 36 |         if self.system_format is not None:
 37 |             system = data["system"].strip() if "system" in data.keys() else self.system
 38 | 
 39 |             if system is not None:
 40 |                 system_text = self.system_format.format(content=system)
 41 |                 input_ids = self.tokenizer.encode(system_text, add_special_tokens=False)
 42 |                 target_mask = [0] * len(input_ids)
 43 | 
 44 |         conversations = data["conversations"]
 45 | 
 46 |         input_buffer = ""
 47 |         for i in range(len(conversations)):
 48 |             role = conversations[i]["role"]
 49 |             content = conversations[i]["content"].strip()
 50 | 
 51 |             if role != "assistant":
 52 |                 if role == "user":
 53 |                     human = self.user_format.format(
 54 |                         content=content, stop_token=self.tokenizer.eos_token
 55 |                     )
 56 |                     input_buffer += human
 57 | 
 58 |                 elif role == "function_call":
 59 |                     tool_calls = function_formatter(json.loads(content))
 60 |                     function = self.function_format.format(content=tool_calls)
 61 |                     input_buffer += function
 62 | 
 63 |                 elif role == "observation":
 64 |                     observation = self.observation_format.format(content=content)
 65 |                     input_buffer += observation
 66 |             else:
 67 |                 assistant = self.assistant_format.format(
 68 |                     content=content, stop_token=self.tokenizer.eos_token
 69 |                 )
 70 | 
 71 |                 input_tokens = self.tokenizer.encode(
 72 |                     input_buffer, add_special_tokens=False
 73 |                 )
 74 |                 output_tokens = self.tokenizer.encode(
 75 |                     assistant, add_special_tokens=False
 76 |                 )
 77 | 
 78 |                 input_ids += input_tokens + output_tokens
 79 |                 target_mask += [0] * len(input_tokens) + [1] * len(output_tokens)
 80 |                 input_buffer = ""
 81 | 
 82 |         assert len(input_ids) == len(target_mask)
 83 | 
 84 |         input_ids = input_ids[: self.max_seq_length]
 85 |         target_mask = target_mask[: self.max_seq_length]
 86 |         attention_mask = [1] * len(input_ids)
 87 |         assert len(input_ids) == len(target_mask) == len(attention_mask)
 88 |         inputs = {
 89 |             "input_ids": input_ids,
 90 |             "attention_mask": attention_mask,
 91 |             "target_mask": target_mask,
 92 |         }
 93 |         return inputs
 94 | 
 95 | 
 96 | class SFTDataCollator(object):
 97 |     def __init__(self, tokenizer, max_seq_length):
 98 |         self.tokenizer = tokenizer
 99 |         self.max_seq_length = max_seq_length
100 |         self.pad_token_id = tokenizer.pad_token_id
101 | 
102 |     def __call__(self, batch: List[Dict[str, Any]]) -> Dict[str, Any]:
103 |         # Find the maximum length in the batch
104 |         lengths = [len(x["input_ids"]) for x in batch if x["input_ids"] is not None]
105 |         # Take the maximum length in the batch, if it exceeds max_seq_length, take max_seq_length
106 |         batch_max_len = min(max(lengths), self.max_seq_length)
107 | 
108 |         input_ids_batch, attention_mask_batch, target_mask_batch = [], [], []
109 |         # Truncate and pad
110 |         for x in batch:
111 |             input_ids = x["input_ids"]
112 |             attention_mask = x["attention_mask"]
113 |             target_mask = x["target_mask"]
114 |             if input_ids is None:
115 |                 logger.info("some input_ids is None")
116 |                 continue
117 |             padding_len = batch_max_len - len(input_ids)
118 |             # Pad
119 |             input_ids = input_ids + [self.pad_token_id] * padding_len
120 |             attention_mask = attention_mask + [0] * padding_len
121 |             target_mask = target_mask + [0] * padding_len
122 |             # Truncate
123 |             input_ids = input_ids[: self.max_seq_length]
124 |             attention_mask = attention_mask[: self.max_seq_length]
125 |             target_mask = target_mask[: self.max_seq_length]
126 | 
127 |             input_ids_batch.append(input_ids)
128 |             attention_mask_batch.append(attention_mask)
129 |             target_mask_batch.append(target_mask)
130 | 
131 |         # Convert lists to tensors to get the final model input
132 |         input_ids_batch = torch.tensor(input_ids_batch, dtype=torch.long)
133 |         attention_mask_batch = torch.tensor(attention_mask_batch, dtype=torch.long)
134 |         target_mask_batch = torch.tensor(target_mask_batch, dtype=torch.long)
135 | 
136 |         labels = torch.where(target_mask_batch == 1, input_ids_batch, -100)
137 |         inputs = {
138 |             "input_ids": input_ids_batch,
139 |             "attention_mask": attention_mask_batch,
140 |             "labels": labels,
141 |         }
142 |         return inputs
143 | 


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from dataclasses import dataclass
  3 | 
  4 | import torch
  5 | from peft import LoraConfig
  6 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  7 | from trl import SFTTrainer, SFTConfig
  8 | 
  9 | from dataset import SFTDataCollator, SFTDataset
 10 | from utils.constants import model2template
 11 | 
 12 | 
 13 | @dataclass
 14 | class LoraTrainingArguments:
 15 |     per_device_train_batch_size: int
 16 |     gradient_accumulation_steps: int
 17 |     num_train_epochs: int
 18 |     lora_rank: int
 19 |     lora_alpha: int
 20 |     lora_dropout: int
 21 | 
 22 | 
 23 | def train_lora(
 24 |     model_id: str, context_length: int, training_args: LoraTrainingArguments
 25 | ):
 26 |     assert model_id in model2template, f"model_id {model_id} not supported"
 27 |     lora_config = LoraConfig(
 28 |         r=training_args.lora_rank,
 29 |         target_modules=[
 30 |             "q_proj",
 31 |             "v_proj",
 32 |         ],
 33 |         lora_alpha=training_args.lora_alpha,
 34 |         lora_dropout=training_args.lora_dropout,
 35 |         task_type="CAUSAL_LM",
 36 |     )
 37 | 
 38 |     # Load model in 4-bit to do qLoRA
 39 |     bnb_config = BitsAndBytesConfig(
 40 |         load_in_4bit=True,
 41 |         bnb_4bit_quant_type="nf4",
 42 |         bnb_4bit_compute_dtype=torch.bfloat16,
 43 |     )
 44 | 
 45 |     training_args = SFTConfig(
 46 |         per_device_train_batch_size=training_args.per_device_train_batch_size,
 47 |         gradient_accumulation_steps=training_args.gradient_accumulation_steps,
 48 |         warmup_steps=100,
 49 |         learning_rate=2e-4,
 50 |         bf16=True,
 51 |         logging_steps=20,
 52 |         output_dir="outputs",
 53 |         optim="paged_adamw_8bit",
 54 |         remove_unused_columns=False,
 55 |         num_train_epochs=training_args.num_train_epochs,
 56 |         max_seq_length=context_length,
 57 |     )
 58 |     tokenizer = AutoTokenizer.from_pretrained(
 59 |         model_id,
 60 |         use_fast=True,
 61 |     )
 62 |     model = AutoModelForCausalLM.from_pretrained(
 63 |         model_id,
 64 |         quantization_config=bnb_config,
 65 |         device_map={"": 0},
 66 |         token=os.environ["HF_TOKEN"],
 67 |     )
 68 | 
 69 |     # Load dataset
 70 |     dataset = SFTDataset(
 71 |         file="data/demo_data.jsonl",
 72 |         tokenizer=tokenizer,
 73 |         max_seq_length=context_length,
 74 |         template=model2template[model_id],
 75 |     )
 76 | 
 77 |     # Define trainer
 78 |     trainer = SFTTrainer(
 79 |         model=model,
 80 |         train_dataset=dataset,
 81 |         args=training_args,
 82 |         peft_config=lora_config,
 83 |         data_collator=SFTDataCollator(tokenizer, max_seq_length=context_length),
 84 |     )
 85 | 
 86 |     # Train model
 87 |     trainer.train()
 88 | 
 89 |     # save model
 90 |     trainer.save_model("outputs")
 91 | 
 92 |     # remove checkpoint folder
 93 |     os.system("rm -rf outputs/checkpoint-*")
 94 | 
 95 |     # upload lora weights and tokenizer
 96 |     print("Training Completed.")
 97 | 
 98 | 
 99 | if __name__ == "__main__":
100 |     # Define training arguments for LoRA fine-tuning
101 |     training_args = LoraTrainingArguments(
102 |         num_train_epochs=3,
103 |         per_device_train_batch_size=2,
104 |         gradient_accumulation_steps=2,
105 |         lora_rank=8,
106 |         lora_alpha=16,
107 |         lora_dropout=0.05,
108 |     )
109 | 
110 |     # Set model ID and context length
111 |     model_id = "Qwen/Qwen1.5-0.5B"
112 |     context_length = 2048
113 | 
114 |     # Start LoRA fine-tuning
115 |     train_lora(
116 |         model_id=model_id, context_length=context_length, training_args=training_args
117 |     )
118 | 


--------------------------------------------------------------------------------
/full_automation.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | import requests
 5 | import yaml
 6 | from loguru import logger
 7 | from huggingface_hub import HfApi
 8 | 
 9 | from demo import LoraTrainingArguments, train_lora
10 | from utils.constants import model2base_model, model2size
11 | from utils.flock_api import get_task, submit_task
12 | from utils.gpu_utils import get_gpu_type
13 | 
# Hugging Face account that owns the uploaded repos; read at import time.
HF_USERNAME = os.environ["HF_USERNAME"]

if __name__ == "__main__":
    import shutil

    task_id = os.environ["TASK_ID"]
    # load the training args from the YAML file next to this script
    current_folder = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(current_folder, "training_args.yaml"), "r") as f:
        all_training_args = yaml.safe_load(f)

    task = get_task(task_id)
    # log the task info
    logger.info(json.dumps(task, indent=4))
    data_url = task["data"]["training_set_url"]
    context_length = task["data"]["context_length"]
    max_params = task["data"]["max_params"]

    # keep only the models whose size is within max_params
    feasible_models = {k for k, v in model2size.items() if v <= max_params}
    all_training_args = {
        k: v for k, v in all_training_args.items() if k in feasible_models
    }
    logger.info(f"Models within the max_params: {all_training_args.keys()}")

    # download the training set in chunks from the presigned url
    os.makedirs("data", exist_ok=True)
    response = requests.get(data_url, stream=True, timeout=60)
    # Fail here instead of writing an HTTP error page into the training file.
    response.raise_for_status()
    with open("data/demo_data.jsonl", "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

    # train all feasible models and submit each of them
    for model_id in all_training_args:
        logger.info(f"Start to train the model {model_id}...")
        # if OOM, proceed to the next model
        try:
            train_lora(
                model_id=model_id,
                context_length=context_length,
                training_args=LoraTrainingArguments(**all_training_args[model_id]),
            )
        except RuntimeError as e:
            logger.error(f"Error: {e}")
            logger.info("Proceed to the next model...")
            continue

        # the GPU type is reported together with the submission
        gpu_type = get_gpu_type()

        try:
            logger.info("Start to push the lora weight to the hub...")
            api = HfApi(token=os.environ["HF_TOKEN"])
            repo_name = f"{HF_USERNAME}/task-{task_id}-{model_id.replace('/', '-')}"
            # exist_ok=True reuses an existing repo (a new commit is added)
            # while real failures such as bad credentials still surface in
            # the except branch below.
            api.create_repo(
                repo_name,
                exist_ok=True,
                repo_type="model",
            )

            commit_message = api.upload_folder(
                folder_path="outputs",
                repo_id=repo_name,
                repo_type="model",
            )
            # get commit hash
            commit_hash = commit_message.oid
            logger.info(f"Commit hash: {commit_hash}")
            logger.info(f"Repo name: {repo_name}")
            # submit
            submit_task(
                task_id, repo_name, model2base_model[model_id], gpu_type, commit_hash
            )
            logger.info("Task submitted successfully")
        except Exception as e:
            logger.error(f"Error: {e}")
            logger.info("Proceed to the next model...")
        finally:
            # cleanup merged_model and output; no ``continue`` here, because
            # a jump out of ``finally`` would swallow KeyboardInterrupt and
            # SystemExit raised during upload or submission.
            shutil.rmtree("merged_model", ignore_errors=True)
            shutil.rmtree("outputs", ignore_errors=True)
98 | 


--------------------------------------------------------------------------------
/merge.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from peft import PeftModel
 3 | from transformers import AutoModelForCausalLM, AutoTokenizer
 4 | 
 5 | 
 6 | def merge_lora_to_base_model(
 7 |     model_name_or_path: str, adapter_name_or_path: str, save_path: str
 8 | ):
 9 |     tokenizer = AutoTokenizer.from_pretrained(
10 |         adapter_name_or_path,
11 |         use_fast=True,
12 |     )
13 |     model = AutoModelForCausalLM.from_pretrained(
14 |         model_name_or_path,
15 |         trust_remote_code=True,
16 |         low_cpu_mem_usage=True,
17 |         torch_dtype=torch.float16,
18 |         device_map={"": "cpu"},
19 |     )
20 |     model = PeftModel.from_pretrained(
21 |         model, adapter_name_or_path, device_map={"": "cpu"}
22 |     )
23 |     model = model.merge_and_unload()
24 | 
25 |     tokenizer.save_pretrained(save_path)
26 |     model.save_pretrained(save_path)
27 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.13.1
2 | transformers>=4.43.0,<=4.45.0
3 | peft>=0.10.0,<=0.13.2
4 | loguru
5 | trl>=0.9.3,<=0.9.6
6 | bitsandbytes
7 | pyyaml
8 | 


--------------------------------------------------------------------------------
/training_args.yaml:
--------------------------------------------------------------------------------
 1 | Qwen/Qwen2.5-0.5B-Instruct:
 2 |   per_device_train_batch_size: 1
 3 |   gradient_accumulation_steps: 8
 4 |   num_train_epochs: 1
 5 |   lora_rank: 8
 6 |   lora_alpha: 16
 7 |   lora_dropout: 0.1
 8 | 
 9 | Qwen/Qwen2.5-1.5B-Instruct:
10 |   per_device_train_batch_size: 1
11 |   gradient_accumulation_steps: 8
12 |   num_train_epochs: 1
13 |   lora_rank: 4
14 |   lora_alpha: 8
15 |   lora_dropout: 0.1
16 | 
17 | Qwen/Qwen2.5-7B-Instruct:
18 |   per_device_train_batch_size: 1
19 |   gradient_accumulation_steps: 8
20 |   num_train_epochs: 1
21 |   lora_rank: 4
22 |   lora_alpha: 8
23 |   lora_dropout: 0.1
24 | 


--------------------------------------------------------------------------------
/utils/constants.py:
--------------------------------------------------------------------------------
 1 | qwen_template = {
 2 |     "system_format": "<|im_start|>system\n{content}<|im_end|>\n",
 3 |     "user_format": "<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n",
 4 |     "assistant_format": "{content}<|im_end|>\n",
 5 |     "tool_format": "{content}",
 6 |     "function_format": "{content}",
 7 |     "observation_format": "<|im_start|>tool\n{content}<|im_end|>\n<|im_start|>assistant\n",
 8 |     "system": "You are a helpful assistant.",
 9 | }
10 | 
11 | model2template = {
12 |     "Qwen/Qwen2.5-0.5B-Instruct": qwen_template,
13 |     "Qwen/Qwen2.5-1.5B-Instruct": qwen_template,
14 |     "Qwen/Qwen2.5-7B-Instruct": qwen_template,
15 | }
16 | 
17 | model2size = {
18 |     "Qwen/Qwen2.5-0.5B-Instruct": 494_000_000,
19 |     "Qwen/Qwen2.5-1.5B-Instruct": 1_540_000_000,
20 |     "Qwen/Qwen2.5-7B-Instruct": 7_620_000_000,
21 | }
22 | 
23 | model2base_model = {
24 |     "Qwen/Qwen2.5-0.5B-Instruct": "qwen1.5",
25 |     "Qwen/Qwen2.5-1.5B-Instruct": "qwen1.5",
26 |     "Qwen/Qwen2.5-7B-Instruct": "qwen1.5",
27 | }
28 | 


--------------------------------------------------------------------------------
/utils/flock_api.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | import requests
 5 | 
# API key sent in the "flock-api-key" request header. It is read at import
# time, so importing this module raises KeyError when FLOCK_API_KEY is unset.
FLOCK_API_KEY = os.environ["FLOCK_API_KEY"]
# Base URL that the task endpoints in this module are appended to.
FED_LEDGER_BASE_URL = "https://fed-ledger-prod.flock.io/api/v1"
 8 | 
 9 | 
def get_task(task_id: int):
    """Fetch a task description from the fed-ledger API.

    Args:
        task_id: Identifier of the task to look up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            callers never mistake an error payload for a task.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    response = requests.get(
        f"{FED_LEDGER_BASE_URL}/tasks/get",
        # Let requests build and escape the query string.
        params={"task_id": task_id},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
15 | 
16 | 
def submit_task(
    task_id: int, hg_repo_id: str, base_model: str, gpu_type: str, revision: str
):
    """Submit a trained model for a task to the fed-ledger API.

    Args:
        task_id: Identifier of the task the submission belongs to.
        hg_repo_id: Hugging Face repo holding the uploaded weights.
        base_model: Base-model label of the submission.
        gpu_type: Name of the GPU used for training.
        revision: Commit hash of the upload in ``hg_repo_id``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: if the server does not answer with status 200.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    payload = json.dumps(
        {
            "task_id": task_id,
            "data": {
                "hg_repo_id": hg_repo_id,
                "base_model": base_model,
                "gpu_type": gpu_type,
                "revision": revision,
            },
        }
    )
    headers = {
        "flock-api-key": FLOCK_API_KEY,
        "Content-Type": "application/json",
    }
    response = requests.post(
        f"{FED_LEDGER_BASE_URL}/tasks/submit-result",
        headers=headers,
        data=payload,
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    if response.status_code != 200:
        raise Exception(f"Failed to submit task: {response.text}")
    return response.json()
44 | 


--------------------------------------------------------------------------------
/utils/gpu_utils.py:
--------------------------------------------------------------------------------
 1 | from torch.cuda import get_device_name
 2 | 
 3 | 
 4 | def get_gpu_type():
 5 |     try:
 6 |         gpu_name = get_device_name(0)
 7 |         return gpu_name
 8 |     except Exception as e:
 9 |         return f"Error retrieving GPU type: {e}"
10 | 


--------------------------------------------------------------------------------
/utils/tool_utils.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Any, List, Tuple
 2 | import json
 3 | 
 4 | DEFAULT_TOOL_PROMPT = (
 5 |     "You have access to the following tools:\n{tool_text}"
 6 |     "Use the following format if using a tool:\n"
 7 |     "```\n"
 8 |     "Action: tool name (one of [{tool_names}])\n"
 9 |     "Action Input: the input to the tool, in a JSON format representing the kwargs "
10 |     """(e.g. ```{{"input": "hello world", "num_beams": 5}}```)\n"""
11 |     "```\n"
12 | )
13 | 
14 | DEFAULT_FUNCTION_SLOTS = "Action: {name}\nAction Input: {arguments}\n"
15 | 
16 | 
17 | def tool_formater(tools: List[Dict[str, Any]]) -> str:
18 |     tool_text = ""
19 |     tool_names = []
20 |     for tool in tools:
21 |         param_text = ""
22 |         for name, param in tool["parameters"]["properties"].items():
23 |             required, enum, items = "", "", ""
24 |             if name in tool["parameters"].get("required", []):
25 |                 required = ", required"
26 | 
27 |             if param.get("enum", None):
28 |                 enum = ", should be one of [{}]".format(", ".join(param["enum"]))
29 | 
30 |             if param.get("items", None):
31 |                 items = ", where each item should be {}".format(
32 |                     param["items"].get("type", "")
33 |                 )
34 | 
35 |             param_text += "  - {name} ({type}{required}): {desc}{enum}{items}\n".format(
36 |                 name=name,
37 |                 type=param.get("type", ""),
38 |                 required=required,
39 |                 desc=param.get("description", ""),
40 |                 enum=enum,
41 |                 items=items,
42 |             )
43 | 
44 |         tool_text += "> Tool Name: {name}\nTool Description: {desc}\nTool Args:\n{args}\n".format(
45 |             name=tool["name"], desc=tool.get("description", ""), args=param_text
46 |         )
47 |         tool_names.append(tool["name"])
48 | 
49 |     return DEFAULT_TOOL_PROMPT.format(
50 |         tool_text=tool_text, tool_names=", ".join(tool_names)
51 |     )
52 | 
53 | 
54 | def function_formatter(tool_calls, function_slots=DEFAULT_FUNCTION_SLOTS) -> str:
55 |     functions: List[Tuple[str, str]] = []
56 |     if not isinstance(tool_calls, list):
57 |         tool_calls = [tool_calls]  # parrallel function calls
58 | 
59 |     for tool_call in tool_calls:
60 |         functions.append(
61 |             (tool_call["name"], json.dumps(tool_call["arguments"], ensure_ascii=False))
62 |         )
63 | 
64 |     elements = []
65 |     for name, arguments in functions:
66 |         text = function_slots.format(name=name, arguments=arguments)
67 |         elements.append(text)
68 | 
69 |     return "\n".join(elements) + "\n"
70 | 


--------------------------------------------------------------------------------