├── .DS_Store ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── LICENSE ├── README.md ├── data ├── demo_data.jsonl ├── function_calling_demo.jsonl └── test_data_loading.py ├── dataset.py ├── demo.py ├── full_automation.py ├── merge.py ├── requirements.txt ├── training_args.yaml └── utils ├── constants.py ├── flock_api.py ├── gpu_utils.py └── tool_utils.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FLock-io/testnet-training-node-quickstart/bdeffb0a7a664d89b8fc3eec501c2b9c79a8125c/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/astral-sh/ruff-pre-commit 9 | rev: v0.11.0 10 | hooks: 11 | - id: ruff 12 | args: [--fix] 13 | - id: ruff-format 14 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | python3-dev \ 5 | python3-pip \ 6 | && ln -s /usr/bin/python3 /usr/bin/python \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | WORKDIR /app 10 | 11 | # Copy only requirements first to leverage Docker cache 12 | COPY requirements.txt . 13 | RUN pip3 install --no-cache-dir -r requirements.txt 14 | 15 | # Copy the rest of the application code 16 | COPY . . 17 | 18 | CMD ["python", "full_automation.py"] 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # testnet-training-node-quickstart 2 | 3 | This repository contains demo scripts for you to fine-tune supported language models for train.flock.io. 4 | 5 | ## Quickstart 6 | 7 | ### Install Dependencies 8 | 9 | To set up your environment, run the following commands: 10 | 11 | ```bash 12 | conda create -n training-node python==3.10 13 | conda activate training-node 14 | pip install -r requirements.txt 15 | ``` 16 | 17 | ### File Structure 18 | 19 | - [`dataset.py`](dataset.py) - Contains the logic to process the raw data from `data/demo_data.jsonl`. 20 | - [`demo_data.jsonl`](data/demo_data.jsonl) - Follows the ShareGPT format. The training data you receive from the `fed-ledger` is in exactly the same format. 21 | - [`merge.py`](merge.py) - Contains the utility function for merging LoRA weights. If you are training with LoRA, please ensure you merge the adapter before uploading to your Hugging Face repository. 
22 | - [`demo.py`](demo.py) - A training script that implements LoRA fine-tuning for supported language models. 23 | - [`full_automation.py`](full_automation.py) - A script that automates the whole workflow: getting a task, downloading the training data, fine-tuning supported models on that data, merging weights, uploading to your Hugging Face model repo, and submitting the task to fed-ledger. 24 | - [`training_args.yaml`](training_args.yaml) - A YAML file that defines the training hyperparameters for fine-tuning. A detailed explanation of the LoRA config can be found here: [LoRA Fine-tuning & Hyperparameters Explained](https://www.entrypointai.com/blog/lora-fine-tuning/) 25 | 26 | ### Full Automation 27 | 28 | Simply run 29 | 30 | ```bash 31 | TASK_ID= FLOCK_API_KEY="" HF_TOKEN="" CUDA_VISIBLE_DEVICES=0 HF_USERNAME="your-hf-user-name" python full_automation.py 32 | ``` 33 | 34 | The above command will automatically train and submit multiple LLMs that are smaller than the maximum parameter limit for the given task. **Ensure that each model version is committed to the same repository to maintain version history.** Refer to the [Hugging Face documentation on commits](https://huggingface.co/docs/hub/repositories-getting-started#pushing-files) for guidance on how to manage commits effectively. 35 | 36 | #### Bypass certain models 37 | 38 | If you want to bypass certain models, simply comment out their model config in [`training_args.yaml`](training_args.yaml). 39 | 40 | --- 41 | 42 | ### Play with demo.py 43 | 44 | #### Start the Training 45 | 46 | Execute the following command to start the training: 47 | 48 | ```bash 49 | HF_TOKEN="hf_yourhftoken" CUDA_VISIBLE_DEVICES=0 python demo.py 50 | ``` 51 | 52 | This command initiates fine-tuning on the demo dataset, saves the fine-tuned model, merges the adapter into the base model, and saves the final model. 
53 | 54 | #### Upload the model folder to your Hugging Face repo 55 | 56 | To maintain a clean version history and simplify change tracking, **commit your model changes to the existing repository** rather than creating a new one for each base model. For comprehensive guidance on uploading models and managing commits, consult the Hugging Face documentation on [uploading models](https://huggingface.co/docs/hub/en/models-uploading) and the [guide on commits](https://huggingface.co/docs/hub/repositories-getting-started#pushing-files). 57 | 58 | #### Getting the task ID 59 | 60 | Before you submit a model for a task, you will first need to stake on the task as a node. 61 | 62 | - Go to the [FLock Training Platform](https://train.flock.io/stake) 63 | - Select the **Training Node** tab and stake on the task you want to submit the model to. 64 | - The task ID is the ID of the task you want to submit the model to. 65 | 66 | #### Submit the model 67 | 68 | ```bash 69 | 70 | curl --location 'https://fed-ledger-prod.flock.io/api/v1/tasks/submit-result' \ 71 | --header 'flock-api-key: ' \ 72 | --header 'Content-Type: application/json' \ 73 | --data '{ 74 | "task_id": 29, 75 | "data":{ 76 | "hg_repo_id": "Qwen/Qwen1.5-1.8B-Chat", 77 | "base_model": "qwen1.5", 78 | "gpu_type": "", 79 | "revision": "" 80 | } 81 | }' 82 | ``` 83 | 84 | - `task_id`: The ID of the task to which you are submitting a model. 85 | - `hg_repo_id`: The Hugging Face repository where the model is stored, typically in the format `username/repository-name`. 86 | - `base_model`: The base model used for training. A list of all supported models can be found [here](https://github.com/FLock-io/llm-loss-validator/blob/main/src/core/constant.py). 87 | - `gpu_type`: The type of GPU used for training the model. 88 | - `revision`: The commit hash from the Hugging Face repository. This uniquely identifies the version of the model that was trained and submitted, allowing for precise tracking of changes and updates. 
89 | -------------------------------------------------------------------------------- /data/demo_data.jsonl: -------------------------------------------------------------------------------- 1 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What best practices should I follow when designing a website?"}, {"role": "assistant", "content": "Here are some best practices to follow when designing a website:\n\n1. Keep it simple and easy to navigate: Make sure your website is easy to navigate and that visitors can find what they are looking for quickly.\n\n2. Use a responsive design: Ensure that your website is optimized for all devices and screen sizes, including mobile phones and tablets.\n\n3. Use high-quality images and graphics: Use high-quality images and graphics to make your website visually appealing.\n\n4. Use colors and fonts consistently: Choose colors and fonts that match your brand and use them consistently throughout your website.\n\n5. Make it easy to read: Use clear and concise language, and choose a font size and style that is easy to read.\n\n6. Use white space effectively: Use white space to make your website look less cluttered and easier to read.\n\n7. Ensure fast loading speeds: Optimize your website's loading speed to ensure that visitors don't have to wait long to access your content.\n\n8. Use SEO best practices: Optimize your website for search engines by using keywords, meta-descriptions, and other SEO best practices.\n\n9. 
Test your website: Regularly test your website to ensure that it is functioning properly and that any issues are addressed quickly.\n\nBy following these best practices, you can create a website that is user-friendly, visually appealing, and easy to navigate, and that effectively communicates your brand's message to your target audience."}, {"role": "user", "content": "How can I optimize my website's loading speed?"}, {"role": "assistant", "content": "Here are some ways to optimize your website's loading speed:\n\n1. Minimize HTTP requests: Minimize the number of HTTP requests by reducing the number of files on your website, combining files, and using CSS sprites.\n\n2. Optimize images: Optimize your images by compressing them, reducing their size, and using the right file format.\n\n3. Use a content delivery network (CDN): Use a CDN to reduce the load time of your website by serving content from the nearest server to the user.\n\n4. Enable browser caching: Enable browser caching to store static files on the user's computer, which reduces the time it takes to load the website on subsequent visits.\n\n5. Minimize plugins and scripts: Minimize the number of plugins and scripts used on your website to reduce their impact on loading speed.\n\n6. Reduce server response time: Optimize your server response time by using a fast hosting provider, reducing the number of requests, and using caching and content compression.\n\n7. Use a fast theme or template: Use a fast theme or template that is well-optimized for speed.\n\n8. Minimize the use of redirects: Minimize the use of redirects, as they can slow down the loading speed of your website.\n\nBy following these tips, you can optimize your website's loading speed and create a better user experience for your visitors."}, {"role": "user", "content": "How can I make sure my website is optimized for search engines?"}, {"role": "assistant", "content": "Here are some ways to optimize your website for search engines:\n\n1. 
Conduct keyword research: Conduct keyword research to identify the keywords and phrases that your target audience is using to search for your products or services.\n\n2. Use keywords in your content: Use your target keywords in your content, including in your titles, headings, meta descriptions, and body copy.\n\n3. Create high-quality content: Create high-quality, informative content that provides value to your target audience and encourages them to share it.\n\n4. Use meta descriptions and title tags: Use meta descriptions and title tags to provide a brief summary of your content and make it easier for search engines to understand what your website is about.\n\n5. Use header tags: Use header tags (H1, H2, H3, etc.) to organize your content and make it easier for search engines to understand the hierarchy of your content.\n\n6. Optimize images: Optimize your images by using descriptive filenames, alt tags, and captions that include your target keywords.\n\n7. Build high-quality backlinks: Build high-quality backlinks from reputable websites that are relevant to your niche.\n\n8. Use social media: Use social media to promote your content and build your online presence, which can help improve your search engine rankings.\n\n9. Make sure your website is mobile-friendly: Ensure that your website is optimized for mobile devices, as mobile-friendly websites tend to rank higher in search engine results.\n\nBy following these tips, you can optimize your website for search engines and improve your visibility in search engine results pages (SERPs)."}, {"role": "user", "content": "How can I improve my website's accessibility for people with disabilities?"}, {"role": "assistant", "content": "Here are some ways to improve your website's accessibility for people with disabilities:\n\n1. Use descriptive and meaningful text for links: Use descriptive and meaningful text for links to help users understand where the link goes and what it does.\n\n2. 
Provide alternative text for images: Provide alternative text for images to help users with visual impairments understand the content of the image.\n\n3. Use headings and subheadings: Use headings and subheadings to structure your content and make it easier for screen readers to read.\n\n4. Use color with care: Use color with care and ensure that color is not the only way to convey information or distinguish between different elements on your website.\n\n5. Provide transcripts for audio and video content: Provide transcripts for audio and video content to help users with hearing impairments understand the content.\n\n6. Use clear and concise language: Use clear and concise language to make your content easy to understand for users with cognitive disabilities.\n\n7. Ensure the website is keyboard accessible: Ensure that your website is keyboard accessible to help users with motor disabilities navigate your website.\n\n8. Make sure your website is responsive: Ensure that your website is responsive and can be easily accessed on different devices and screen sizes.\n\n9. Use accessibility tools: Use accessibility tools such as screen readers, screen magnifiers, and other assistive technologies to test your website's accessibility.\n\nBy following these tips, you can make your website more accessible to people with disabilities and provide a better user experience for all users."}]} 2 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What steps should I take to reduce taxes on my income?"}, {"role": "assistant", "content": "There are several steps you can take to reduce taxes on your income:\n\n1. Contribute to a retirement account: Contributions to a traditional IRA or a 401(k) can reduce your taxable income and save you money on taxes.\n\n2. 
Itemize your deductions: If you have significant expenses such as mortgage interest, medical expenses, or charitable contributions, itemizing your deductions may reduce your taxable income.\n\n3. Take advantage of tax credits: Tax credits, such as the Earned Income Tax Credit or the Child Tax Credit, can reduce your tax bill dollar for dollar.\n\n4. Invest in tax-free or tax-deferred accounts: Investments in tax-free or tax-deferred accounts such as municipal bonds, Roth IRAs, or 529 plans can reduce your taxable income.\n\n5. Consider income splitting: If you are married, consider splitting income with your spouse to reduce your overall tax bill.\n\nIt\u2019s important to consult with a tax professional or financial advisor to determine the best strategy for your specific situation."}, {"role": "user", "content": "Can you give me an example of a tax credit that can help reduce my taxes?"}, {"role": "assistant", "content": "Sure! One example of a tax credit that can help reduce your taxes is the Child Tax Credit. This is a credit of up to $2,000 per child under the age of 17 that can be claimed on your tax return. To qualify for the full credit, your income must be below a certain threshold (currently $400,000 for married filing jointly, or $200,000 for all other filers).\n\nIf your income is above the threshold, you may still be eligible for a partial credit. The credit is partially refundable, meaning that if the credit exceeds your tax liability, you may receive a refund for the difference up to $1,400 per child.\n\nTo claim the Child Tax Credit, you must have a qualifying child who meets certain criteria, such as being a U.S. 
citizen, living with you for more than half the year, and being related to you.\n\nIt\u2019s important to note that tax laws can change, so it\u2019s always a good idea to check with a tax professional or use tax preparation software to ensure you are claiming all the credits and deductions you are eligible for."}, {"role": "user", "content": "Can you explain the difference between a traditional IRA and a Roth IRA?"}, {"role": "assistant", "content": "Yes, I can explain the difference between a traditional IRA and a Roth IRA.\n\nA traditional IRA is a retirement account in which you can make pre-tax contributions, meaning you can deduct your contribution from your taxable income in the year you make it. The money in your traditional IRA grows tax-deferred, which means you won\u2019t pay taxes on the earnings until you withdraw the money in retirement. When you withdraw money from a traditional IRA, it is taxed as ordinary income.\n\nA Roth IRA, on the other hand, is a retirement account in which you make after-tax contributions, meaning you don\u2019t get a tax deduction for your contribution in the year you make it. However, the money in your Roth IRA grows tax-free, which means you won\u2019t pay taxes on the earnings when you withdraw the money in retirement. 
Additionally, qualified withdrawals from a Roth IRA are tax-free.\n\nThe difference between the two types of accounts is primarily when you pay taxes: with a traditional IRA, you pay taxes on the money when you withdraw it in retirement, while with a Roth IRA, you pay taxes on the money when you contribute to the account.\n\nIt\u2019s important to note that both types of accounts have contribution limits and other rules, so it\u2019s a good idea to consult with a financial advisor or tax professional to determine which type of account is best for your individual situation."}, {"role": "user", "content": "What is the contribution limit for a Roth IRA in 2021?"}, {"role": "assistant", "content": "The contribution limit for a Roth IRA in 2021 is $6,000 for individuals under age 50, and $7,000 for those age 50 and above. This means that you can contribute up to $6,000 or $7,000 (depending on your age) per year to your Roth IRA, as long as your income falls below certain limits.\n\nIt\u2019s important to note that contribution limits may change from year to year, so it\u2019s always a good idea to check the current year\u2019s limits before making any contributions. Additionally, there are income limits that determine whether you are eligible to contribute to a Roth IRA, and those limits can also change from year to year.\n\nIf you have any questions about Roth IRA contributions or eligibility, it\u2019s a good idea to consult with a financial advisor or tax professional."}, {"role": "user", "content": "Can you explain the difference between a 401(k) and an IRA?"}, {"role": "assistant", "content": "Yes, I can explain the difference between a 401(k) and an IRA.\n\nA 401(k) is an employer-sponsored retirement plan that allows employees to contribute a portion of their pre-tax salary to the plan. Employers may also offer matching contributions up to a certain amount. 
The money in a 401(k) grows tax-deferred, meaning you won\u2019t pay taxes on the earnings until you withdraw the money in retirement. Contributions to a 401(k) are subject to annual limits, which are set by the IRS.\n\nAn IRA, or individual retirement account, is a retirement account that you can set up on your own, outside of an employer-sponsored plan. IRAs come in two main types: traditional and Roth. A traditional IRA allows you to make pre-tax contributions, meaning you can deduct your contribution from your taxable income in the year you make it. The money in a traditional IRA grows tax-deferred, and you\u2019ll pay taxes on the money when you withdraw it in retirement. With a Roth IRA, you make after-tax contributions, meaning you don\u2019t get a tax deduction for your contribution in the year you make it. The money in a Roth IRA grows tax-free, and qualified withdrawals from a Roth IRA are tax-free.\n\nThe main difference between a 401(k) and an IRA is that a 401(k) is an employer-sponsored plan, while an IRA is an individual retirement account that you set up on your own. Additionally, 401(k) plans may have higher contribution limits than IRAs, and employers may offer matching contributions or other incentives to encourage employees to participate in the plan.\n\nIt\u2019s a good idea to consult with a financial advisor or tax professional to determine which type of account is best for your individual situation."}]} 3 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What strategies should I use to focus better while studying?"}, {"role": "assistant", "content": "Here are some strategies that can help you focus better while studying:\n\n1. Create a conducive environment: Find a quiet, distraction-free location to study. Make sure the lighting is appropriate, and the temperature is comfortable.\n\n2. Set a study schedule: Create a study schedule that works for you and stick to it. 
This helps you stay focused and avoids procrastination.\n\n3. Take breaks: Taking short breaks between study sessions can help you stay focused and retain information better.\n\n4. Eliminate distractions: Turn off your phone, log out from social media, and avoid any other distractions that can prevent you from concentrating.\n\n5. Use active learning techniques: Use active learning techniques such as note-taking, summarizing, and teaching others to help you stay engaged.\n\n6. Stay motivated: Set goals for yourself and reward yourself when you achieve them. This can help you stay motivated and focused.\n\n7. Get enough sleep and exercise: A healthy lifestyle can help you stay alert and focused while studying. Make sure you get enough sleep and exercise regularly.\n\nRemember, every individual is different, and what works for one person may not work for another. Find what works best for you and stick to it."}, {"role": "user", "content": "Can you give me some examples of active learning techniques that I can use while studying?"}, {"role": "assistant", "content": "Sure! Here are some examples of active learning techniques that you can use while studying:\n\n1. Summarizing: Summarize the main ideas in your own words. This helps you retain the information better.\n\n2. Note-taking: Take notes while you\u2019re reading or listening to lectures. This helps you focus your attention and remember the key points.\n\n3. Teaching: Explain the concepts you\u2019ve learned to someone else. This helps you understand the material better and identify areas where you need further clarification.\n\n4. Practice questions: Complete practice questions or quizzes to test your understanding of the material.\n\n5. Mnemonics: Use mnemonics (memory aids) to help you remember important information. For example, using acronyms or visual associations.\n\n6. Group study: Study with a group of peers and share your knowledge with each other. 
This helps you learn from different perspectives and keeps you motivated.\n\n7. Mind mapping: Create visual diagrams or mind maps to organize information and understand the relationships between different concepts.\n\nRemember, using a combination of different active learning techniques can help you to better understand and retain information."}, {"role": "user", "content": "How can I incorporate active learning techniques while studying for math?"}, {"role": "assistant", "content": "Math is a subject that requires a lot of practice, so incorporating active learning techniques can be very helpful in mastering the concepts. Here are some ways you can use active learning techniques while studying for math:\n\n1. Practice problems: Math requires a lot of practice to master the concepts. Solve practice problems and work through examples to reinforce your understanding of the material.\n\n2. Explain concepts: Explain the math concepts you\u2019ve learned to someone else. This helps you solidify your understanding and identify areas where you need further clarification.\n\n3. Create flashcards: Create flashcards for formulas, equations, and definitions. Use them to quiz yourself and help you memorize the material.\n\n4. Write out explanations: Write out explanations of the steps you took to solve a problem. This helps you identify areas where you may have made mistakes and reinforce your understanding of the material.\n\n5. Work in groups: Study with a group of peers and work through problems together. This helps you learn from different perspectives and keeps you motivated.\n\n6. Use online resources: There are many online resources available for math, including interactive tutorials and videos. 
Use them to reinforce your understanding of the material.\n\nRemember, math can be challenging, but incorporating active learning techniques can help you to better understand and retain the material."}, {"role": "user", "content": "How can I manage my time effectively while studying for multiple exams?"}, {"role": "assistant", "content": "Managing your time effectively while studying for multiple exams can be challenging, but here are some tips that can help:\n\n1. Create a study schedule: Make a schedule that outlines your study sessions for each subject. Consider your other commitments and allocate sufficient time for each subject.\n\n2. Prioritize the most important exams: Determine which exams are the most important and allocate more study time to these subjects.\n\n3. Break down the material: Break down the material into smaller, manageable chunks. This makes it easier to study and makes the task less overwhelming.\n\n4. Use study aids: Use study aids such as flashcards or summary notes to help you study more efficiently.\n\n5. Avoid multitasking: Avoid multitasking and focus on one subject at a time. This helps you to concentrate better and retain information more effectively.\n\n6. Take breaks: Taking regular breaks between study sessions can help you to stay focused and avoid burnout.\n\n7. Review regularly: Regularly review your notes and study materials to reinforce your understanding of the material.\n\n8. Stay organized: Keep your study materials organized and easily accessible. This saves time and helps you to focus on studying rather than searching for materials.\n\nRemember, effective time management is crucial when studying for multiple exams. 
By creating a study schedule, breaking down the material, prioritizing tasks, and staying organized, you can manage your time effectively and achieve better results."}]} 4 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "How can I most efficiently manage my time?"}, {"role": "assistant", "content": "Here are some tips to manage your time more efficiently:\n\n1. Make a schedule: Create a schedule of your daily tasks and prioritize them based on importance.\n\n2. Use time blocks: Group similar tasks together and allocate specific time blocks for each task.\n\n3. Take breaks: Take short breaks in between tasks to help you stay focused and avoid burnout.\n\n4. Eliminate distractions: Remove any distractions that could interfere with your work, such as social media notifications or emails.\n\n5. Delegate tasks: If possible, delegate tasks to others to free up your time for more important tasks.\n\n6. Use technology: Use productivity apps or tools to help you manage your time, such as time-tracking apps, task management tools, or calendar apps.\n\n7. Be flexible: Be open to adjusting your schedule as needed and be willing to adapt to changes in your workload or priorities.\n\nRemember, effective time management requires discipline and practice, but by implementing these tips, you can increase your productivity and achieve your goals more efficiently."}, {"role": "user", "content": "Can you recommend any time-tracking apps?"}, {"role": "assistant", "content": "Sure, here are some popular time-tracking apps you can use:\n\n1. Toggl: Toggl is a simple and easy-to-use time-tracking app that allows you to track how much time you spend on each task or project.\n\n2. RescueTime: RescueTime is a time-tracking app that runs in the background and tracks how much time you spend on various websites and applications.\n\n3. 
Harvest: Harvest is a time-tracking app that also allows you to create and send invoices based on the time you've tracked.\n\n4. Clockify: Clockify is a free time-tracking app that lets you track time on multiple projects and tasks, and generate reports based on your time data.\n\n5. Timely: Timely is a time-tracking app that uses AI to automatically track your time and create a visual timeline of your workday.\n\n6. Everhour: Everhour is a time-tracking app that integrates with popular project management tools like Asana, Trello, and Basecamp.\n\nThese apps can help you track your time more accurately, improve your productivity, and help you stay on top of your work."}, {"role": "user", "content": "Can you explain how Clockify generates reports based on time data?"}, {"role": "assistant", "content": "Certainly! Clockify generates reports based on the time data you input into the app. Once you've tracked your time for different tasks or projects, you can generate reports that provide detailed insights into your work habits and productivity. Here are some of the features of Clockify's reporting system:\n\n1. Customizable reports: Clockify allows you to customize your reports based on a variety of criteria, such as date range, project, client, or task.\n\n2. Summary reports: Clockify can generate summary reports that provide an overview of your time data, including your total hours worked, billable hours, and non-billable hours.\n\n3. Detailed reports: You can also generate detailed reports that show a breakdown of your time data by day, week, month, or year.\n\n4. Exportable reports: Clockify allows you to export your reports in a variety of formats, including PDF, CSV, and Excel.\n\n5. 
Dashboard: Clockify has a dashboard that provides a real-time overview of your time data, including your current and upcoming tasks, as well as your progress towards your goals.\n\nOverall, Clockify's reporting system can provide valuable insights into your work habits and productivity, helping you make better decisions about how to manage your time and resources."}, {"role": "user", "content": "Can Clockify be integrated with other project management tools besides Asana, Trello, and Basecamp?"}, {"role": "assistant", "content": "Yes, Clockify can be integrated with other project management tools as well. In addition to Asana, Trello, and Basecamp, Clockify also integrates with many other popular project management tools, including:\n\n1. Jira: Jira is a popular project management tool used by many software development teams. With Clockify's Jira integration, you can track your time directly from Jira issues.\n\n2. ClickUp: ClickUp is a flexible task management tool that can be customized to fit any workflow. Clockify's ClickUp integration allows you to track time on individual tasks or projects.\n\n3. Monday.com: Monday.com is a visual project management tool that allows teams to collaborate and manage their work from a single platform. Clockify's Monday.com integration allows you to track time on tasks and projects directly from the Monday.com interface.\n\n4. Todoist: Todoist is a popular task management app that can be used to manage personal to-do lists or team projects. Clockify's Todoist integration allows you to track time on individual tasks or projects.\n\n5. MeisterTask: MeisterTask is a project management tool that allows teams to collaborate and manage their work from a single platform. 
Clockify's MeisterTask integration allows you to track time on individual tasks or projects directly from the MeisterTask interface.\n\nThere are many other project management tools that Clockify integrates with, so you can choose the one that works best for your team and your workflow."}]} 5 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What tips do you have for balancing work and home life?"}, {"role": "assistant", "content": "Here are a few tips for balancing work and home life:\n\n1. Set clear boundaries: Establish clear boundaries between work and home life. Try to stick to a regular work schedule and avoid working during your personal time.\n\n2. Prioritize your tasks: Prioritize your tasks to make sure you are accomplishing the most important things first. This can help you avoid wasting time on less important tasks and free up more time for your personal life.\n\n3. Take breaks: Taking regular breaks can help you stay focused and avoid burnout. Use your breaks to relax, exercise, or spend time with loved ones.\n\n4. Use technology to your advantage: Use technology to help you manage your work and personal life. For example, use a calendar app to schedule your work and personal commitments, or use a task management app to keep track of your to-do list.\n\n5. Learn to say no: Saying no to work commitments that don't align with your priorities can free up more time for your personal life.\n\nRemember that achieving a perfect balance between work and home life is not always possible. But by implementing these tips, you can create a healthy balance that works for you."}, {"role": "user", "content": "How can I develop better time management skills?"}, {"role": "assistant", "content": "Developing better time management skills can be challenging, but it is important for achieving your personal and professional goals. Here are some tips to help you improve your time management skills:\n\n1. 
Track your time: Start by tracking your time for a week or two to get a sense of how you are currently spending your time. This will help you identify any time-wasting activities and prioritize your tasks.\n\n2. Set goals: Set clear and specific goals for what you want to achieve in your personal and professional life. This will help you prioritize your tasks and stay focused on what really matters.\n\n3. Prioritize your tasks: Identify the most important tasks and prioritize them based on their urgency and importance. Use a to-do list or a task management app to keep track of your tasks.\n\n4. Use time management tools: There are many time management tools available, such as calendars, timers, and task lists. Use these tools to help you stay organized and on track.\n\n5. Learn to say no: Saying no to non-essential tasks or commitments can help you free up more time for the things that matter most.\n\n6. Take breaks: Taking regular breaks can help you stay focused and avoid burnout. Use your breaks to relax, exercise, or spend time with loved ones.\n\nRemember that developing better time management skills takes time and practice. Be patient with yourself and stay committed to your goals."}, {"role": "user", "content": "Can you recommend any particular time management tools that would be helpful?"}, {"role": "assistant", "content": "Sure! Here are some time management tools that you may find helpful:\n\n1. Calendar apps: Calendar apps like Google Calendar and Apple Calendar can help you schedule your appointments and tasks, set reminders, and keep track of your schedule.\n\n2. Task management apps: Task management apps like Trello, Asana, and Todoist can help you create to-do lists, set deadlines for your tasks, and prioritize your work.\n\n3. Pomodoro timers: Pomodoro timers like Focus@Will and Tomato Timer can help you stay focused by breaking your work into small, manageable chunks of time.\n\n4. 
Time-tracking apps: Time-tracking apps like RescueTime and Toggl can help you monitor how much time you spend on different tasks and activities, and identify areas where you can improve your productivity.\n\n5. Note-taking apps: Note-taking apps like Evernote and OneNote can help you jot down ideas, take notes during meetings, and keep track of important information.\n\n6. Distraction-blocker apps: Distraction-blocker apps like Freedom and Cold Turkey can help you stay focused by blocking distracting websites and apps during set times of day.\n\nRemember, the key to effective time management is finding a tool or set of tools that work best for you, and then using them consistently to help you stay focused and productive."}, {"role": "user", "content": "How can I improve my work efficiency?"}, {"role": "assistant", "content": "Improving your work efficiency can help you be more productive, complete your tasks more quickly, and reduce your stress levels. Here are some tips to help you improve your work efficiency:\n\n1. Set clear goals: Set clear and specific goals for what you want to achieve and break them down into smaller, actionable steps.\n\n2. Eliminate distractions: Eliminate distractions as much as possible by turning off notifications, closing unnecessary tabs or apps, and finding a quiet space to work.\n\n3. Prioritize your tasks: Identify the most important tasks and prioritize them based on their urgency and importance.\n\n4. Use time management tools: Use tools like calendars, timers, and task lists to help you stay organized and on track.\n\n5. Automate tasks: Automate tasks like email management and scheduling appointments to free up more time for important work.\n\n6. Delegate tasks: Delegate tasks where possible to colleagues or employees to help you focus on your most important responsibilities.\n\n7. Take breaks: Taking regular breaks can help you stay focused and avoid burnout. 
Use your breaks to relax, exercise, or spend time with loved ones.\n\nRemember, improving your work efficiency takes time and practice. Be patient with yourself and stay committed to your goals."}]} 6 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What best practices should I follow when designing a website?"}, {"role": "assistant", "content": "Here are some best practices to follow when designing a website:\n\n1. Keep it simple and easy to navigate: Make sure your website is easy to navigate and that visitors can find what they are looking for quickly.\n\n2. Use a responsive design: Ensure that your website is optimized for all devices and screen sizes, including mobile phones and tablets.\n\n3. Use high-quality images and graphics: Use high-quality images and graphics to make your website visually appealing.\n\n4. Use colors and fonts consistently: Choose colors and fonts that match your brand and use them consistently throughout your website.\n\n5. Make it easy to read: Use clear and concise language, and choose a font size and style that is easy to read.\n\n6. Use white space effectively: Use white space to make your website look less cluttered and easier to read.\n\n7. Ensure fast loading speeds: Optimize your website's loading speed to ensure that visitors don't have to wait long to access your content.\n\n8. Use SEO best practices: Optimize your website for search engines by using keywords, meta-descriptions, and other SEO best practices.\n\n9. 
Test your website: Regularly test your website to ensure that it is functioning properly and that any issues are addressed quickly.\n\nBy following these best practices, you can create a website that is user-friendly, visually appealing, and easy to navigate, and that effectively communicates your brand's message to your target audience."}, {"role": "user", "content": "How can I optimize my website's loading speed?"}, {"role": "assistant", "content": "Here are some ways to optimize your website's loading speed:\n\n1. Minimize HTTP requests: Minimize the number of HTTP requests by reducing the number of files on your website, combining files, and using CSS sprites.\n\n2. Optimize images: Optimize your images by compressing them, reducing their size, and using the right file format.\n\n3. Use a content delivery network (CDN): Use a CDN to reduce the load time of your website by serving content from the nearest server to the user.\n\n4. Enable browser caching: Enable browser caching to store static files on the user's computer, which reduces the time it takes to load the website on subsequent visits.\n\n5. Minimize plugins and scripts: Minimize the number of plugins and scripts used on your website to reduce their impact on loading speed.\n\n6. Reduce server response time: Optimize your server response time by using a fast hosting provider, reducing the number of requests, and using caching and content compression.\n\n7. Use a fast theme or template: Use a fast theme or template that is well-optimized for speed.\n\n8. Minimize the use of redirects: Minimize the use of redirects, as they can slow down the loading speed of your website.\n\nBy following these tips, you can optimize your website's loading speed and create a better user experience for your visitors."}, {"role": "user", "content": "How can I make sure my website is optimized for search engines?"}, {"role": "assistant", "content": "Here are some ways to optimize your website for search engines:\n\n1. 
Conduct keyword research: Conduct keyword research to identify the keywords and phrases that your target audience is using to search for your products or services.\n\n2. Use keywords in your content: Use your target keywords in your content, including in your titles, headings, meta descriptions, and body copy.\n\n3. Create high-quality content: Create high-quality, informative content that provides value to your target audience and encourages them to share it.\n\n4. Use meta descriptions and title tags: Use meta descriptions and title tags to provide a brief summary of your content and make it easier for search engines to understand what your website is about.\n\n5. Use header tags: Use header tags (H1, H2, H3, etc.) to organize your content and make it easier for search engines to understand the hierarchy of your content.\n\n6. Optimize images: Optimize your images by using descriptive filenames, alt tags, and captions that include your target keywords.\n\n7. Build high-quality backlinks: Build high-quality backlinks from reputable websites that are relevant to your niche.\n\n8. Use social media: Use social media to promote your content and build your online presence, which can help improve your search engine rankings.\n\n9. Make sure your website is mobile-friendly: Ensure that your website is optimized for mobile devices, as mobile-friendly websites tend to rank higher in search engine results.\n\nBy following these tips, you can optimize your website for search engines and improve your visibility in search engine results pages (SERPs)."}, {"role": "user", "content": "How can I improve my website's accessibility for people with disabilities?"}, {"role": "assistant", "content": "Here are some ways to improve your website's accessibility for people with disabilities:\n\n1. Use descriptive and meaningful text for links: Use descriptive and meaningful text for links to help users understand where the link goes and what it does.\n\n2. 
Provide alternative text for images: Provide alternative text for images to help users with visual impairments understand the content of the image.\n\n3. Use headings and subheadings: Use headings and subheadings to structure your content and make it easier for screen readers to read.\n\n4. Use color with care: Use color with care and ensure that color is not the only way to convey information or distinguish between different elements on your website.\n\n5. Provide transcripts for audio and video content: Provide transcripts for audio and video content to help users with hearing impairments understand the content.\n\n6. Use clear and concise language: Use clear and concise language to make your content easy to understand for users with cognitive disabilities.\n\n7. Ensure the website is keyboard accessible: Ensure that your website is keyboard accessible to help users with motor disabilities navigate your website.\n\n8. Make sure your website is responsive: Ensure that your website is responsive and can be easily accessed on different devices and screen sizes.\n\n9. Use accessibility tools: Use accessibility tools such as screen readers, screen magnifiers, and other assistive technologies to test your website's accessibility.\n\nBy following these tips, you can make your website more accessible to people with disabilities and provide a better user experience for all users."}]} 7 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What steps should I take to reduce taxes on my income?"}, {"role": "assistant", "content": "There are several steps you can take to reduce taxes on your income:\n\n1. Contribute to a retirement account: Contributions to a traditional IRA or a 401(k) can reduce your taxable income and save you money on taxes.\n\n2. 
Itemize your deductions: If you have significant expenses such as mortgage interest, medical expenses, or charitable contributions, itemizing your deductions may reduce your taxable income.\n\n3. Take advantage of tax credits: Tax credits, such as the Earned Income Tax Credit or the Child Tax Credit, can reduce your tax bill dollar for dollar.\n\n4. Invest in tax-free or tax-deferred accounts: Investments in tax-free or tax-deferred accounts such as municipal bonds, Roth IRAs, or 529 plans can reduce your taxable income.\n\n5. Consider income splitting: If you are married, consider splitting income with your spouse to reduce your overall tax bill.\n\nIt\u2019s important to consult with a tax professional or financial advisor to determine the best strategy for your specific situation."}, {"role": "user", "content": "Can you give me an example of a tax credit that can help reduce my taxes?"}, {"role": "assistant", "content": "Sure! One example of a tax credit that can help reduce your taxes is the Child Tax Credit. This is a credit of up to $2,000 per child under the age of 17 that can be claimed on your tax return. To qualify for the full credit, your income must be below a certain threshold (currently $400,000 for married filing jointly, or $200,000 for all other filers).\n\nIf your income is above the threshold, you may still be eligible for a partial credit. The credit is partially refundable, meaning that if the credit exceeds your tax liability, you may receive a refund for the difference up to $1,400 per child.\n\nTo claim the Child Tax Credit, you must have a qualifying child who meets certain criteria, such as being a U.S. 
citizen, living with you for more than half the year, and being related to you.\n\nIt\u2019s important to note that tax laws can change, so it\u2019s always a good idea to check with a tax professional or use tax preparation software to ensure you are claiming all the credits and deductions you are eligible for."}, {"role": "user", "content": "Can you explain the difference between a traditional IRA and a Roth IRA?"}, {"role": "assistant", "content": "Yes, I can explain the difference between a traditional IRA and a Roth IRA.\n\nA traditional IRA is a retirement account in which you can make pre-tax contributions, meaning you can deduct your contribution from your taxable income in the year you make it. The money in your traditional IRA grows tax-deferred, which means you won\u2019t pay taxes on the earnings until you withdraw the money in retirement. When you withdraw money from a traditional IRA, it is taxed as ordinary income.\n\nA Roth IRA, on the other hand, is a retirement account in which you make after-tax contributions, meaning you don\u2019t get a tax deduction for your contribution in the year you make it. However, the money in your Roth IRA grows tax-free, which means you won\u2019t pay taxes on the earnings when you withdraw the money in retirement. 
Additionally, qualified withdrawals from a Roth IRA are tax-free.\n\nThe difference between the two types of accounts is primarily when you pay taxes: with a traditional IRA, you pay taxes on the money when you withdraw it in retirement, while with a Roth IRA, you pay taxes on the money when you contribute to the account.\n\nIt\u2019s important to note that both types of accounts have contribution limits and other rules, so it\u2019s a good idea to consult with a financial advisor or tax professional to determine which type of account is best for your individual situation."}, {"role": "user", "content": "What is the contribution limit for a Roth IRA in 2021?"}, {"role": "assistant", "content": "The contribution limit for a Roth IRA in 2021 is $6,000 for individuals under age 50, and $7,000 for those age 50 and above. This means that you can contribute up to $6,000 or $7,000 (depending on your age) per year to your Roth IRA, as long as your income falls below certain limits.\n\nIt\u2019s important to note that contribution limits may change from year to year, so it\u2019s always a good idea to check the current year\u2019s limits before making any contributions. Additionally, there are income limits that determine whether you are eligible to contribute to a Roth IRA, and those limits can also change from year to year.\n\nIf you have any questions about Roth IRA contributions or eligibility, it\u2019s a good idea to consult with a financial advisor or tax professional."}, {"role": "user", "content": "Can you explain the difference between a 401(k) and an IRA?"}, {"role": "assistant", "content": "Yes, I can explain the difference between a 401(k) and an IRA.\n\nA 401(k) is an employer-sponsored retirement plan that allows employees to contribute a portion of their pre-tax salary to the plan. Employers may also offer matching contributions up to a certain amount. 
The money in a 401(k) grows tax-deferred, meaning you won\u2019t pay taxes on the earnings until you withdraw the money in retirement. Contributions to a 401(k) are subject to annual limits, which are set by the IRS.\n\nAn IRA, or individual retirement account, is a retirement account that you can set up on your own, outside of an employer-sponsored plan. IRAs come in two main types: traditional and Roth. A traditional IRA allows you to make pre-tax contributions, meaning you can deduct your contribution from your taxable income in the year you make it. The money in a traditional IRA grows tax-deferred, and you\u2019ll pay taxes on the money when you withdraw it in retirement. With a Roth IRA, you make after-tax contributions, meaning you don\u2019t get a tax deduction for your contribution in the year you make it. The money in a Roth IRA grows tax-free, and qualified withdrawals from a Roth IRA are tax-free.\n\nThe main difference between a 401(k) and an IRA is that a 401(k) is an employer-sponsored plan, while an IRA is an individual retirement account that you set up on your own. Additionally, 401(k) plans may have higher contribution limits than IRAs, and employers may offer matching contributions or other incentives to encourage employees to participate in the plan.\n\nIt\u2019s a good idea to consult with a financial advisor or tax professional to determine which type of account is best for your individual situation."}]} 8 | {"system": "You are a helpful assistant.", "conversations": [{"role": "user", "content": "What strategies should I use to focus better while studying?"}, {"role": "assistant", "content": "Here are some strategies that can help you focus better while studying:\n\n1. Create a conducive environment: Find a quiet, distraction-free location to study. Make sure the lighting is appropriate, and the temperature is comfortable.\n\n2. Set a study schedule: Create a study schedule that works for you and stick to it. 
This helps you stay focused and avoids procrastination.\n\n3. Take breaks: Taking short breaks between study sessions can help you stay focused and retain information better.\n\n4. Eliminate distractions: Turn off your phone, log out from social media, and avoid any other distractions that can prevent you from concentrating.\n\n5. Use active learning techniques: Use active learning techniques such as note-taking, summarizing, and teaching others to help you stay engaged.\n\n6. Stay motivated: Set goals for yourself and reward yourself when you achieve them. This can help you stay motivated and focused.\n\n7. Get enough sleep and exercise: A healthy lifestyle can help you stay alert and focused while studying. Make sure you get enough sleep and exercise regularly.\n\nRemember, every individual is different, and what works for one person may not work for another. Find what works best for you and stick to it."}, {"role": "user", "content": "Can you give me some examples of active learning techniques that I can use while studying?"}, {"role": "assistant", "content": "Sure! Here are some examples of active learning techniques that you can use while studying:\n\n1. Summarizing: Summarize the main ideas in your own words. This helps you retain the information better.\n\n2. Note-taking: Take notes while you\u2019re reading or listening to lectures. This helps you focus your attention and remember the key points.\n\n3. Teaching: Explain the concepts you\u2019ve learned to someone else. This helps you understand the material better and identify areas where you need further clarification.\n\n4. Practice questions: Complete practice questions or quizzes to test your understanding of the material.\n\n5. Mnemonics: Use mnemonics (memory aids) to help you remember important information. For example, using acronyms or visual associations.\n\n6. Group study: Study with a group of peers and share your knowledge with each other. 
This helps you learn from different perspectives and keeps you motivated.\n\n7. Mind mapping: Create visual diagrams or mind maps to organize information and understand the relationships between different concepts.\n\nRemember, using a combination of different active learning techniques can help you to better understand and retain information."}, {"role": "user", "content": "How can I incorporate active learning techniques while studying for math?"}, {"role": "assistant", "content": "Math is a subject that requires a lot of practice, so incorporating active learning techniques can be very helpful in mastering the concepts. Here are some ways you can use active learning techniques while studying for math:\n\n1. Practice problems: Math requires a lot of practice to master the concepts. Solve practice problems and work through examples to reinforce your understanding of the material.\n\n2. Explain concepts: Explain the math concepts you\u2019ve learned to someone else. This helps you solidify your understanding and identify areas where you need further clarification.\n\n3. Create flashcards: Create flashcards for formulas, equations, and definitions. Use them to quiz yourself and help you memorize the material.\n\n4. Write out explanations: Write out explanations of the steps you took to solve a problem. This helps you identify areas where you may have made mistakes and reinforce your understanding of the material.\n\n5. Work in groups: Study with a group of peers and work through problems together. This helps you learn from different perspectives and keeps you motivated.\n\n6. Use online resources: There are many online resources available for math, including interactive tutorials and videos. 
Use them to reinforce your understanding of the material.\n\nRemember, math can be challenging, but incorporating active learning techniques can help you to better understand and retain the material."}, {"role": "user", "content": "How can I manage my time effectively while studying for multiple exams?"}, {"role": "assistant", "content": "Managing your time effectively while studying for multiple exams can be challenging, but here are some tips that can help:\n\n1. Create a study schedule: Make a schedule that outlines your study sessions for each subject. Consider your other commitments and allocate sufficient time for each subject.\n\n2. Prioritize the most important exams: Determine which exams are the most important and allocate more study time to these subjects.\n\n3. Break down the material: Break down the material into smaller, manageable chunks. This makes it easier to study and makes the task less overwhelming.\n\n4. Use study aids: Use study aids such as flashcards or summary notes to help you study more efficiently.\n\n5. Avoid multitasking: Avoid multitasking and focus on one subject at a time. This helps you to concentrate better and retain information more effectively.\n\n6. Take breaks: Taking regular breaks between study sessions can help you to stay focused and avoid burnout.\n\n7. Review regularly: Regularly review your notes and study materials to reinforce your understanding of the material.\n\n8. Stay organized: Keep your study materials organized and easily accessible. This saves time and helps you to focus on studying rather than searching for materials.\n\nRemember, effective time management is crucial when studying for multiple exams. 
By creating a study schedule, breaking down the material, prioritizing tasks, and staying organized, you can manage your time effectively and achieve better results."}]} 9 | -------------------------------------------------------------------------------- /data/test_data_loading.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import json 3 | 4 | 5 | @pytest.fixture 6 | def load_data(): 7 | with open("function_calling_demo.jsonl", "r", encoding="utf8") as f: 8 | data_list = f.readlines() 9 | return data_list 10 | 11 | 12 | def test_data_loading(load_data): 13 | # Verify data was loaded correctly 14 | assert load_data is not None, "Data should be loaded" 15 | assert len(load_data) > 0, "Data list should not be empty" 16 | 17 | # Verify each line is a valid JSON with required fields 18 | for line in load_data: 19 | try: 20 | data = json.loads(line) 21 | assert isinstance(data, dict), ( 22 | "Each line should be a JSON object (dictionary)" 23 | ) 24 | 25 | # Check for "conversations" key 26 | assert "conversations" in data, ( 27 | "'conversations' key is missing in the JSON object" 28 | ) 29 | assert isinstance(data["conversations"], list), ( 30 | "'conversations' should be a list" 31 | ) 32 | 33 | # Check for 'user' and 'assistant' roles within "conversations" 34 | roles = {conv["role"] for conv in data["conversations"] if "role" in conv} 35 | assert "user" in roles, "Role 'user' is missing in conversations" 36 | assert "assistant" in roles, "Role 'assistant' is missing in conversations" 37 | 38 | except json.JSONDecodeError: 39 | pytest.fail("Each line in the data file should be valid JSON") 40 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Dict, List 3 | 4 | import torch 5 | from loguru import logger 6 | from torch.utils.data import 
Dataset 7 | from utils.tool_utils import function_formatter 8 | 9 | 10 | class SFTDataset(Dataset): 11 | def __init__(self, file, tokenizer, max_seq_length, template): 12 | self.tokenizer = tokenizer 13 | self.system_format = template["system_format"] 14 | self.user_format = template["user_format"] 15 | self.assistant_format = template["assistant_format"] 16 | self.tool_format = template["tool_format"] 17 | self.function_format = template["function_format"] 18 | self.observation_format = template["observation_format"] 19 | 20 | self.max_seq_length = max_seq_length 21 | logger.info("Loading data: {}".format(file)) 22 | with open(file, "r", encoding="utf8") as f: 23 | data_list = f.readlines() 24 | logger.info("There are {} data in dataset".format(len(data_list))) 25 | self.data_list = data_list 26 | 27 | def __len__(self): 28 | return len(self.data_list) 29 | 30 | def __getitem__(self, index): 31 | data = self.data_list[index] 32 | data = json.loads(data) 33 | input_ids, target_mask = [], [] 34 | 35 | # setting system information 36 | if self.system_format is not None: 37 | system = data["system"].strip() if "system" in data.keys() else self.system 38 | 39 | if system is not None: 40 | system_text = self.system_format.format(content=system) 41 | input_ids = self.tokenizer.encode(system_text, add_special_tokens=False) 42 | target_mask = [0] * len(input_ids) 43 | 44 | conversations = data["conversations"] 45 | 46 | input_buffer = "" 47 | for i in range(len(conversations)): 48 | role = conversations[i]["role"] 49 | content = conversations[i]["content"].strip() 50 | 51 | if role != "assistant": 52 | if role == "user": 53 | human = self.user_format.format( 54 | content=content, stop_token=self.tokenizer.eos_token 55 | ) 56 | input_buffer += human 57 | 58 | elif role == "function_call": 59 | tool_calls = function_formatter(json.loads(content)) 60 | function = self.function_format.format(content=tool_calls) 61 | input_buffer += function 62 | 63 | elif role == 
"observation": 64 | observation = self.observation_format.format(content=content) 65 | input_buffer += observation 66 | else: 67 | assistant = self.assistant_format.format( 68 | content=content, stop_token=self.tokenizer.eos_token 69 | ) 70 | 71 | input_tokens = self.tokenizer.encode( 72 | input_buffer, add_special_tokens=False 73 | ) 74 | output_tokens = self.tokenizer.encode( 75 | assistant, add_special_tokens=False 76 | ) 77 | 78 | input_ids += input_tokens + output_tokens 79 | target_mask += [0] * len(input_tokens) + [1] * len(output_tokens) 80 | input_buffer = "" 81 | 82 | assert len(input_ids) == len(target_mask) 83 | 84 | input_ids = input_ids[: self.max_seq_length] 85 | target_mask = target_mask[: self.max_seq_length] 86 | attention_mask = [1] * len(input_ids) 87 | assert len(input_ids) == len(target_mask) == len(attention_mask) 88 | inputs = { 89 | "input_ids": input_ids, 90 | "attention_mask": attention_mask, 91 | "target_mask": target_mask, 92 | } 93 | return inputs 94 | 95 | 96 | class SFTDataCollator(object): 97 | def __init__(self, tokenizer, max_seq_length): 98 | self.tokenizer = tokenizer 99 | self.max_seq_length = max_seq_length 100 | self.pad_token_id = tokenizer.pad_token_id 101 | 102 | def __call__(self, batch: List[Dict[str, Any]]) -> Dict[str, Any]: 103 | # Find the maximum length in the batch 104 | lengths = [len(x["input_ids"]) for x in batch if x["input_ids"] is not None] 105 | # Take the maximum length in the batch, if it exceeds max_seq_length, take max_seq_length 106 | batch_max_len = min(max(lengths), self.max_seq_length) 107 | 108 | input_ids_batch, attention_mask_batch, target_mask_batch = [], [], [] 109 | # Truncate and pad 110 | for x in batch: 111 | input_ids = x["input_ids"] 112 | attention_mask = x["attention_mask"] 113 | target_mask = x["target_mask"] 114 | if input_ids is None: 115 | logger.info("some input_ids is None") 116 | continue 117 | padding_len = batch_max_len - len(input_ids) 118 | # Pad 119 | input_ids = input_ids + 
[self.pad_token_id] * padding_len 120 | attention_mask = attention_mask + [0] * padding_len 121 | target_mask = target_mask + [0] * padding_len 122 | # Truncate 123 | input_ids = input_ids[: self.max_seq_length] 124 | attention_mask = attention_mask[: self.max_seq_length] 125 | target_mask = target_mask[: self.max_seq_length] 126 | 127 | input_ids_batch.append(input_ids) 128 | attention_mask_batch.append(attention_mask) 129 | target_mask_batch.append(target_mask) 130 | 131 | # Convert lists to tensors to get the final model input 132 | input_ids_batch = torch.tensor(input_ids_batch, dtype=torch.long) 133 | attention_mask_batch = torch.tensor(attention_mask_batch, dtype=torch.long) 134 | target_mask_batch = torch.tensor(target_mask_batch, dtype=torch.long) 135 | 136 | labels = torch.where(target_mask_batch == 1, input_ids_batch, -100) 137 | inputs = { 138 | "input_ids": input_ids_batch, 139 | "attention_mask": attention_mask_batch, 140 | "labels": labels, 141 | } 142 | return inputs 143 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | 4 | import torch 5 | from peft import LoraConfig 6 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig 7 | from trl import SFTTrainer, SFTConfig 8 | 9 | from dataset import SFTDataCollator, SFTDataset 10 | from utils.constants import model2template 11 | 12 | 13 | @dataclass 14 | class LoraTrainingArguments: 15 | per_device_train_batch_size: int 16 | gradient_accumulation_steps: int 17 | num_train_epochs: int 18 | lora_rank: int 19 | lora_alpha: int 20 | lora_dropout: int 21 | 22 | 23 | def train_lora( 24 | model_id: str, context_length: int, training_args: LoraTrainingArguments 25 | ): 26 | assert model_id in model2template, f"model_id {model_id} not supported" 27 | lora_config = LoraConfig( 28 | r=training_args.lora_rank, 29 | 
target_modules=[ 30 | "q_proj", 31 | "v_proj", 32 | ], 33 | lora_alpha=training_args.lora_alpha, 34 | lora_dropout=training_args.lora_dropout, 35 | task_type="CAUSAL_LM", 36 | ) 37 | 38 | # Load model in 4-bit to do qLoRA 39 | bnb_config = BitsAndBytesConfig( 40 | load_in_4bit=True, 41 | bnb_4bit_quant_type="nf4", 42 | bnb_4bit_compute_dtype=torch.bfloat16, 43 | ) 44 | 45 | training_args = SFTConfig( 46 | per_device_train_batch_size=training_args.per_device_train_batch_size, 47 | gradient_accumulation_steps=training_args.gradient_accumulation_steps, 48 | warmup_steps=100, 49 | learning_rate=2e-4, 50 | bf16=True, 51 | logging_steps=20, 52 | output_dir="outputs", 53 | optim="paged_adamw_8bit", 54 | remove_unused_columns=False, 55 | num_train_epochs=training_args.num_train_epochs, 56 | max_seq_length=context_length, 57 | ) 58 | tokenizer = AutoTokenizer.from_pretrained( 59 | model_id, 60 | use_fast=True, 61 | ) 62 | model = AutoModelForCausalLM.from_pretrained( 63 | model_id, 64 | quantization_config=bnb_config, 65 | device_map={"": 0}, 66 | token=os.environ["HF_TOKEN"], 67 | ) 68 | 69 | # Load dataset 70 | dataset = SFTDataset( 71 | file="data/demo_data.jsonl", 72 | tokenizer=tokenizer, 73 | max_seq_length=context_length, 74 | template=model2template[model_id], 75 | ) 76 | 77 | # Define trainer 78 | trainer = SFTTrainer( 79 | model=model, 80 | train_dataset=dataset, 81 | args=training_args, 82 | peft_config=lora_config, 83 | data_collator=SFTDataCollator(tokenizer, max_seq_length=context_length), 84 | ) 85 | 86 | # Train model 87 | trainer.train() 88 | 89 | # save model 90 | trainer.save_model("outputs") 91 | 92 | # remove checkpoint folder 93 | os.system("rm -rf outputs/checkpoint-*") 94 | 95 | # upload lora weights and tokenizer 96 | print("Training Completed.") 97 | 98 | 99 | if __name__ == "__main__": 100 | # Define training arguments for LoRA fine-tuning 101 | training_args = LoraTrainingArguments( 102 | num_train_epochs=3, 103 | per_device_train_batch_size=2, 
104 | gradient_accumulation_steps=2, 105 | lora_rank=8, 106 | lora_alpha=16, 107 | lora_dropout=0.05, 108 | ) 109 | 110 | # Set model ID and context length 111 | model_id = "Qwen/Qwen1.5-0.5B" 112 | context_length = 2048 113 | 114 | # Start LoRA fine-tuning 115 | train_lora( 116 | model_id=model_id, context_length=context_length, training_args=training_args 117 | ) 118 | -------------------------------------------------------------------------------- /full_automation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import requests 5 | import yaml 6 | from loguru import logger 7 | from huggingface_hub import HfApi 8 | 9 | from demo import LoraTrainingArguments, train_lora 10 | from utils.constants import model2base_model, model2size 11 | from utils.flock_api import get_task, submit_task 12 | from utils.gpu_utils import get_gpu_type 13 | 14 | HF_USERNAME = os.environ["HF_USERNAME"] 15 | 16 | if __name__ == "__main__": 17 | task_id = os.environ["TASK_ID"] 18 | # load trainin args 19 | # define the path of the current file 20 | current_folder = os.path.dirname(os.path.realpath(__file__)) 21 | with open(f"{current_folder}/training_args.yaml", "r") as f: 22 | all_training_args = yaml.safe_load(f) 23 | 24 | task = get_task(task_id) 25 | # log the task info 26 | logger.info(json.dumps(task, indent=4)) 27 | # download data from a presigned url 28 | data_url = task["data"]["training_set_url"] 29 | context_length = task["data"]["context_length"] 30 | max_params = task["data"]["max_params"] 31 | 32 | # filter out the model within the max_params 33 | model2size = {k: v for k, v in model2size.items() if v <= max_params} 34 | all_training_args = {k: v for k, v in all_training_args.items() if k in model2size} 35 | logger.info(f"Models within the max_params: {all_training_args.keys()}") 36 | # download in chunks 37 | response = requests.get(data_url, stream=True) 38 | with open("data/demo_data.jsonl", "wb") 
as f: 39 | for chunk in response.iter_content(chunk_size=8192): 40 | f.write(chunk) 41 | 42 | # train all feasible models and merge 43 | for model_id in all_training_args.keys(): 44 | logger.info(f"Start to train the model {model_id}...") 45 | # if OOM, proceed to the next model 46 | try: 47 | train_lora( 48 | model_id=model_id, 49 | context_length=context_length, 50 | training_args=LoraTrainingArguments(**all_training_args[model_id]), 51 | ) 52 | except RuntimeError as e: 53 | logger.error(f"Error: {e}") 54 | logger.info("Proceed to the next model...") 55 | continue 56 | 57 | # generate a random repo id based on timestamp 58 | gpu_type = get_gpu_type() 59 | 60 | try: 61 | logger.info("Start to push the lora weight to the hub...") 62 | api = HfApi(token=os.environ["HF_TOKEN"]) 63 | repo_name = f"{HF_USERNAME}/task-{task_id}-{model_id.replace('/', '-')}" 64 | # check whether the repo exists 65 | try: 66 | api.create_repo( 67 | repo_name, 68 | exist_ok=False, 69 | repo_type="model", 70 | ) 71 | except Exception: 72 | logger.info( 73 | f"Repo {repo_name} already exists. Will commit the new version." 
74 | ) 75 | 76 | commit_message = api.upload_folder( 77 | folder_path="outputs", 78 | repo_id=repo_name, 79 | repo_type="model", 80 | ) 81 | # get commit hash 82 | commit_hash = commit_message.oid 83 | logger.info(f"Commit hash: {commit_hash}") 84 | logger.info(f"Repo name: {repo_name}") 85 | # submit 86 | submit_task( 87 | task_id, repo_name, model2base_model[model_id], gpu_type, commit_hash 88 | ) 89 | logger.info("Task submitted successfully") 90 | except Exception as e: 91 | logger.error(f"Error: {e}") 92 | logger.info("Proceed to the next model...") 93 | finally: 94 | # cleanup merged_model and output 95 | os.system("rm -rf merged_model") 96 | os.system("rm -rf outputs") 97 | continue 98 | -------------------------------------------------------------------------------- /merge.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from peft import PeftModel 3 | from transformers import AutoModelForCausalLM, AutoTokenizer 4 | 5 | 6 | def merge_lora_to_base_model( 7 | model_name_or_path: str, adapter_name_or_path: str, save_path: str 8 | ): 9 | tokenizer = AutoTokenizer.from_pretrained( 10 | adapter_name_or_path, 11 | use_fast=True, 12 | ) 13 | model = AutoModelForCausalLM.from_pretrained( 14 | model_name_or_path, 15 | trust_remote_code=True, 16 | low_cpu_mem_usage=True, 17 | torch_dtype=torch.float16, 18 | device_map={"": "cpu"}, 19 | ) 20 | model = PeftModel.from_pretrained( 21 | model, adapter_name_or_path, device_map={"": "cpu"} 22 | ) 23 | model = model.merge_and_unload() 24 | 25 | tokenizer.save_pretrained(save_path) 26 | model.save_pretrained(save_path) 27 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.13.1 2 | transformers>=4.43.0,<=4.45.0 3 | peft>=0.10.0,<=0.13.2 4 | loguru 5 | trl>=0.9.3,<=0.9.6 6 | bitsandbytes 7 | pyyaml 8 | 
-------------------------------------------------------------------------------- /training_args.yaml: -------------------------------------------------------------------------------- 1 | Qwen/Qwen2.5-0.5B-Instruct: 2 | per_device_train_batch_size: 1 3 | gradient_accumulation_steps: 8 4 | num_train_epochs: 1 5 | lora_rank: 8 6 | lora_alpha: 16 7 | lora_dropout: 0.1 8 | 9 | Qwen/Qwen2.5-1.5B-Instruct: 10 | per_device_train_batch_size: 1 11 | gradient_accumulation_steps: 8 12 | num_train_epochs: 1 13 | lora_rank: 4 14 | lora_alpha: 8 15 | lora_dropout: 0.1 16 | 17 | Qwen/Qwen2.5-7B-Instruct: 18 | per_device_train_batch_size: 1 19 | gradient_accumulation_steps: 8 20 | num_train_epochs: 1 21 | lora_rank: 4 22 | lora_alpha: 8 23 | lora_dropout: 0.1 24 | -------------------------------------------------------------------------------- /utils/constants.py: -------------------------------------------------------------------------------- 1 | qwen_template = { 2 | "system_format": "<|im_start|>system\n{content}<|im_end|>\n", 3 | "user_format": "<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n", 4 | "assistant_format": "{content}<|im_end|>\n", 5 | "tool_format": "{content}", 6 | "function_format": "{content}", 7 | "observation_format": "<|im_start|>tool\n{content}<|im_end|>\n<|im_start|>assistant\n", 8 | "system": "You are a helpful assistant.", 9 | } 10 | 11 | model2template = { 12 | "Qwen/Qwen2.5-0.5B-Instruct": qwen_template, 13 | "Qwen/Qwen2.5-1.5B-Instruct": qwen_template, 14 | "Qwen/Qwen2.5-7B-Instruct": qwen_template, 15 | } 16 | 17 | model2size = { 18 | "Qwen/Qwen2.5-0.5B-Instruct": 494_000_000, 19 | "Qwen/Qwen2.5-1.5B-Instruct": 1_540_000_000, 20 | "Qwen/Qwen2.5-7B-Instruct": 7_620_000_000, 21 | } 22 | 23 | model2base_model = { 24 | "Qwen/Qwen2.5-0.5B-Instruct": "qwen1.5", 25 | "Qwen/Qwen2.5-1.5B-Instruct": "qwen1.5", 26 | "Qwen/Qwen2.5-7B-Instruct": "qwen1.5", 27 | } 28 | -------------------------------------------------------------------------------- 
/utils/flock_api.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import requests 5 | 6 | FLOCK_API_KEY = os.environ["FLOCK_API_KEY"] 7 | FED_LEDGER_BASE_URL = "https://fed-ledger-prod.flock.io/api/v1" 8 | 9 | 10 | def get_task(task_id: int): 11 | response = requests.request( 12 | "GET", f"{FED_LEDGER_BASE_URL}/tasks/get?task_id={task_id}" 13 | ) 14 | return response.json() 15 | 16 | 17 | def submit_task( 18 | task_id: int, hg_repo_id: str, base_model: str, gpu_type: str, revision: str 19 | ): 20 | payload = json.dumps( 21 | { 22 | "task_id": task_id, 23 | "data": { 24 | "hg_repo_id": hg_repo_id, 25 | "base_model": base_model, 26 | "gpu_type": gpu_type, 27 | "revision": revision, 28 | }, 29 | } 30 | ) 31 | headers = { 32 | "flock-api-key": FLOCK_API_KEY, 33 | "Content-Type": "application/json", 34 | } 35 | response = requests.request( 36 | "POST", 37 | f"{FED_LEDGER_BASE_URL}/tasks/submit-result", 38 | headers=headers, 39 | data=payload, 40 | ) 41 | if response.status_code != 200: 42 | raise Exception(f"Failed to submit task: {response.text}") 43 | return response.json() 44 | -------------------------------------------------------------------------------- /utils/gpu_utils.py: -------------------------------------------------------------------------------- 1 | from torch.cuda import get_device_name 2 | 3 | 4 | def get_gpu_type(): 5 | try: 6 | gpu_name = get_device_name(0) 7 | return gpu_name 8 | except Exception as e: 9 | return f"Error retrieving GPU type: {e}" 10 | -------------------------------------------------------------------------------- /utils/tool_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List, Tuple 2 | import json 3 | 4 | DEFAULT_TOOL_PROMPT = ( 5 | "You have access to the following tools:\n{tool_text}" 6 | "Use the following format if using a tool:\n" 7 | "```\n" 8 | "Action: tool name (one of 
[{tool_names}])\n" 9 | "Action Input: the input to the tool, in a JSON format representing the kwargs " 10 | """(e.g. ```{{"input": "hello world", "num_beams": 5}}```)\n""" 11 | "```\n" 12 | ) 13 | 14 | DEFAULT_FUNCTION_SLOTS = "Action: {name}\nAction Input: {arguments}\n" 15 | 16 | 17 | def tool_formater(tools: List[Dict[str, Any]]) -> str: 18 | tool_text = "" 19 | tool_names = [] 20 | for tool in tools: 21 | param_text = "" 22 | for name, param in tool["parameters"]["properties"].items(): 23 | required, enum, items = "", "", "" 24 | if name in tool["parameters"].get("required", []): 25 | required = ", required" 26 | 27 | if param.get("enum", None): 28 | enum = ", should be one of [{}]".format(", ".join(param["enum"])) 29 | 30 | if param.get("items", None): 31 | items = ", where each item should be {}".format( 32 | param["items"].get("type", "") 33 | ) 34 | 35 | param_text += " - {name} ({type}{required}): {desc}{enum}{items}\n".format( 36 | name=name, 37 | type=param.get("type", ""), 38 | required=required, 39 | desc=param.get("description", ""), 40 | enum=enum, 41 | items=items, 42 | ) 43 | 44 | tool_text += "> Tool Name: {name}\nTool Description: {desc}\nTool Args:\n{args}\n".format( 45 | name=tool["name"], desc=tool.get("description", ""), args=param_text 46 | ) 47 | tool_names.append(tool["name"]) 48 | 49 | return DEFAULT_TOOL_PROMPT.format( 50 | tool_text=tool_text, tool_names=", ".join(tool_names) 51 | ) 52 | 53 | 54 | def function_formatter(tool_calls, function_slots=DEFAULT_FUNCTION_SLOTS) -> str: 55 | functions: List[Tuple[str, str]] = [] 56 | if not isinstance(tool_calls, list): 57 | tool_calls = [tool_calls] # parrallel function calls 58 | 59 | for tool_call in tool_calls: 60 | functions.append( 61 | (tool_call["name"], json.dumps(tool_call["arguments"], ensure_ascii=False)) 62 | ) 63 | 64 | elements = [] 65 | for name, arguments in functions: 66 | text = function_slots.format(name=name, arguments=arguments) 67 | elements.append(text) 68 | 69 | return 
"\n".join(elements) + "\n" 70 | --------------------------------------------------------------------------------