├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── FAQ.md ├── LICENSE ├── LLaMA-Adapter-V2-arXiv.pdf ├── LLaMA-Adapter-arXiv.pdf ├── MODEL_CARD.md ├── README.md ├── README_llama.md ├── alpaca_finetuning_v1 ├── engine_finetuning.py ├── extract_adapter_from_checkpoint.py ├── finetuning.py ├── finetuning.sh ├── llama │ ├── __init__.py │ ├── generation.py │ ├── model.py │ └── tokenizer.py ├── models_llama_adapter.py └── util │ ├── datasets.py │ ├── lars.py │ ├── lr_decay.py │ ├── lr_sched.py │ ├── misc.py │ └── pos_embed.py ├── docs ├── chat_demo.png ├── langchain_LLaMA_AdapterV2_demo.ipynb ├── langchain_integration.md ├── logo_v1.png ├── logo_v2.png ├── logo_v3.png ├── logo_v4.png ├── multi_model_example_1.png ├── multi_model_example_2.png ├── multi_model_example_3.png ├── multi_model_example_4.png ├── multimodal.png ├── pipeline.png └── youtube.png ├── download.sh ├── example.py ├── generate.sh ├── gorilla ├── README.md ├── alpaca_finetuning_v1 │ ├── engine_finetuning.py │ ├── extract_adapter_from_checkpoint.py │ ├── finetuning.py │ ├── finetuning_hf.sh │ ├── finetuning_tf.sh │ ├── finetuning_th.sh │ ├── llama │ │ ├── __init__.py │ │ ├── generation.py │ │ ├── model.py │ │ └── tokenizer.py │ ├── models_llama_adapter.py │ └── util │ │ ├── datasets.py │ │ ├── lars.py │ │ ├── lr_decay.py │ │ ├── lr_sched.py │ │ ├── misc.py │ │ └── pos_embed.py ├── finetune │ ├── configs │ │ └── finetune │ │ │ ├── EN.yaml │ │ │ ├── gorilla_hf.yaml │ │ │ ├── gorilla_tf.yaml │ │ │ └── gorilla_th.yaml │ ├── conversation.py │ ├── data │ │ └── alpaca.py │ ├── data_preprocess.py │ ├── demo.py │ ├── engine_finetune.py │ ├── engine_pretrain.py │ ├── exps │ │ └── train │ │ │ ├── base │ │ │ └── run.sh │ │ │ └── rev │ │ │ └── run.sh │ ├── global_configs.py │ ├── main_finetune.py │ ├── main_pretrain.py │ ├── model │ │ ├── LLM │ │ │ ├── __init__.py │ │ │ ├── llama.py │ │ │ └── revllama.py │ │ ├── __init__.py │ │ ├── meta.py │ │ └── tokenizer.py │ ├── params.json │ ├── scripts │ │ ├── finetune │ │ │ ├── finetune_7B_gorilla_hf.sh │ │ │ ├── finetune_7B_gorilla_tf.sh │ │ │ └── finetune_7B_gorilla_th.sh │ │ └── tools │ │ │ ├── debug_finetune_131_7B.sh │ │ │ └── merge130.sh │ ├── submitit_pretrain.py │ ├── tools │ │ └── get_consolidated_ckpt.py │ ├── transformer.py │ └── util │ │ ├── crop.py │ │ ├── lr_decay.py │ │ ├── lr_sched.py │ │ ├── misc.py │ │ └── pos_embed.py ├── gorilla-main │ ├── .github │ │ └── ISSUE_TEMPLATE │ │ │ ├── apibench.md │ │ │ ├── custom-template.md │ │ │ ├── feature_request.md │ │ │ └── hosted-gorilla-.md │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── data │ │ ├── README.md │ │ ├── api │ │ │ ├── huggingface_api.jsonl │ │ │ ├── tensorflowhub_api.jsonl │ │ │ └── torchhub_api.jsonl │ │ ├── apibench │ │ │ ├── huggingface_eval.json │ │ │ ├── huggingface_train.json │ │ │ ├── tensorflow_eval.json │ │ │ ├── tensorflow_train.json │ │ │ ├── torchhub_eval.json │ │ │ └── torchhub_train.json │ │ └── apizoo │ │ │ └── shawnharmsen1.json │ ├── eval │ │ ├── README.md │ │ ├── eval-data │ │ │ ├── questions │ │ │ │ ├── huggingface │ │ │ │ │ ├── questions_huggingface_0_shot.jsonl │ │ │ │ │ ├── questions_huggingface_bm25.jsonl │ │ │ │ │ ├── questions_huggingface_gpt_index.jsonl │ │ │ │ │ └── questions_huggingface_oracle.jsonl │ │ │ │ ├── tensorflowhub │ │ │ │ │ ├── questions_tensorflowhub_0_shot.jsonl │ │ │ │ │ ├── questions_tensorflowhub_bm25.jsonl │ │ │ │ │ ├── questions_tensorflowhub_gpt_index.jsonl │ │ │ │ │ └── questions_tensorflowhub_oracle.jsonl │ │ │ │ └── torchhub │ │ │ │ │ ├── 
questions_torchhub_0_shot.jsonl │ │ │ │ │ ├── questions_torchhub_bm25.jsonl │ │ │ │ │ ├── questions_torchhub_gpt_index.jsonl │ │ │ │ │ └── questions_torchhub_oracle.jsonl │ │ │ └── responses │ │ │ │ ├── huggingface │ │ │ │ ├── response_huggingface_Gorilla_FT_0_shot.jsonl │ │ │ │ ├── response_huggingface_Gorilla_FT_bm25.jsonl │ │ │ │ ├── response_huggingface_Gorilla_FT_gpt_index.jsonl │ │ │ │ ├── response_huggingface_Gorilla_FT_oracle.jsonl │ │ │ │ ├── response_huggingface_Gorilla_RT_0_shot.jsonl │ │ │ │ ├── response_huggingface_Gorilla_RT_bm25.jsonl │ │ │ │ ├── response_huggingface_Gorilla_RT_gpt_index.jsonl │ │ │ │ └── response_huggingface_Gorilla_RT_oracle.jsonl │ │ │ │ ├── tensorflowhub │ │ │ │ ├── response_tensorflowhub_Gorilla_FT_0_shot.jsonl │ │ │ │ ├── response_tensorflowhub_Gorilla_FT_bm25.jsonl │ │ │ │ ├── response_tensorflowhub_Gorilla_FT_gpt_index.jsonl │ │ │ │ ├── response_tensorflowhub_Gorilla_FT_oracle.jsonl │ │ │ │ ├── response_tensorflowhub_Gorilla_RT_0_shot.jsonl │ │ │ │ ├── response_tensorflowhub_Gorilla_RT_bm25.jsonl │ │ │ │ ├── response_tensorflowhub_Gorilla_RT_gpt_index.jsonl │ │ │ │ └── response_tensorflowhub_Gorilla_RT_oracle.jsonl │ │ │ │ └── torchhub │ │ │ │ ├── response_torchhub_Gorilla_FT_0_shot.jsonl │ │ │ │ ├── response_torchhub_Gorilla_FT_bm25.jsonl │ │ │ │ ├── response_torchhub_Gorilla_FT_gpt_index.jsonl │ │ │ │ ├── response_torchhub_Gorilla_FT_oracle.jsonl │ │ │ │ ├── response_torchhub_Gorilla_RT_0_shot.jsonl │ │ │ │ ├── response_torchhub_Gorilla_RT_bm25.jsonl │ │ │ │ ├── response_torchhub_Gorilla_RT_gpt_index.jsonl │ │ │ │ └── response_torchhub_Gorilla_RT_oracle.jsonl │ │ ├── eval-scripts │ │ │ ├── ast_eval_hf.py │ │ │ ├── ast_eval_tf.py │ │ │ ├── ast_eval_th.py │ │ │ └── codebleu │ │ │ │ ├── __init__.py │ │ │ │ ├── bleu.py │ │ │ │ ├── dataflow_match.py │ │ │ │ ├── keywords │ │ │ │ ├── c_sharp.txt │ │ │ │ ├── java.txt │ │ │ │ └── python.txt │ │ │ │ ├── parser │ │ │ │ ├── DFG.py │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ ├── build.sh │ │ │ │ ├── tree-sitter-python │ │ │ │ │ ├── .gitattributes │ │ │ │ │ ├── .github │ │ │ │ │ │ └── workflows │ │ │ │ │ │ │ └── ci.yml │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── .npmignore │ │ │ │ │ ├── Cargo.toml │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── README.md │ │ │ │ │ ├── binding.gyp │ │ │ │ │ ├── bindings │ │ │ │ │ │ ├── node │ │ │ │ │ │ │ ├── binding.cc │ │ │ │ │ │ │ └── index.js │ │ │ │ │ │ └── rust │ │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ │ ├── build.rs │ │ │ │ │ │ │ └── lib.rs │ │ │ │ │ ├── examples │ │ │ │ │ │ ├── compound-statement-without-trailing-newline.py │ │ │ │ │ │ ├── crlf-line-endings.py │ │ │ │ │ │ ├── mixed-spaces-tabs.py │ │ │ │ │ │ ├── multiple-newlines.py │ │ │ │ │ │ ├── python2-grammar-crlf.py │ │ │ │ │ │ ├── python2-grammar.py │ │ │ │ │ │ ├── python3-grammar-crlf.py │ │ │ │ │ │ ├── python3-grammar.py │ │ │ │ │ │ ├── python3.8_grammar.py │ │ │ │ │ │ ├── simple-statements-without-trailing-newline.py │ │ │ │ │ │ ├── tabs.py │ │ │ │ │ │ └── trailing-whitespace.py │ │ │ │ │ ├── grammar.js │ │ │ │ │ ├── package.json │ │ │ │ │ ├── queries │ │ │ │ │ │ ├── highlights.scm │ │ │ │ │ │ └── tags.scm │ │ │ │ │ ├── script │ │ │ │ │ │ ├── known_failures.txt │ │ │ │ │ │ └── parse-examples │ │ │ │ │ ├── src │ │ │ │ │ │ ├── grammar.json │ │ │ │ │ │ ├── node-types.json │ │ │ │ │ │ ├── parser.c │ │ │ │ │ │ ├── scanner.cc │ │ │ │ │ │ └── tree_sitter │ │ │ │ │ │ │ └── parser.h │ │ │ │ │ └── test │ │ │ │ │ │ ├── corpus │ │ │ │ │ │ ├── errors.txt │ │ │ │ │ │ ├── expressions.txt │ │ │ │ │ │ ├── literals.txt │ │ │ │ │ │ ├── 
pattern_matching.txt │ │ │ │ │ │ └── statements.txt │ │ │ │ │ │ └── highlight │ │ │ │ │ │ ├── keywords.py │ │ │ │ │ │ ├── parameters.py │ │ │ │ │ │ └── pattern_matching.py │ │ │ │ └── utils.py │ │ │ │ ├── readme.txt │ │ │ │ ├── syntax_check.py │ │ │ │ ├── syntax_match.py │ │ │ │ ├── utils.py │ │ │ │ └── weighted_ngram_match.py │ │ └── get_llm_responses.py │ ├── inference │ │ ├── README.md │ │ ├── apply_delta.py │ │ ├── example_questions │ │ │ └── example_questions.jsonl │ │ ├── gorilla_eval.py │ │ ├── requirements.txt │ │ └── serve │ │ │ ├── conv_template.py │ │ │ ├── gorilla_cli.py │ │ │ └── gorilla_falcon_cli.py │ └── requirements.txt └── inference │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── FAQ.md │ ├── LICENSE │ ├── MODEL_CARD.md │ ├── README.md │ ├── download.sh │ ├── example.py │ ├── gorilla_inference_full_finetune.py │ ├── gorilla_inference_llama_adapter_v1.py │ ├── llama │ ├── __init__.py │ ├── generation.py │ ├── model.py │ └── tokenizer.py │ ├── llama_for_adapter │ ├── __init__.py │ ├── generation.py │ ├── model.py │ └── tokenizer.py │ ├── requirements.txt │ └── setup.py ├── imagebind_LLM ├── ImageBind │ ├── .assets │ │ ├── bird_audio.wav │ │ ├── bird_image.jpg │ │ ├── car_audio.wav │ │ ├── car_image.jpg │ │ ├── dog_audio.wav │ │ └── dog_image.jpg │ ├── .gitignore │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── bpe │ │ └── bpe_simple_vocab_16e6.txt.gz │ ├── data.py │ ├── demo.py │ ├── model_card.md │ ├── models │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── imagebind_model.py │ │ ├── multimodal_preprocessors.py │ │ ├── pointbert │ │ │ ├── PointTransformer_8192point.yaml │ │ │ ├── checkpoint.py │ │ │ ├── dvae.py │ │ │ ├── logger.py │ │ │ ├── misc.py │ │ │ └── point_encoder.py │ │ └── transformer.py │ └── requirements.txt ├── README.md ├── convert_ckpt.py ├── data │ └── dataset.py ├── demo.py ├── demo_3d.py ├── docs │ └── train.md ├── engine_finetune.py ├── engine_pretrain.py ├── examples │ ├── airplane.pt │ ├── car.pt │ ├── door.pt │ ├── girl.jpg │ ├── girl_bgm.wav │ └── toilet.pt ├── exps │ ├── finetune.sh │ ├── finetune_ENCN.sh │ └── pretrain.sh ├── gradio_app.py ├── image_generate.py ├── llama │ ├── __init__.py │ ├── llama.py │ ├── llama_adapter.py │ ├── tokenizer.py │ └── utils.py ├── main_finetune.py ├── main_pretrain.py ├── requirements.txt ├── tools │ └── get_chinese_llama.py └── util │ ├── lr_sched.py │ └── misc.py ├── llama ├── __init__.py ├── generation.py ├── model.py └── tokenizer.py ├── llama_adapter_v2_chat65b ├── README.md ├── chat_demo.py ├── checkpoints │ └── model_zoo.md ├── conversation.py ├── environment.yml ├── llama │ ├── __init__.py │ ├── generation.py │ ├── model.py │ └── tokenizer.py ├── models_llama_adapter.py ├── scripts │ ├── srun_chat_llama65b_bias_scale_norm.sh │ └── torchrun_chat_llama65b_bias_scale_norm.sh └── util │ └── misc.py ├── llama_adapter_v2_multimodal7b ├── README.md ├── data │ └── dataset.py ├── demo.py ├── docs │ ├── eval.md │ └── train.md ├── engine_finetune.py ├── engine_pretrain.py ├── exps │ ├── finetune.sh │ └── pretrain.sh ├── gradio_app.py ├── llama │ ├── __init__.py │ ├── llama.py │ ├── llama_adapter.py │ ├── tokenizer.py │ └── utils.py ├── main_finetune.py ├── main_pretrain.py ├── requirements.txt └── util │ ├── evaluate_mme.py │ ├── extract_adapter_from_checkpoint.py │ ├── lr_sched.py │ └── misc.py ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── setup.py └── utils └── quantization.py /.pre-commit-config.yaml: 
-------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v3.2.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: end-of-file-fixer 10 | - id: check-yaml 11 | 12 | - repo: https://github.com/PyCQA/isort 13 | rev: 5.10.1 14 | hooks: 15 | - id: isort 16 | name: Format imports 17 | 18 | - repo: https://github.com/psf/black 19 | rev: 22.3.0 20 | hooks: 21 | - id: black 22 | name: Format code 23 | 24 | - repo: https://github.com/asottile/blacken-docs 25 | rev: v1.12.1 26 | hooks: 27 | - id: blacken-docs 28 | args: [--line-length=120] 29 | additional_dependencies: [black==21.12b0] 30 | 31 | - repo: https://github.com/charliermarsh/ruff-pre-commit 32 | rev: "v0.0.237" 33 | hooks: 34 | - id: ruff 35 | args: ["--fix"] 36 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. 
Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at . All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to LLaMA 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Meta's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Meta has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to LLaMA, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | ## 1. The download.sh script doesn't work on default bash in MacOS X: 3 | 4 | Please see answers from these issues: 5 | - https://github.com/facebookresearch/llama/issues/41#issuecomment-1451290160 6 | - https://github.com/facebookresearch/llama/issues/53#issue-1606582963 7 | 8 | 9 | ## 2. Generations are bad! 10 | 11 | Keep in mind these models are not finetuned for question answering.
As such, they should be prompted so that the expected answer is the natural continuation of the prompt. 12 | 13 | Here are a few examples of prompts (from [issue#69](https://github.com/facebookresearch/llama/issues/69)) geared towards finetuned models, and how to modify them to get the expected results: 14 | - Do not prompt with "What is the meaning of life? Be concise and do not repeat yourself." but with "I believe the meaning of life is" 15 | - Do not prompt with "Explain the theory of relativity." but with "Simply put, the theory of relativity states that" 16 | - Do not prompt with "Ten easy steps to build a website..." but with "Building a website can be done in 10 simple steps:\n" 17 | 18 | To be able to directly prompt the models with questions / instructions, you can either: 19 | - Prompt it with few-shot examples so that the model understands the task you have in mind. 20 | - Finetune the models on datasets of instructions to make them more robust to input prompts. 21 | 22 | We've updated `example.py` with more sample prompts. Overall, always keep in mind that models are very sensitive to prompts (particularly when they have not been finetuned). 23 | 24 | ## 3. CUDA Out of memory errors 25 | 26 | The `example.py` file pre-allocates a cache according to these settings: 27 | ```python 28 | model_args: ModelArgs = ModelArgs(max_seq_len=max_seq_len, max_batch_size=max_batch_size, **params) 29 | ``` 30 | 31 | Accounting for 14GB of memory for the model weights (7B model), this leaves 16GB available for the decoding cache, which stores 2 * 2 * n_layers * max_batch_size * max_seq_len * n_heads * head_dim bytes. 32 | 33 | With default parameters, this cache was about 17GB (2 * 2 * 32 * 32 * 1024 * 32 * 128) for the 7B model. 34 | 35 | We've added command line options to `example.py` and changed the default `max_seq_len` to 512, which should allow decoding on 30GB GPUs. 36 | 37 | Feel free to lower these settings according to your hardware. 38 | 39 | ## 4. Other languages 40 | The model was trained primarily on English, but also on a few other languages with Latin or Cyrillic alphabets. 41 | 42 | For instance, LLaMA was trained on Wikipedia for the 20 following languages: bg, ca, cs, da, de, en, es, fr, hr, hu, it, nl, pl, pt, ro, ru, sl, sr, sv, uk. 43 | 44 | LLaMA's tokenizer splits unseen characters into UTF-8 bytes; as a result, it might also be able to process other languages like Chinese or Japanese, even though they use different characters.
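To see this byte fallback in action, here is a minimal sketch using the `Tokenizer` wrapper from `llama/tokenizer.py`; it assumes `tokenizer.model` has already been fetched via `download.sh`, and the path below is a placeholder to replace with your own `TARGET_FOLDER`:

```python
# Minimal sketch (not part of the repo): inspect how the SentencePiece tokenizer
# handles non-Latin text. Assumes tokenizer.model was downloaded with download.sh;
# replace the placeholder path with your own TARGET_FOLDER.
from llama.tokenizer import Tokenizer

tok = Tokenizer(model_path="/path/to/TARGET_FOLDER/tokenizer.model")  # placeholder path

text = "祝你一天过得愉快"
ids = tok.encode(text, bos=False, eos=False)

print(ids)                                         # token ids
print([tok.sp_model.id_to_piece(i) for i in ids])  # characters absent from the vocab appear as byte pieces like <0xE7>
print(tok.decode(ids))                             # decodes back to the original string
```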
45 | 46 | Although the fraction of these languages in the training was negligible, LLaMA still showcases some abilities in Chinese-English translation: 47 | 48 | ``` 49 | Prompt = "J'aime le chocolat = I like chocolate\n祝你一天过得愉快 =" 50 | Output = "I wish you a nice day" 51 | ``` -------------------------------------------------------------------------------- /LLaMA-Adapter-V2-arXiv.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/LLaMA-Adapter-V2-arXiv.pdf -------------------------------------------------------------------------------- /LLaMA-Adapter-arXiv.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/LLaMA-Adapter-arXiv.pdf -------------------------------------------------------------------------------- /README_llama.md: -------------------------------------------------------------------------------- 1 | # LLaMA 2 | 3 | This repository is intended as a minimal, hackable and readable example to load [LLaMA](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) ([arXiv](https://arxiv.org/abs/2302.13971v1)) models and run inference. 4 | In order to download the checkpoints and tokenizer, fill this [google form](https://forms.gle/jk851eBVbX1m5TAv5) 5 | 6 | ## Setup 7 | 8 | In a conda env with pytorch / cuda available, run: 9 | ``` 10 | pip install -r requirements.txt 11 | ``` 12 | Then in this repository: 13 | ``` 14 | pip install -e . 15 | ``` 16 | 17 | ## Download 18 | 19 | Once your request is approved, you will receive links to download the tokenizer and model files. 20 | Edit the `download.sh` script with the signed url provided in the email to download the model weights and tokenizer. 21 | 22 | ## Inference 23 | 24 | The provided `example.py` can be run on a single or multi-gpu node with `torchrun` and will output completions for two pre-defined prompts. Using `TARGET_FOLDER` as defined in `download.sh`: 25 | ``` 26 | torchrun --nproc_per_node MP example.py --ckpt_dir $TARGET_FOLDER/model_size --tokenizer_path $TARGET_FOLDER/tokenizer.model 27 | ``` 28 | 29 | Different models require different MP values: 30 | 31 | | Model | MP | 32 | |--------|----| 33 | | 7B | 1 | 34 | | 13B | 2 | 35 | | 33B | 4 | 36 | | 65B | 8 | 37 | 38 | ## FAQ 39 | 40 | - [1. The download.sh script doesn't work on default bash in MacOS X](FAQ.md#1) 41 | - [2. Generations are bad!](FAQ.md#2) 42 | - [3. CUDA Out of memory errors](FAQ.md#3) 43 | - [4. Other languages](FAQ.md#4) 44 | 45 | ## Reference 46 | 47 | LLaMA: Open and Efficient Foundation Language Models -- https://arxiv.org/abs/2302.13971 48 | 49 | ``` 50 | @article{touvron2023llama, 51 | title={LLaMA: Open and Efficient Foundation Language Models}, 52 | author={Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\'e}e and Rozi{\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and Rodriguez, Aurelien and Joulin, Armand and Grave, Edouard and Lample, Guillaume}, 53 | journal={arXiv preprint arXiv:2302.13971}, 54 | year={2023} 55 | } 56 | ``` 57 | 58 | ## Model Card 59 | See [MODEL_CARD.md](MODEL_CARD.md) 60 | 61 | ## License 62 | See the [LICENSE](LICENSE) file. 
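As a companion to FAQ item 3 linked above (CUDA out-of-memory errors), here is a rough back-of-envelope sketch of the decoding-cache arithmetic; the 7B hyper-parameters are the ones quoted in the FAQ, and other model sizes would need the values from their own `params.json`:

```python
# Back-of-envelope KV-cache size, following the formula quoted in FAQ #3:
# 2 (keys and values) * 2 bytes (fp16) * n_layers * max_batch_size * max_seq_len * n_heads * head_dim
def kv_cache_bytes(n_layers: int, max_batch_size: int, max_seq_len: int, n_heads: int, head_dim: int) -> int:
    return 2 * 2 * n_layers * max_batch_size * max_seq_len * n_heads * head_dim

# 7B values from the FAQ: 32 layers, batch size 32, seq len 1024, 32 heads, head_dim 128
print(kv_cache_bytes(32, 32, 1024, 32, 128) / 1e9)  # ~17.2 GB, matching the "about 17GB" figure
# Lowering max_seq_len to 512 (the new default in example.py) roughly halves the cache:
print(kv_cache_bytes(32, 32, 512, 32, 128) / 1e9)   # ~8.6 GB
```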
63 | -------------------------------------------------------------------------------- /alpaca_finetuning_v1/extract_adapter_from_checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | model = torch.load("./checkpoint/checkpoint-4.pth", map_location="cpu") 4 | new_model = dict() 5 | weight_list = ["layers." + str(i) + ".attention.gate" for i in range(32)] 6 | old_weight_list = ["layers." + str(i) + ".attention.gate" for i in range(32)] 7 | weight_list = weight_list + ["adapter_query.weight"] 8 | 9 | print(weight_list) 10 | print(model["model"]["adapter_query.weight"].shape) 11 | 12 | for i in range(len(weight_list)): 13 | new_model[weight_list[i]] = model["model"][weight_list[i]] 14 | 15 | torch.save(new_model, "adapter_adapter_len10_layer30_epoch5.pth") 16 | -------------------------------------------------------------------------------- /alpaca_finetuning_v1/finetuning.sh: -------------------------------------------------------------------------------- 1 | torchrun --nproc_per_node 8 finetuning.py \ 2 | --model Llama7B_adapter \ 3 | --llama_model_path $TARGET_FOLDER/ \ 4 | --data_path $DATA_PATH/alpaca_data.json \ 5 | --adapter_layer 30 \ 6 | --adapter_len 10 \ 7 | --max_seq_len 512 \ 8 | --batch_size 4 \ 9 | --epochs 5 \ 10 | --warmup_epochs 2 \ 11 | --blr 9e-3 \ 12 | --weight_decay 0.02 \ 13 | --output_dir ./checkpoint/ 14 | -------------------------------------------------------------------------------- /alpaca_finetuning_v1/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3. 3 | 4 | from .generation import LLaMA 5 | from .model import ModelArgs, Transformer 6 | from .tokenizer import Tokenizer 7 | -------------------------------------------------------------------------------- /alpaca_finetuning_v1/llama/generation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3. 
3 | 4 | from typing import List 5 | 6 | import torch 7 | 8 | from llama.model import Transformer 9 | from llama.tokenizer import Tokenizer 10 | 11 | 12 | class LLaMA: 13 | def __init__(self, model: Transformer, tokenizer: Tokenizer): 14 | self.model = model 15 | self.tokenizer = tokenizer 16 | 17 | def generate( 18 | self, 19 | prompts: List[str], 20 | max_gen_len: int, 21 | temperature: float = 0.8, 22 | top_p: float = 0.95, 23 | ) -> List[str]: 24 | bsz = len(prompts) 25 | params = self.model.params 26 | assert bsz <= params.max_batch_size, (bsz, params.max_batch_size) 27 | 28 | prompt_tokens = [self.tokenizer.encode(x, bos=True, eos=False) for x in prompts] 29 | 30 | min_prompt_size = min([len(t) for t in prompt_tokens]) 31 | max_prompt_size = max([len(t) for t in prompt_tokens]) 32 | 33 | total_len = min(params.max_seq_len, max_gen_len + max_prompt_size) 34 | 35 | tokens = torch.full((bsz, total_len), self.tokenizer.pad_id).cuda().long() 36 | for k, t in enumerate(prompt_tokens): 37 | tokens[k, : len(t)] = torch.tensor(t).long() 38 | input_text_mask = tokens != self.tokenizer.pad_id 39 | start_pos = min_prompt_size 40 | prev_pos = 0 41 | for cur_pos in range(start_pos, total_len): 42 | logits = self.model.forward_only(tokens[:, prev_pos:cur_pos], prev_pos) 43 | if temperature > 0: 44 | probs = torch.softmax(logits / temperature, dim=-1) 45 | next_token = sample_top_p(probs, top_p) 46 | else: 47 | next_token = torch.argmax(logits, dim=-1) 48 | next_token = next_token.reshape(-1) 49 | # only replace token if prompt has already been generated 50 | next_token = torch.where(input_text_mask[:, cur_pos], tokens[:, cur_pos], next_token) 51 | tokens[:, cur_pos] = next_token 52 | prev_pos = cur_pos 53 | 54 | decoded = [] 55 | for i, t in enumerate(tokens.tolist()): 56 | # cut to max gen len 57 | t = t[: len(prompt_tokens[i]) + max_gen_len] 58 | # cut to eos tok if any 59 | try: 60 | t = t[: t.index(self.tokenizer.eos_id)] 61 | except ValueError: 62 | pass 63 | decoded.append(self.tokenizer.decode(t)) 64 | return decoded 65 | 66 | 67 | def sample_top_p(probs, p): 68 | probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True) 69 | probs_sum = torch.cumsum(probs_sort, dim=-1) 70 | mask = probs_sum - probs_sort > p 71 | probs_sort[mask] = 0.0 72 | probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True)) 73 | next_token = torch.multinomial(probs_sort, num_samples=1) 74 | next_token = torch.gather(probs_idx, -1, next_token) 75 | return next_token 76 | -------------------------------------------------------------------------------- /alpaca_finetuning_v1/llama/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3. 
3 | 4 | import os 5 | from logging import getLogger 6 | from typing import List 7 | 8 | from sentencepiece import SentencePieceProcessor 9 | 10 | logger = getLogger() 11 | 12 | 13 | class Tokenizer: 14 | def __init__(self, model_path: str): 15 | # reload tokenizer 16 | assert os.path.isfile(model_path), model_path 17 | self.sp_model = SentencePieceProcessor(model_file=model_path) 18 | logger.info(f"Reloaded SentencePiece model from {model_path}") 19 | 20 | # BOS / EOS token IDs 21 | self.n_words: int = self.sp_model.vocab_size() 22 | self.bos_id: int = self.sp_model.bos_id() 23 | self.eos_id: int = self.sp_model.eos_id() 24 | self.pad_id: int = self.sp_model.pad_id() 25 | logger.info(f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id}") 26 | assert self.sp_model.vocab_size() == self.sp_model.get_piece_size() 27 | 28 | def encode(self, s: str, bos: bool, eos: bool) -> List[int]: 29 | assert type(s) is str 30 | t = self.sp_model.encode(s) 31 | if bos: 32 | t = [self.bos_id] + t 33 | if eos: 34 | t = t + [self.eos_id] 35 | return t 36 | 37 | def decode(self, t: List[int]) -> str: 38 | return self.sp_model.decode(t) 39 | -------------------------------------------------------------------------------- /alpaca_finetuning_v1/models_llama_adapter.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import torch 4 | 5 | from llama import ModelArgs, Tokenizer, Transformer 6 | 7 | 8 | def Llama7B_adapter(args, **kwargs): 9 | 10 | llama_model_path = args.llama_model_path 11 | model_name = "7B" 12 | 13 | checkpoint = torch.load(llama_model_path + model_name + "/consolidated.00.pth", map_location="cpu") 14 | print(llama_model_path + model_name + "/consolidated.00.pth") 15 | 16 | with open(llama_model_path + model_name + "/params.json", "r") as f: 17 | params = json.loads(f.read()) 18 | 19 | model_args: ModelArgs = ModelArgs( 20 | max_seq_len=args.max_seq_len, 21 | max_batch_size=32, 22 | adapter_len=args.adapter_len, 23 | adapter_layer=args.adapter_layer, 24 | **params 25 | ) 26 | tokenizer = Tokenizer(model_path=llama_model_path + "/tokenizer.model") 27 | 28 | model_args.vocab_size = tokenizer.n_words 29 | torch.set_default_tensor_type(torch.cuda.HalfTensor) 30 | model_llama_adapter = Transformer(model_args) 31 | torch.set_default_tensor_type(torch.FloatTensor) 32 | model_llama_adapter.load_state_dict(checkpoint, strict=False) 33 | 34 | for name, param in model_llama_adapter.named_parameters(): 35 | if "adapter" not in name: 36 | param.requires_grad = False 37 | else: 38 | param.requires_grad = True 39 | param.data = param.data.float() 40 | 41 | for name, param in model_llama_adapter.layers[-1 * args.adapter_layer :].named_parameters(): 42 | if "gate" in name or "adapter" in name: 43 | param.data = param.data.float() 44 | param.requires_grad = True 45 | 46 | return model_llama_adapter 47 | 48 | 49 | # set recommended archs 50 | Llama7B_adapter = Llama7B_adapter 51 | -------------------------------------------------------------------------------- /alpaca_finetuning_v1/util/datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # -------------------------------------------------------- 7 | # References: 8 | # DeiT: https://github.com/facebookresearch/deit 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | 13 | import PIL 14 | from timm.data import create_transform 15 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 16 | from torchvision import datasets, transforms 17 | 18 | 19 | def build_dataset(is_train, args): 20 | transform = build_transform(is_train, args) 21 | 22 | root = os.path.join(args.data_path, "train" if is_train else "val") 23 | dataset = datasets.ImageFolder(root, transform=transform) 24 | 25 | print(dataset) 26 | 27 | return dataset 28 | 29 | 30 | def build_transform(is_train, args): 31 | mean = IMAGENET_DEFAULT_MEAN 32 | std = IMAGENET_DEFAULT_STD 33 | # train transform 34 | if is_train: 35 | # this should always dispatch to transforms_imagenet_train 36 | transform = create_transform( 37 | input_size=args.input_size, 38 | is_training=True, 39 | color_jitter=args.color_jitter, 40 | auto_augment=args.aa, 41 | interpolation="bicubic", 42 | re_prob=args.reprob, 43 | re_mode=args.remode, 44 | re_count=args.recount, 45 | mean=mean, 46 | std=std, 47 | ) 48 | return transform 49 | 50 | # eval transform 51 | t = [] 52 | if args.input_size <= 224: 53 | crop_pct = 224 / 256 54 | else: 55 | crop_pct = 1.0 56 | size = int(args.input_size / crop_pct) 57 | t.append( 58 | transforms.Resize(size, interpolation=PIL.Image.BICUBIC), # to maintain same ratio w.r.t. 224 images 59 | ) 60 | t.append(transforms.CenterCrop(args.input_size)) 61 | 62 | t.append(transforms.ToTensor()) 63 | t.append(transforms.Normalize(mean, std)) 64 | return transforms.Compose(t) 65 | -------------------------------------------------------------------------------- /alpaca_finetuning_v1/util/lars.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # -------------------------------------------------------- 7 | # LARS optimizer, implementation from MoCo v3: 8 | # https://github.com/facebookresearch/moco-v3 9 | # -------------------------------------------------------- 10 | 11 | import torch 12 | 13 | 14 | class LARS(torch.optim.Optimizer): 15 | """ 16 | LARS optimizer, no rate scaling or weight decay for parameters <= 1D. 
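Each multi-dimensional parameter's update is rescaled in step() by trust_coefficient * ||param|| / ||update||, so layers whose gradients are small relative to their weights still receive proportionate updates.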
17 | """ 18 | def __init__(self, params, lr=0, weight_decay=0, momentum=0.9, trust_coefficient=0.001): 19 | defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum, trust_coefficient=trust_coefficient) 20 | super().__init__(params, defaults) 21 | 22 | @torch.no_grad() 23 | def step(self): 24 | for g in self.param_groups: 25 | for p in g['params']: 26 | dp = p.grad 27 | 28 | if dp is None: 29 | continue 30 | 31 | if p.ndim > 1: # if not normalization gamma/beta or bias 32 | dp = dp.add(p, alpha=g['weight_decay']) 33 | param_norm = torch.norm(p) 34 | update_norm = torch.norm(dp) 35 | one = torch.ones_like(param_norm) 36 | q = torch.where(param_norm > 0., 37 | torch.where(update_norm > 0, 38 | (g['trust_coefficient'] * param_norm / update_norm), one), 39 | one) 40 | dp = dp.mul(q) 41 | 42 | param_state = self.state[p] 43 | if 'mu' not in param_state: 44 | param_state['mu'] = torch.zeros_like(p) 45 | mu = param_state['mu'] 46 | mu.mul_(g['momentum']).add_(dp) 47 | p.add_(mu, alpha=-g['lr']) -------------------------------------------------------------------------------- /alpaca_finetuning_v1/util/lr_decay.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # -------------------------------------------------------- 7 | # References: 8 | # ELECTRA https://github.com/google-research/electra 9 | # BEiT: https://github.com/microsoft/unilm/tree/master/beit 10 | # -------------------------------------------------------- 11 | 12 | import json 13 | 14 | 15 | def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75): 16 | """ 17 | Parameter groups for layer-wise lr decay 18 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58 19 | """ 20 | param_group_names = {} 21 | param_groups = {} 22 | 23 | num_layers = len(model.blocks) + 1 24 | 25 | layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1)) 26 | 27 | for n, p in model.named_parameters(): 28 | if not p.requires_grad: 29 | continue 30 | 31 | # no decay: all 1D parameters and model specific ones 32 | if p.ndim == 1 or n in no_weight_decay_list: 33 | g_decay = "no_decay" 34 | this_decay = 0. 
35 | else: 36 | g_decay = "decay" 37 | this_decay = weight_decay 38 | 39 | layer_id = get_layer_id_for_vit(n, num_layers) 40 | group_name = "layer_%d_%s" % (layer_id, g_decay) 41 | 42 | if group_name not in param_group_names: 43 | this_scale = layer_scales[layer_id] 44 | 45 | param_group_names[group_name] = { 46 | "lr_scale": this_scale, 47 | "weight_decay": this_decay, 48 | "params": [], 49 | } 50 | param_groups[group_name] = { 51 | "lr_scale": this_scale, 52 | "weight_decay": this_decay, 53 | "params": [], 54 | } 55 | 56 | param_group_names[group_name]["params"].append(n) 57 | param_groups[group_name]["params"].append(p) 58 | 59 | # print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2)) 60 | 61 | return list(param_groups.values()) 62 | 63 | 64 | def get_layer_id_for_vit(name, num_layers): 65 | """ 66 | Assign a parameter with its layer id 67 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33 68 | """ 69 | if name in ['cls_token', 'pos_embed']: 70 | return 0 71 | elif name.startswith('patch_embed'): 72 | return 0 73 | elif name.startswith('blocks'): 74 | return int(name.split('.')[1]) + 1 75 | else: 76 | return num_layers -------------------------------------------------------------------------------- /alpaca_finetuning_v1/util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | 10 | def adjust_learning_rate(optimizer, epoch, args): 11 | """Decay the learning rate with half-cycle cosine after warmup""" 12 | if epoch < args.warmup_epochs: 13 | lr = args.lr * epoch / args.warmup_epochs 14 | else: 15 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * ( 16 | 1.0 + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs)) 17 | ) 18 | for param_group in optimizer.param_groups: 19 | if "lr_scale" in param_group: 20 | param_group["lr"] = lr * param_group["lr_scale"] 21 | else: 22 | param_group["lr"] = lr 23 | return lr 24 | -------------------------------------------------------------------------------- /docs/chat_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/chat_demo.png -------------------------------------------------------------------------------- /docs/logo_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/logo_v1.png -------------------------------------------------------------------------------- /docs/logo_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/logo_v2.png -------------------------------------------------------------------------------- /docs/logo_v3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/logo_v3.png -------------------------------------------------------------------------------- /docs/logo_v4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/logo_v4.png -------------------------------------------------------------------------------- /docs/multi_model_example_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/multi_model_example_1.png -------------------------------------------------------------------------------- /docs/multi_model_example_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/multi_model_example_2.png -------------------------------------------------------------------------------- /docs/multi_model_example_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/multi_model_example_3.png -------------------------------------------------------------------------------- /docs/multi_model_example_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/multi_model_example_4.png -------------------------------------------------------------------------------- /docs/multimodal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/multimodal.png -------------------------------------------------------------------------------- /docs/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/pipeline.png -------------------------------------------------------------------------------- /docs/youtube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/docs/youtube.png -------------------------------------------------------------------------------- /download.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3. 
3 | 4 | PRESIGNED_URL="" # replace with presigned url from email 5 | MODEL_SIZE="7B,13B,30B,65B" # edit this list with the model sizes you wish to download 6 | TARGET_FOLDER="" # where all files should end up 7 | 8 | declare -A N_SHARD_DICT 9 | 10 | N_SHARD_DICT["7B"]="0" 11 | N_SHARD_DICT["13B"]="1" 12 | N_SHARD_DICT["30B"]="3" 13 | N_SHARD_DICT["65B"]="7" 14 | 15 | echo "Downloading tokenizer" 16 | wget ${PRESIGNED_URL/'*'/"tokenizer.model"} -O ${TARGET_FOLDER}"/tokenizer.model" 17 | wget ${PRESIGNED_URL/'*'/"tokenizer_checklist.chk"} -O ${TARGET_FOLDER}"/tokenizer_checklist.chk" 18 | 19 | (cd ${TARGET_FOLDER} && md5sum -c tokenizer_checklist.chk) 20 | 21 | for i in ${MODEL_SIZE//,/ } 22 | do 23 | echo "Downloading ${i}" 24 | mkdir -p ${TARGET_FOLDER}"/${i}" 25 | for s in $(seq -f "0%g" 0 ${N_SHARD_DICT[$i]}) 26 | do 27 | wget ${PRESIGNED_URL/'*'/"${i}/consolidated.${s}.pth"} -O ${TARGET_FOLDER}"/${i}/consolidated.${s}.pth" 28 | done 29 | wget ${PRESIGNED_URL/'*'/"${i}/params.json"} -O ${TARGET_FOLDER}"/${i}/params.json" 30 | wget ${PRESIGNED_URL/'*'/"${i}/checklist.chk"} -O ${TARGET_FOLDER}"/${i}/checklist.chk" 31 | echo "Checking checksums" 32 | (cd ${TARGET_FOLDER}"/${i}" && md5sum -c checklist.chk) 33 | done -------------------------------------------------------------------------------- /generate.sh: -------------------------------------------------------------------------------- 1 | torchrun --nproc_per_node 1 example.py \ 2 | --ckpt_dir /data1/llma/7B \ 3 | --tokenizer_path /data1/llma/tokenizer.model \ 4 | --adapter_path llama_adapter_len10_layer30_release.pth \ 5 | --quantizer False 6 | 7 | -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/extract_adapter_from_checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | 4 | args = argparse.ArgumentParser("extract", add_help=False) 5 | 6 | args.add_argument("--model_path", type=str) 7 | 8 | args = args.parse_args() 9 | 10 | model = torch.load(args.model_path, map_location="cpu") 11 | new_model = dict() 12 | weight_list = ["layers." + str(i) + ".attention.gate" for i in range(32)] 13 | old_weight_list = ["layers." 
+ str(i) + ".attention.gate" for i in range(32)] 14 | weight_list = weight_list + ["adapter_query.weight"] 15 | 16 | print(weight_list) 17 | print(model["model"]["adapter_query.weight"].shape) 18 | 19 | for i in range(len(weight_list)): 20 | new_model[weight_list[i]] = model["model"][weight_list[i]] 21 | 22 | save_path = args.model_path.replace('.pth', '-adapter.pth') 23 | torch.save(new_model, save_path) 24 | -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/finetuning_hf.sh: -------------------------------------------------------------------------------- 1 | llama_path="/data1/llma/7B" 2 | 3 | torchrun --nproc_per_node 2 --master_port=29501 finetuning.py \ 4 | --model Llama7B_adapter \ 5 | --llama_model_path "$llama_path"/ \ 6 | --data_path ../gorilla-main/data/apibench/huggingface_train.json \ 7 | --adapter_layer 30 \ 8 | --adapter_len 10 \ 9 | --max_seq_len 512 \ 10 | --batch_size 4 \ 11 | --epochs 10 \ 12 | --warmup_epochs 2 \ 13 | --blr 8e-1 \ 14 | --weight_decay 0.02 \ 15 | --output_dir ./checkpoint/exp_hf -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/finetuning_tf.sh: -------------------------------------------------------------------------------- 1 | llama_path="/data1/llma/7B" 2 | 3 | torchrun --nproc_per_node 6 --master_port=29502 finetuning.py \ 4 | --model Llama7B_adapter \ 5 | --llama_model_path "$llama_path"/ \ 6 | --data_path ../gorilla-main/data/apibench/tensorflow_train.json \ 7 | --adapter_layer 30 \ 8 | --adapter_len 10 \ 9 | --max_seq_len 512 \ 10 | --batch_size 4 \ 11 | --epochs 5 \ 12 | --warmup_epochs 2 \ 13 | --blr 6e-2 \ 14 | --weight_decay 0.02 \ 15 | --output_dir ./checkpoint/exp_tf 16 | -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/finetuning_th.sh: -------------------------------------------------------------------------------- 1 | llama_path="/data1/llma/7B" 2 | 3 | torchrun --nproc_per_node 1 finetuning.py \ 4 | --model Llama7B_adapter \ 5 | --llama_model_path "$llama_path"/ \ 6 | --data_path ../gorilla-main/data/apibench/torchhub_train.json \ 7 | --adapter_layer 30 \ 8 | --adapter_len 10 \ 9 | --max_seq_len 512 \ 10 | --batch_size 4 \ 11 | --epochs 5 \ 12 | --warmup_epochs 2 \ 13 | --blr 9e-3 \ 14 | --weight_decay 0.02 \ 15 | --output_dir ./checkpoint/exp_th 16 | -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3. 3 | 4 | from .generation import LLaMA 5 | from .model import ModelArgs, Transformer 6 | from .tokenizer import Tokenizer 7 | -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/llama/generation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3. 
3 | 4 | from typing import List 5 | 6 | import torch 7 | 8 | from llama.model import Transformer 9 | from llama.tokenizer import Tokenizer 10 | 11 | 12 | class LLaMA: 13 | def __init__(self, model: Transformer, tokenizer: Tokenizer): 14 | self.model = model 15 | self.tokenizer = tokenizer 16 | 17 | def generate( 18 | self, 19 | prompts: List[str], 20 | max_gen_len: int, 21 | temperature: float = 0.8, 22 | top_p: float = 0.95, 23 | ) -> List[str]: 24 | bsz = len(prompts) 25 | params = self.model.params 26 | assert bsz <= params.max_batch_size, (bsz, params.max_batch_size) 27 | 28 | prompt_tokens = [self.tokenizer.encode(x, bos=True, eos=False) for x in prompts] 29 | 30 | min_prompt_size = min([len(t) for t in prompt_tokens]) 31 | max_prompt_size = max([len(t) for t in prompt_tokens]) 32 | 33 | total_len = min(params.max_seq_len, max_gen_len + max_prompt_size) 34 | 35 | tokens = torch.full((bsz, total_len), self.tokenizer.pad_id).cuda().long() 36 | for k, t in enumerate(prompt_tokens): 37 | tokens[k, : len(t)] = torch.tensor(t).long() 38 | input_text_mask = tokens != self.tokenizer.pad_id 39 | start_pos = min_prompt_size 40 | prev_pos = 0 41 | for cur_pos in range(start_pos, total_len): 42 | logits = self.model.forward_only(tokens[:, prev_pos:cur_pos], prev_pos) 43 | if temperature > 0: 44 | probs = torch.softmax(logits / temperature, dim=-1) 45 | next_token = sample_top_p(probs, top_p) 46 | else: 47 | next_token = torch.argmax(logits, dim=-1) 48 | next_token = next_token.reshape(-1) 49 | # only replace token if prompt has already been generated 50 | next_token = torch.where(input_text_mask[:, cur_pos], tokens[:, cur_pos], next_token) 51 | tokens[:, cur_pos] = next_token 52 | prev_pos = cur_pos 53 | 54 | decoded = [] 55 | for i, t in enumerate(tokens.tolist()): 56 | # cut to max gen len 57 | t = t[: len(prompt_tokens[i]) + max_gen_len] 58 | # cut to eos tok if any 59 | try: 60 | t = t[: t.index(self.tokenizer.eos_id)] 61 | except ValueError: 62 | pass 63 | decoded.append(self.tokenizer.decode(t)) 64 | return decoded 65 | 66 | 67 | def sample_top_p(probs, p): 68 | probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True) 69 | probs_sum = torch.cumsum(probs_sort, dim=-1) 70 | mask = probs_sum - probs_sort > p 71 | probs_sort[mask] = 0.0 72 | probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True)) 73 | next_token = torch.multinomial(probs_sort, num_samples=1) 74 | next_token = torch.gather(probs_idx, -1, next_token) 75 | return next_token 76 | -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/llama/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3. 
3 | 4 | import os 5 | from logging import getLogger 6 | from typing import List 7 | 8 | from sentencepiece import SentencePieceProcessor 9 | 10 | logger = getLogger() 11 | 12 | 13 | class Tokenizer: 14 | def __init__(self, model_path: str): 15 | # reload tokenizer 16 | assert os.path.isfile(model_path), model_path 17 | self.sp_model = SentencePieceProcessor(model_file=model_path) 18 | logger.info(f"Reloaded SentencePiece model from {model_path}") 19 | 20 | # BOS / EOS token IDs 21 | self.n_words: int = self.sp_model.vocab_size() 22 | self.bos_id: int = self.sp_model.bos_id() 23 | self.eos_id: int = self.sp_model.eos_id() 24 | self.pad_id: int = self.sp_model.pad_id() 25 | logger.info(f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id}") 26 | assert self.sp_model.vocab_size() == self.sp_model.get_piece_size() 27 | 28 | def encode(self, s: str, bos: bool, eos: bool) -> List[int]: 29 | assert type(s) is str 30 | t = self.sp_model.encode(s) 31 | if bos: 32 | t = [self.bos_id] + t 33 | if eos: 34 | t = t + [self.eos_id] 35 | return t 36 | 37 | def decode(self, t: List[int]) -> str: 38 | return self.sp_model.decode(t) 39 | -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/models_llama_adapter.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import torch 4 | 5 | from llama import ModelArgs, Tokenizer, Transformer 6 | 7 | 8 | def Llama7B_adapter(args, **kwargs): 9 | 10 | llama_model_path = args.llama_model_path 11 | model_name = "7B" 12 | 13 | checkpoint = torch.load(llama_model_path + "/consolidated.00.pth", map_location="cpu") 14 | print(llama_model_path + "/consolidated.00.pth") 15 | 16 | with open(llama_model_path + "/params.json", "r") as f: 17 | params = json.loads(f.read()) 18 | 19 | model_args: ModelArgs = ModelArgs( 20 | max_seq_len=args.max_seq_len, 21 | max_batch_size=32, 22 | adapter_len=args.adapter_len, 23 | adapter_layer=args.adapter_layer, 24 | **params 25 | ) 26 | tokenizer = Tokenizer(model_path=llama_model_path + "/tokenizer.model") 27 | 28 | model_args.vocab_size = tokenizer.n_words 29 | torch.set_default_tensor_type(torch.cuda.HalfTensor) 30 | model_llama_adapter = Transformer(model_args) 31 | torch.set_default_tensor_type(torch.FloatTensor) 32 | model_llama_adapter.load_state_dict(checkpoint, strict=False) 33 | 34 | for name, param in model_llama_adapter.named_parameters(): 35 | if "adapter" not in name: 36 | param.requires_grad = False 37 | else: 38 | param.requires_grad = True 39 | param.data = param.data.float() 40 | 41 | for name, param in model_llama_adapter.layers[-1 * args.adapter_layer :].named_parameters(): 42 | if "gate" in name or "adapter" in name: 43 | param.data = param.data.float() 44 | param.requires_grad = True 45 | 46 | return model_llama_adapter 47 | 48 | 49 | # set recommended archs 50 | Llama7B_adapter = Llama7B_adapter 51 | -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/util/datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # -------------------------------------------------------- 7 | # References: 8 | # DeiT: https://github.com/facebookresearch/deit 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | 13 | import PIL 14 | from timm.data import create_transform 15 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 16 | from torchvision import datasets, transforms 17 | 18 | 19 | def build_dataset(is_train, args): 20 | transform = build_transform(is_train, args) 21 | 22 | root = os.path.join(args.data_path, "train" if is_train else "val") 23 | dataset = datasets.ImageFolder(root, transform=transform) 24 | 25 | print(dataset) 26 | 27 | return dataset 28 | 29 | 30 | def build_transform(is_train, args): 31 | mean = IMAGENET_DEFAULT_MEAN 32 | std = IMAGENET_DEFAULT_STD 33 | # train transform 34 | if is_train: 35 | # this should always dispatch to transforms_imagenet_train 36 | transform = create_transform( 37 | input_size=args.input_size, 38 | is_training=True, 39 | color_jitter=args.color_jitter, 40 | auto_augment=args.aa, 41 | interpolation="bicubic", 42 | re_prob=args.reprob, 43 | re_mode=args.remode, 44 | re_count=args.recount, 45 | mean=mean, 46 | std=std, 47 | ) 48 | return transform 49 | 50 | # eval transform 51 | t = [] 52 | if args.input_size <= 224: 53 | crop_pct = 224 / 256 54 | else: 55 | crop_pct = 1.0 56 | size = int(args.input_size / crop_pct) 57 | t.append( 58 | transforms.Resize(size, interpolation=PIL.Image.BICUBIC), # to maintain same ratio w.r.t. 224 images 59 | ) 60 | t.append(transforms.CenterCrop(args.input_size)) 61 | 62 | t.append(transforms.ToTensor()) 63 | t.append(transforms.Normalize(mean, std)) 64 | return transforms.Compose(t) 65 | -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/util/lars.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # -------------------------------------------------------- 7 | # LARS optimizer, implementation from MoCo v3: 8 | # https://github.com/facebookresearch/moco-v3 9 | # -------------------------------------------------------- 10 | 11 | import torch 12 | 13 | 14 | class LARS(torch.optim.Optimizer): 15 | """ 16 | LARS optimizer, no rate scaling or weight decay for parameters <= 1D. 
17 | """ 18 | def __init__(self, params, lr=0, weight_decay=0, momentum=0.9, trust_coefficient=0.001): 19 | defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum, trust_coefficient=trust_coefficient) 20 | super().__init__(params, defaults) 21 | 22 | @torch.no_grad() 23 | def step(self): 24 | for g in self.param_groups: 25 | for p in g['params']: 26 | dp = p.grad 27 | 28 | if dp is None: 29 | continue 30 | 31 | if p.ndim > 1: # if not normalization gamma/beta or bias 32 | dp = dp.add(p, alpha=g['weight_decay']) 33 | param_norm = torch.norm(p) 34 | update_norm = torch.norm(dp) 35 | one = torch.ones_like(param_norm) 36 | q = torch.where(param_norm > 0., 37 | torch.where(update_norm > 0, 38 | (g['trust_coefficient'] * param_norm / update_norm), one), 39 | one) 40 | dp = dp.mul(q) 41 | 42 | param_state = self.state[p] 43 | if 'mu' not in param_state: 44 | param_state['mu'] = torch.zeros_like(p) 45 | mu = param_state['mu'] 46 | mu.mul_(g['momentum']).add_(dp) 47 | p.add_(mu, alpha=-g['lr']) -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/util/lr_decay.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # -------------------------------------------------------- 7 | # References: 8 | # ELECTRA https://github.com/google-research/electra 9 | # BEiT: https://github.com/microsoft/unilm/tree/master/beit 10 | # -------------------------------------------------------- 11 | 12 | import json 13 | 14 | 15 | def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75): 16 | """ 17 | Parameter groups for layer-wise lr decay 18 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58 19 | """ 20 | param_group_names = {} 21 | param_groups = {} 22 | 23 | num_layers = len(model.blocks) + 1 24 | 25 | layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1)) 26 | 27 | for n, p in model.named_parameters(): 28 | if not p.requires_grad: 29 | continue 30 | 31 | # no decay: all 1D parameters and model specific ones 32 | if p.ndim == 1 or n in no_weight_decay_list: 33 | g_decay = "no_decay" 34 | this_decay = 0. 
35 | else: 36 | g_decay = "decay" 37 | this_decay = weight_decay 38 | 39 | layer_id = get_layer_id_for_vit(n, num_layers) 40 | group_name = "layer_%d_%s" % (layer_id, g_decay) 41 | 42 | if group_name not in param_group_names: 43 | this_scale = layer_scales[layer_id] 44 | 45 | param_group_names[group_name] = { 46 | "lr_scale": this_scale, 47 | "weight_decay": this_decay, 48 | "params": [], 49 | } 50 | param_groups[group_name] = { 51 | "lr_scale": this_scale, 52 | "weight_decay": this_decay, 53 | "params": [], 54 | } 55 | 56 | param_group_names[group_name]["params"].append(n) 57 | param_groups[group_name]["params"].append(p) 58 | 59 | # print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2)) 60 | 61 | return list(param_groups.values()) 62 | 63 | 64 | def get_layer_id_for_vit(name, num_layers): 65 | """ 66 | Assign a parameter with its layer id 67 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33 68 | """ 69 | if name in ['cls_token', 'pos_embed']: 70 | return 0 71 | elif name.startswith('patch_embed'): 72 | return 0 73 | elif name.startswith('blocks'): 74 | return int(name.split('.')[1]) + 1 75 | else: 76 | return num_layers -------------------------------------------------------------------------------- /gorilla/alpaca_finetuning_v1/util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | 10 | def adjust_learning_rate(optimizer, epoch, args): 11 | """Decay the learning rate with half-cycle cosine after warmup""" 12 | if epoch < args.warmup_epochs: 13 | lr = args.lr * epoch / args.warmup_epochs 14 | else: 15 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * ( 16 | 1.0 + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs)) 17 | ) 18 | for param_group in optimizer.param_groups: 19 | if "lr_scale" in param_group: 20 | param_group["lr"] = lr * param_group["lr_scale"] 21 | else: 22 | param_group["lr"] = lr 23 | return lr 24 | -------------------------------------------------------------------------------- /gorilla/finetune/configs/finetune/EN.yaml: -------------------------------------------------------------------------------- 1 | META: 2 | - '../data/alpaca_gpt4_data.json' -------------------------------------------------------------------------------- /gorilla/finetune/configs/finetune/gorilla_hf.yaml: -------------------------------------------------------------------------------- 1 | META: 2 | - '../gorilla-main/data/apibench/huggingface_train.json' -------------------------------------------------------------------------------- /gorilla/finetune/configs/finetune/gorilla_tf.yaml: -------------------------------------------------------------------------------- 1 | META: 2 | - '../gorilla-main/data/apibench/tensorflow_train.json' -------------------------------------------------------------------------------- /gorilla/finetune/configs/finetune/gorilla_th.yaml: -------------------------------------------------------------------------------- 1 | META: 2 | - '../gorilla-main/data/apibench/torchhub_train.json' -------------------------------------------------------------------------------- /gorilla/finetune/data_preprocess.py: -------------------------------------------------------------------------------- 1 | import json 2 
| 3 | PROMPT_DICT = { 4 | "prompt_input": ( 5 | "Below is an instruction that describes a task, paired with an input that provides further context. " 6 | "Write a response that appropriately completes the request.\n\n" 7 | "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:" 8 | ), 9 | "prompt_no_input": ( 10 | "Below is an instruction that describes a task. " 11 | "Write a response that appropriately completes the request.\n\n" 12 | "### Instruction:\n{instruction}\n\n### Response:" 13 | ), 14 | } 15 | import pdb 16 | pdb.set_trace() 17 | 18 | datas = json.load(open('/home/pgao/stanford_alpaca/stanford_alpaca/alpaca_data.json')) 19 | prompt_input, prompt_no_input = PROMPT_DICT["prompt_input"], PROMPT_DICT["prompt_no_input"] 20 | sources = [ 21 | prompt_input.format_map(example) if example.get("input", "") != "" else prompt_no_input.format_map(example) 22 | for example in datas 23 | ] 24 | 25 | 26 | targets = [f"{example['output']}" for example in datas] 27 | examples = [s + t for s, t in zip(sources, targets)] 28 | for strings in (examples, sources): 29 | print(strings) 30 | 31 | -------------------------------------------------------------------------------- /gorilla/finetune/exps/train/base/run.sh: -------------------------------------------------------------------------------- 1 | exp_name="train/base_wamrup2000_lr_decay_1800000_batch16_32gpu_accmulation_4_lr_0005_adam_095_09_weight_decay_0_dot_1_clip_2" 2 | mkdir -p output/"$exp_name" 3 | 4 | srun -p alpha_vl --gres=gpu:8 --cpus-per-task 16 -n32 \ 5 | --ntasks-per-node=8 --quotatype=reserved python -u main_pretrain.py --batch_size 16 \ 6 | --llama_type llama --weight_decay 0.1 --output_dir output/"$exp_name" \ 7 | --accum_iter 4 --warmup_iters 2000 --lr_decay_iters 1800000 --lr 0.0005 --min_lr 0.00005 --clip_grad 2 \ 8 | 2>&1 | tee -a output/"$exp_name"/output.log 9 | -------------------------------------------------------------------------------- /gorilla/finetune/exps/train/rev/run.sh: -------------------------------------------------------------------------------- 1 | exp_name="train/rev/wamrup2000_lr_decay_1800000_batch16_32gpu_accmulation_4_lr_0005_adam_095_09_weight_decay_0_dot_1_clip_2" 2 | mkdir -p output/"$exp_name" 3 | 4 | srun -p alpha_vl --gres=gpu:8 --cpus-per-task 16 -n32 \ 5 | --ntasks-per-node=8 --quotatype=reserved python -u main_pretrain.py --batch_size 16 \ 6 | --llama_type revllama --reversible_grad --weight_decay 0.1 --output_dir output/"$exp_name" \ 7 | --accum_iter 4 --warmup_iters 2000 --lr_decay_iters 1800000 --lr 0.0005 --min_lr 0.00005 --clip_grad 2 \ 8 | 2>&1 | tee -a output/"$exp_name"/output.log 9 | -------------------------------------------------------------------------------- /gorilla/finetune/global_configs.py: -------------------------------------------------------------------------------- 1 | tokenizer_path = '/data1/llma/tokenizer.model' 2 | petrel_conf = "/mnt/petrelfs/share_data/gaopeng/ldy/petreloss_all.conf" 3 | petrel_prefix = "cluster_p_ssd:s3://falcon-refinedweb/data" 4 | data_meta_path = "/mnt/petrelfs/share_data/gaopeng/ldy/falcon_list.json" 5 | -------------------------------------------------------------------------------- /gorilla/finetune/model/LLM/__init__.py: -------------------------------------------------------------------------------- 1 | from . import llama 2 | from . 
import revllama -------------------------------------------------------------------------------- /gorilla/finetune/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/LLaMA-Adapter/521a09da84f70f6913d54b7421afa24010319e47/gorilla/finetune/model/__init__.py -------------------------------------------------------------------------------- /gorilla/finetune/model/meta.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import json 4 | from .tokenizer import Tokenizer 5 | from . import LLM 6 | from global_configs import tokenizer_path 7 | 8 | 9 | class MetaModel(nn.Module): 10 | """ Meta wrapper that builds a LLaMA-family Transformer (llama / revllama) and computes its language-modeling loss 11 | """ 12 | def __init__(self, llama_type, reversible_grad: bool, llama_config): 13 | super().__init__() 14 | 15 | self.criterion = torch.nn.CrossEntropyLoss(ignore_index=0) 16 | 17 | ModelArgs = LLM.__dict__[llama_type].ModelArgs 18 | Transformer = LLM.__dict__[llama_type].Transformer 19 | 20 | with open(llama_config, "r") as f: 21 | params = json.loads(f.read()) 22 | model_args: ModelArgs = ModelArgs( 23 | max_seq_len=2048, max_batch_size=32, **params 24 | ) 25 | tokenizer = Tokenizer(model_path=tokenizer_path) 26 | model_args.vocab_size = tokenizer.n_words 27 | if reversible_grad: 28 | if hasattr(model_args, "reversible_gradient"): 29 | model_args.reversible_gradient = True 30 | else: 31 | raise KeyError(f"{ModelArgs} object has no attribute reversible_gradient") 32 | 33 | model = Transformer(model_args) 34 | self.llma = model 35 | for name, param in self.named_parameters(): 36 | if param.requires_grad: 37 | print(f"Trainable param: {name}, {param.shape}, {param.dtype}") 38 | count = sum(p.numel() for p in self.parameters() if p.requires_grad) 39 | print(f"Parameter count: {count}") 40 | 41 | def forward(self, examples, labels): 42 | output = self.llma(examples) 43 | output = output[:, :-1, :] 44 | labels = labels[:, 1:] 45 | 46 | if labels.sum() == 0: 47 | c_loss = output.mean() * 0 48 | else: 49 | c_loss = self.criterion(output.reshape(-1, 32000), labels.flatten()) 50 | pred = 0 51 | mask = 0 52 | return c_loss, c_loss, pred, mask -------------------------------------------------------------------------------- /gorilla/finetune/model/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3.
3 | 4 | from sentencepiece import SentencePieceProcessor 5 | from logging import getLogger 6 | from typing import List 7 | import os 8 | 9 | 10 | logger = getLogger() 11 | 12 | 13 | class Tokenizer: 14 | def __init__(self, model_path: str): 15 | # reload tokenizer 16 | assert os.path.isfile(model_path), model_path 17 | self.sp_model = SentencePieceProcessor(model_file=model_path) 18 | logger.info(f"Reloaded SentencePiece model from {model_path}") 19 | 20 | # BOS / EOS token IDs 21 | self.n_words: int = self.sp_model.vocab_size() 22 | self.bos_id: int = self.sp_model.bos_id() 23 | self.eos_id: int = self.sp_model.eos_id() 24 | self.pad_id: int = self.sp_model.pad_id() 25 | logger.info( 26 | f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id}" 27 | ) 28 | assert self.sp_model.vocab_size() == self.sp_model.get_piece_size() 29 | 30 | def encode(self, s: str, bos: bool, eos: bool) -> List[int]: 31 | assert type(s) is str 32 | t = self.sp_model.encode(s) 33 | if bos: 34 | t = [self.bos_id] + t 35 | if eos: 36 | t = t + [self.eos_id] 37 | return t 38 | 39 | def decode(self, t: List[int]) -> str: 40 | return self.sp_model.decode(t) 41 | -------------------------------------------------------------------------------- /gorilla/finetune/params.json: -------------------------------------------------------------------------------- 1 | {"dim": 2048, "multiple_of": 256, "n_heads": 16, "n_layers": 12, "norm_eps": 1e-06, "vocab_size": -1} 2 | -------------------------------------------------------------------------------- /gorilla/finetune/scripts/finetune/finetune_7B_gorilla_hf.sh: -------------------------------------------------------------------------------- 1 | data_parallel="$1" 2 | mp="$2" 3 | llama_path="/data1/llma/7B" 4 | 5 | exp_name=finetune_"$data_parallel"_mp"$mp"_bsz2_accum_4_gpu8_lr_00002_warmup1_epoch3_max_len512_gorilla_huggingface_consolidate 6 | mkdir -p output/"$exp_name" 7 | 8 | python -u -m torch.distributed.launch --master_port=1112 --nproc_per_node=8 --use_env main_finetune.py \ 9 | --llama_type llama --weight_decay 0.1 --output_dir output/"$exp_name" \ 10 | --accum_iter 4 --batch_size 2 --warmup_epochs 1 --epochs 3 --lr 0.00002 --min_lr 0.000005 --clip_grad 2 \ 11 | --llama_config "$llama_path"/params.json \ 12 | --data_parallel "$data_parallel" --model_parallel_size "$mp" \ 13 | --max_words 512 --data_config configs/finetune/gorilla_hf.yaml --llama_tokenizer_path "$llama_path"/tokenizer.model \ 14 | --pretrained_path "$llama_path" --pretrained_type meta_ori --checkpointing \ 15 | --save_consolidated 2>&1 | tee -a output/"$exp_name"/output.log -------------------------------------------------------------------------------- /gorilla/finetune/scripts/finetune/finetune_7B_gorilla_tf.sh: -------------------------------------------------------------------------------- 1 | data_parallel="$1" 2 | mp="$2" 3 | llama_path="/data1/llma/7B" 4 | 5 | exp_name=finetune_"$data_parallel"_mp"$mp"_bsz2_accum_4_gpu8_lr_00002_warmup1_epoch3_max_len512_gorilla_tensorflow_consolidate 6 | mkdir -p output/"$exp_name" 7 | 8 | python -u -m torch.distributed.launch --master_port=1112 --nproc_per_node=8 --use_env main_finetune.py \ 9 | --llama_type llama --weight_decay 0.1 --output_dir output/"$exp_name" \ 10 | --accum_iter 4 --batch_size 2 --warmup_epochs 1 --epochs 3 --lr 0.00002 --min_lr 0.000005 --clip_grad 2 \ 11 | --llama_config "$llama_path"/params.json \ 12 | --data_parallel "$data_parallel" --model_parallel_size "$mp" \ 13 | --max_words 512 --data_config 
configs/finetune/gorilla_tf.yaml --llama_tokenizer_path "$llama_path"/tokenizer.model \ 14 | --pretrained_path "$llama_path" --pretrained_type meta_ori --checkpointing \ 15 | --save_consolidated 2>&1 | tee -a output/"$exp_name"/output.log 16 | -------------------------------------------------------------------------------- /gorilla/finetune/scripts/finetune/finetune_7B_gorilla_th.sh: -------------------------------------------------------------------------------- 1 | data_parallel="$1" 2 | mp="$2" 3 | llama_path="/data1/llma/7B" 4 | 5 | exp_name=finetune_"$data_parallel"_mp"$mp"_bsz2_accum_4_gpu8_lr_00002_warmup1_epoch3_max_len512_gorilla_torchhub_consolidate 6 | mkdir -p output/"$exp_name" 7 | 8 | python -u -m torch.distributed.launch --master_port=1112 --nproc_per_node=8 --use_env main_finetune.py \ 9 | --llama_type llama --weight_decay 0.1 --output_dir output/"$exp_name" \ 10 | --accum_iter 4 --batch_size 2 --warmup_epochs 1 --epochs 3 --lr 0.00002 --min_lr 0.000005 --clip_grad 2 \ 11 | --llama_config "$llama_path"/params.json \ 12 | --data_parallel "$data_parallel" --model_parallel_size "$mp" \ 13 | --max_words 512 --data_config configs/finetune/gorilla_th.yaml --llama_tokenizer_path "$llama_path"/tokenizer.model \ 14 | --pretrained_path "$llama_path" --pretrained_type meta_ori --checkpointing \ 15 | --save_consolidated 2>&1 | tee -a output/"$exp_name"/output.log 16 | -------------------------------------------------------------------------------- /gorilla/finetune/scripts/tools/debug_finetune_131_7B.sh: -------------------------------------------------------------------------------- 1 | load_dir="$1" 2 | save_dir="$2" 3 | 4 | exp_name=tool/get_consolidated_ckpt 5 | mkdir -p output/"$exp_name" 6 | mkdir -p "$save_dir" 7 | 8 | python -u -m torch.distributed.launch --master_port=1112 --nproc_per_node=8 --use_env \ 9 | tools/get_consolidated_ckpt.py \ 10 | --llama_type llama \ 11 | --llama_config /data1/llma/7B/params.json \ 12 | --data_parallel sdp --model_parallel_size 1 \ 13 | --load_dir "$load_dir" --save_dir "$save_dir" \ 14 | 2>&1 | tee -a output/"$exp_name"/output.log 15 | -------------------------------------------------------------------------------- /gorilla/finetune/scripts/tools/merge130.sh: -------------------------------------------------------------------------------- 1 | load_dir="$1" 2 | save_dir="$2" 3 | 4 | exp_name=tool/get_consolidated_ckpt 5 | mkdir -p output/"$exp_name" 6 | mkdir -p "$save_dir" 7 | 8 | python -u -m torch.distributed.launch --master_port=1112 --nproc_per_node=8 --use_env \ 9 | tools/get_consolidated_ckpt.py \ 10 | --llama_type llama \ 11 | --llama_config /data1/llma/7B/params.json \ 12 | --data_parallel sdp --model_parallel_size 1 \ 13 | --load_dir "$load_dir" --save_dir "$save_dir" \ 14 | 2>&1 | tee -a output/"$exp_name"/output.log 15 | -------------------------------------------------------------------------------- /gorilla/finetune/util/crop.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | import torch 10 | 11 | from torchvision import transforms 12 | from torchvision.transforms import functional as F 13 | 14 | 15 | class RandomResizedCrop(transforms.RandomResizedCrop): 16 | """ 17 | RandomResizedCrop for matching TF/TPU implementation: no for-loop is used. 
18 | This may lead to results different with torchvision's version. 19 | Following BYOL's TF code: 20 | https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206 21 | """ 22 | @staticmethod 23 | def get_params(img, scale, ratio): 24 | width, height = F._get_image_size(img) 25 | area = height * width 26 | 27 | target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() 28 | log_ratio = torch.log(torch.tensor(ratio)) 29 | aspect_ratio = torch.exp( 30 | torch.empty(1).uniform_(log_ratio[0], log_ratio[1]) 31 | ).item() 32 | 33 | w = int(round(math.sqrt(target_area * aspect_ratio))) 34 | h = int(round(math.sqrt(target_area / aspect_ratio))) 35 | 36 | w = min(w, width) 37 | h = min(h, height) 38 | 39 | i = torch.randint(0, height - h + 1, size=(1,)).item() 40 | j = torch.randint(0, width - w + 1, size=(1,)).item() 41 | 42 | return i, j, h, w -------------------------------------------------------------------------------- /gorilla/finetune/util/lr_decay.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # -------------------------------------------------------- 7 | # References: 8 | # ELECTRA https://github.com/google-research/electra 9 | # BEiT: https://github.com/microsoft/unilm/tree/master/beit 10 | # -------------------------------------------------------- 11 | 12 | import json 13 | 14 | 15 | def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75): 16 | """ 17 | Parameter groups for layer-wise lr decay 18 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58 19 | """ 20 | param_group_names = {} 21 | param_groups = {} 22 | 23 | num_layers = len(model.blocks) + 1 24 | 25 | layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1)) 26 | 27 | for n, p in model.named_parameters(): 28 | if not p.requires_grad: 29 | continue 30 | 31 | # no decay: all 1D parameters and model specific ones 32 | if p.ndim == 1 or n in no_weight_decay_list: 33 | g_decay = "no_decay" 34 | this_decay = 0. 
35 | else: 36 | g_decay = "decay" 37 | this_decay = weight_decay 38 | 39 | layer_id = get_layer_id_for_vit(n, num_layers) 40 | group_name = "layer_%d_%s" % (layer_id, g_decay) 41 | 42 | if group_name not in param_group_names: 43 | this_scale = layer_scales[layer_id] 44 | 45 | param_group_names[group_name] = { 46 | "lr_scale": this_scale, 47 | "weight_decay": this_decay, 48 | "params": [], 49 | } 50 | param_groups[group_name] = { 51 | "lr_scale": this_scale, 52 | "weight_decay": this_decay, 53 | "params": [], 54 | } 55 | 56 | param_group_names[group_name]["params"].append(n) 57 | param_groups[group_name]["params"].append(p) 58 | 59 | # print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2)) 60 | 61 | return list(param_groups.values()) 62 | 63 | 64 | def get_layer_id_for_vit(name, num_layers): 65 | """ 66 | Assign a parameter with its layer id 67 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33 68 | """ 69 | if name in ['cls_token', 'pos_embed']: 70 | return 0 71 | elif name.startswith('patch_embed'): 72 | return 0 73 | elif name.startswith('blocks'): 74 | return int(name.split('.')[1]) + 1 75 | else: 76 | return num_layers -------------------------------------------------------------------------------- /gorilla/finetune/util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | def adjust_learning_rate(optimizer, it, args): 10 | """Decay the learning rate with half-cycle cosine after warmup""" 11 | if it < args.warmup_iters: # 1) linear warmup for warmup_iters steps 12 | lr = args.lr * it / args.warmup_iters 13 | elif it > args.lr_decay_iters: # 2) if it > lr_decay_iters, return min learning rate 14 | lr = args.min_lr 15 | else: # 3) in between, use cosine decay down to min learning rate 16 | decay_ratio = (it - args.warmup_iters) / (args.lr_decay_iters - args.warmup_iters) 17 | assert 0 <= decay_ratio <= 1 18 | coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio)) # coeff ranges 0..1 19 | lr = args.min_lr + (args.lr - args.min_lr) * coeff 20 | 21 | for param_group in optimizer.param_groups: 22 | if "lr_scale" in param_group: 23 | param_group["lr"] = lr * param_group["lr_scale"] 24 | else: 25 | param_group["lr"] = lr 26 | return lr 27 | 28 | 29 | def adjust_learning_rate_epoch(optimizer, epoch, args): 30 | """Decay the learning rate with half-cycle cosine after warmup""" 31 | if epoch < args.warmup_epochs: 32 | lr = args.lr * epoch / args.warmup_epochs 33 | else: 34 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \ 35 | (1. 
+ math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs))) 36 | for param_group in optimizer.param_groups: 37 | if "lr_scale" in param_group: 38 | param_group["lr"] = lr * param_group["lr_scale"] 39 | else: 40 | param_group["lr"] = lr 41 | return lr 42 | -------------------------------------------------------------------------------- /gorilla/gorilla-main/.github/ISSUE_TEMPLATE/apibench.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: APIBench 3 | about: Create a report to help us improve APIBench 4 | title: "[Apibench] " 5 | labels: apibench-data 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the issue** 11 | A clear and concise description of what the issue is. 12 | 13 | **ID datapoint** 14 | 1. Datapoint permalink: (If more than one, include as a python list of strings) 15 | 2. Provider: TorchHub/HuggingFace/TensorFlow Hub 16 | 3. Gorilla repo commit #: 17 | 18 | **What is the issue** 19 | 20 | **Proposed Changes** 21 | 22 | { 23 | 'previous_datapoint':[], 24 | 'updated_datapoint':[] 25 | } 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /gorilla/gorilla-main/.github/ISSUE_TEMPLATE/custom-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom template 3 | about: Custom template 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /gorilla/gorilla-main/.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[feature] " 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is the feature request related to a problem?** 11 | Ex. I'd like to see [...] 12 | 13 | **Describe the solution you'd like** 14 | When I run [X], I want to see [Y] 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 18 | -------------------------------------------------------------------------------- /gorilla/gorilla-main/.github/ISSUE_TEMPLATE/hosted-gorilla-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'Hosted Gorilla ' 3 | about: Issues when using hosted Gorilla 4 | title: "[bug] Hosted Gorilla: " 5 | labels: hosted-gorilla 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. I tried '...' on and