├── .gitignore
├── LICENSE
├── README.md
├── datasets
│   ├── procthor_converted
│   │   └── README.md
│   └── rplan_converted
│       └── README.md
├── generations
│   └── README.md
├── models
│   └── README.md
├── procthor_dataset.py
├── procthor_dataset_convert.py
├── recipes
│   ├── README.md
│   ├── benchmarks
│   │   ├── fmbench
│   │   │   ├── README.md
│   │   │   ├── config.yml
│   │   │   └── img
│   │   │       ├── CFT.png
│   │   │       ├── instances.png
│   │   │       └── latency_vs_tokens.png
│   │   └── inference_throughput
│   │       ├── README.md
│   │       ├── cloud-api
│   │       │   ├── README.md
│   │       │   └── azure
│   │       │       ├── chat_azure_api_benchmark.py
│   │       │       ├── input.jsonl
│   │       │       ├── parameters.json
│   │       │       └── pretrained_azure_api_benchmark.py
│   │       ├── on-prem
│   │       │   ├── README.md
│   │       │   └── vllm
│   │       │       ├── chat_vllm_benchmark.py
│   │       │       ├── input.jsonl
│   │       │       ├── parameters.json
│   │       │       └── pretrained_vllm_benchmark.py
│   │       ├── requirements.txt
│   │       └── tokenizer
│   │           ├── special_tokens_map.json
│   │           ├── tokenizer.json
│   │           ├── tokenizer.model
│   │           └── tokenizer_config.json
│   ├── code_llama
│   │   ├── README.md
│   │   ├── code_completion_example.py
│   │   ├── code_completion_prompt.txt
│   │   ├── code_infilling_example.py
│   │   ├── code_infilling_prompt.txt
│   │   └── code_instruct_example.py
│   ├── evaluation
│   │   ├── README.md
│   │   ├── eval.py
│   │   ├── open_llm_eval_prep.sh
│   │   └── open_llm_leaderboard
│   │       ├── arc_challeneg_25shots.yaml
│   │       ├── hellaswag_10shots.yaml
│   │       ├── hellaswag_utils.py
│   │       ├── mmlu_5shots.yaml
│   │       └── winogrande_5shots.yaml
│   ├── finetuning
│   │   ├── LLM_finetuning_overview.md
│   │   ├── README.md
│   │   ├── datasets
│   │   │   ├── README.md
│   │   │   └── custom_dataset.py
│   │   ├── finetuning.py
│   │   ├── huggingface_trainer
│   │   │   └── peft_finetuning.ipynb
│   │   ├── multi_node.slurm
│   │   ├── multigpu_finetuning.md
│   │   └── singlegpu_finetuning.md
│   ├── inference
│   │   ├── llama_web_ui
│   │   │   ├── Llama2_Gradio.ipynb
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   └── streamlit_llama2.py
│   │   ├── local_inference
│   │   │   ├── README.md
│   │   │   ├── chat_completion
│   │   │   │   ├── chat_completion.py
│   │   │   │   └── chats.json
│   │   │   ├── inference.py
│   │   │   └── samsum_prompt.txt
│   │   └── model_servers
│   │       ├── README.md
│   │       ├── hf_text_generation_inference
│   │       │   ├── README.md
│   │       │   └── merge_lora_weights.py
│   │       ├── llama-on-prem.md
│   │       └── vllm
│   │           └── inference.py
│   ├── llama_api_providers
│   │   ├── Azure_API_example
│   │   │   └── azure_api_example.ipynb
│   │   ├── OctoAI_API_examples
│   │   │   ├── Getting_to_know_Llama.ipynb
│   │   │   ├── HelloLlamaCloud.ipynb
│   │   │   ├── LiveData.ipynb
│   │   │   ├── Llama2_Gradio.ipynb
│   │   │   ├── RAG_Chatbot_example
│   │   │   │   ├── RAG_Chatbot_Example.ipynb
│   │   │   │   ├── data
│   │   │   │   │   └── Llama Getting Started Guide.pdf
│   │   │   │   ├── requirements.txt
│   │   │   │   └── vectorstore
│   │   │   │       └── db_faiss
│   │   │   │           ├── index.faiss
│   │   │   │           └── index.pkl
│   │   │   └── VideoSummary.ipynb
│   │   ├── Using_Externally_Hosted_LLMs.ipynb
│   │   └── examples_with_aws
│   │       ├── Prompt_Engineering_with_Llama_2_On_Amazon_Bedrock.ipynb
│   │       ├── ReAct_Llama_3_Bedrock-WK.ipynb
│   │       └── getting_started_llama_3_on_amazon_bedrock.ipynb
│   ├── multilingual
│   │   ├── README.md
│   │   ├── extend_tokenizer.py
│   │   ├── imgs
│   │   │   ├── phase1-eval-loss.png
│   │   │   ├── phase1-train-loss.png
│   │   │   ├── phase2-eval-loss.png
│   │   │   └── phase2-train-loss.png
│   │   ├── prepare_data.py
│   │   └── train_tokenizer.py
│   ├── quickstart
│   │   ├── Getting_to_know_Llama.ipynb
│   │   ├── Prompt_Engineering_with_Llama_2.ipynb
│   │   └── Running_Llama3_Anywhere
│   │       ├── Running_Llama_on_HF_transformers.ipynb
│   │       └── Running_Llama_on_Mac_Windows_Linux.ipynb
│   ├── responsible_ai
│   │   ├── CodeShieldUsageDemo.ipynb
│   │   ├── Purple_Llama_Anyscale.ipynb
│   │   ├── Purple_Llama_OctoAI.ipynb
│   │   ├── README.md
│   │   ├── input_output_guardrails_with_llama.ipynb
│   │   └── llama_guard
│   │       ├── README.md
│   │       ├── __init__.py
│   │       └── inference.py
│   └── use_cases
│       ├── LiveData.ipynb
│       ├── RAG
│       │   └── HelloLlamaCloud.ipynb
│       ├── README.md
│       ├── VideoSummary.ipynb
│       ├── chatbots
│       │   ├── RAG_chatbot
│       │   │   ├── RAG_Chatbot_Example.ipynb
│       │   │   ├── data
│       │   │   │   └── Llama Getting Started Guide.pdf
│       │   │   ├── requirements.txt
│       │   │   └── vectorstore
│       │   │       ├── db_faiss
│       │   │       │   ├── index.faiss
│       │   │       │   └── index.pkl
│       │   │       └── mongodb
│       │   │           └── rag_mongodb_llama3_huggingface_open_source.ipynb
│       │   ├── messenger_llama
│       │   │   ├── llama_messenger.py
│       │   │   └── messenger_llama3.md
│       │   └── whatsapp_llama
│       │       ├── llama_chatbot.py
│       │       └── whatsapp_llama3.md
│       └── text2sql
│           ├── StructuredLlama.ipynb
│           ├── csv2db.py
│           ├── nba.txt
│           ├── nba_roster.db
│           └── txt2csv.py
├── requirements.txt
├── requirements_llama3.txt
├── rplan_dataset.py
├── rplan_dataset_convert.py
├── run_generation_procthor.py
├── run_generation_rplan.py
├── run_metric.py
└── src
    ├── __init__.py
    ├── llama_recipes
    │   ├── configs
    │   │   ├── __init__.py
    │   │   ├── datasets.py
    │   │   ├── fsdp.py
    │   │   ├── peft.py
    │   │   ├── training.py
    │   │   └── wandb.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── concatenator.py
    │   │   ├── llama_guard
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── finetuning_data_formatter.py
    │   │   │   └── finetuning_data_formatter_example.py
    │   │   └── sampler.py
    │   ├── finetuning.py
    │   ├── finetuning_bbox.py
    │   ├── inference
    │   │   ├── __init__.py
    │   │   ├── chat_utils.py
    │   │   ├── checkpoint_converter_fsdp_hf.py
    │   │   ├── llm.py
    │   │   ├── model_utils.py
    │   │   ├── prompt_format_utils.py
    │   │   └── safety_utils.py
    │   ├── model_checkpointing
    │   │   ├── __init__.py
    │   │   └── checkpoint_handler.py
    │   ├── policies
    │   │   ├── __init__.py
    │   │   ├── activation_checkpointing_functions.py
    │   │   ├── anyprecision_optimizer.py
    │   │   ├── mixed_precision.py
    │   │   └── wrapping.py
    │   ├── tools
    │   │   └── convert_hf_weights_to_llama.py
    │   └── utils
    │       ├── __init__.py
    │       ├── config_utils.py
    │       ├── dataset_utils.py
    │       ├── flop_utils.py
    │       ├── fsdp_utils.py
    │       ├── hf_llama_conversion
    │       │   ├── README.md
    │       │   └── compare_llama_weights.py
    │       ├── memory_utils.py
    │       ├── plot_metrics.py
    │       └── train_utils.py
    ├── metrics
    │   ├── __init__.py
    │   ├── file_consistency.py
    │   ├── prompt_consistency.py
    │   └── self_consistency.py
    ├── pred
    │   ├── __init__.py
    │   └── extract_output_json.py
    └── utils
        ├── __init__.py
        ├── bubble_graph.py
        ├── eval_overall.py
        ├── eval_sample.py
        ├── fp_plot
        │   ├── __init__.py
        │   └── procthorpy
        │       ├── __init__.py
        │       ├── plot.py
        │       └── utils.py
        ├── json_check
        │   ├── __init__.py
        │   ├── schema.py
        │   └── verify.py
        ├── json_repair.py
        ├── plot.py
        ├── polygon_object.py
        ├── process_dataset.py
        └── util.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DStruct2Design: Data and Benchmarks for Data Structure Driven Generative Floor Plan Design
2 |
3 | ## Paper
4 | Our paper is available [here](https://arxiv.org/abs/2407.15723)
5 |
6 | ### If you use this repository, please cite our work:
7 | ```
8 | @misc{luo2024dstruct2designdatabenchmarksdata,
9 | title={DStruct2Design: Data and Benchmarks for Data Structure Driven Generative Floor Plan Design},
10 | author={Zhi Hao Luo and Luis Lara and Ge Ya Luo and Florian Golemo and Christopher Beckham and Christopher Pal},
11 | year={2024},
12 | eprint={2407.15723},
13 | archivePrefix={arXiv},
14 | primaryClass={cs.CL},
15 | url={https://arxiv.org/abs/2407.15723},
16 | }
17 | ```
18 |
19 | ## Getting Started
20 |
21 | These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.
22 |
23 | ### Prerequisites
24 |
25 | In our paper, we train a Llama-3-8B-Instruct model. Training is enabled by [llama-recipes](https://github.com/meta-llama/llama-recipes/tree/main). You can either install llama-recipes or install from `requirements.txt`.
26 |
27 | #### Install with llama-recipes:
28 | ```
29 | pip install llama-recipes
30 | ```
31 |
32 | #### Install from requirements.txt:
33 | ```
34 | pip install -r requirements.txt
35 | ```
36 |
37 | ## Datasets
38 |
39 | ### ProcTHOR
40 |
41 | You can download the converted ProcTHOR-10K dataset from [here](https://huggingface.co/datasets/ludolara/DStruct2Design) and put it under `datasets/procthor_converted/`
42 |
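If you prefer to script the download, the following is a minimal sketch assuming the converted files are hosted as plain dataset files on the Hub and that `huggingface_hub` is installed (adjust `local_dir` if your layout differs):

```
# Sketch only: download the converted ProcTHOR-10K dataset into datasets/procthor_converted/
# Assumes `pip install huggingface_hub`; the exact file layout on the Hub may differ.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="ludolara/DStruct2Design",
    repo_type="dataset",
    local_dir="datasets/procthor_converted",
)
```
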
43 | ### RPLAN
44 |
45 | The RPLAN dataset needs to be requested from its [homepage](http://staff.ustc.edu.cn/~fuxm/projects/DeepLayout/).
46 |
47 | Once obtained, save all the data (PNGs) under `datasets/rplan/`, then run our conversion script. The converted dataset will be saved under `datasets/rplan_converted/`:
48 | ```
49 | python scripts/rplan_dataset_convert.py
50 | ```
51 |
52 | ## Pretrained Weights
53 |
54 | The pretrained PEFT LoRA weights for all of our models can be obtained from the links below:
55 |
56 | #### Weights for 4 model variants trained on RPLAN
57 | ```
58 | https://drive.google.com/file/d/1cAYlEupNUGJefNdwkNaaq7fD3X3_P46D/view?usp=sharing
59 | ```
60 |
61 | #### Weights for 3 bubble diagram model variants trained on ProcTHOR
62 | ```
63 | https://drive.google.com/file/d/16cYPK6g_Ho4VbvjvBZIGHMzNTBWzcAZT/view?usp=drive_link
64 | ```
65 |
66 |
67 | #### Weights for 3 constraint only (no bubble diagram) model variants trained on ProcTHOR
68 | ```
69 | https://drive.google.com/file/d/13k-pBmhGhYthm4WbHzrRH7WjaSKNkTpq/view?usp=drive_link
70 | ```
71 |
72 | After downloading, uncompress the archives and place them in their respective folders under `models/`.
73 |
74 | ## Training
75 |
76 | Alternatively, these weights can be trained from scratch with the following command:
77 |
78 | #### To train on ProcTHOR:
79 |
80 | ```
81 | python -m llama_recipes.finetuning --use_peft --peft_method lora --quantization --model_name meta-llama/Meta-Llama-3-8B-Instruct --batch_size_training 2 --num_epochs 8 --dataset "custom_dataset" --custom_dataset.file "procthor_dataset.py" --use_wandb False --wandb_config.project "floorplans" --output_dir procthor --exprm $EXPRM_VAR --ds_version $BD_VAR --load_peft False
82 | ```
83 |
84 | Here, `$BD_VAR` and `$EXPRM_VAR` select the model variant to be trained, as explained in Section 6.1 of our paper.
85 |
86 | `$BD_VAR` can be set to either `'bd'` or `'non_bd'`,
87 |
88 | `$EXPRM_VAR` can be set to `'specific'`, `'mask'`, or `'preset_mask'`
89 |
91 | #### To train on RPLAN:
91 |
92 | ```
93 | python -m llama_recipes.finetuning --use_peft --peft_method lora --quantization --model_name meta-llama/Meta-Llama-3-8B-Instruct --batch_size_training 2 --num_epochs 5 --dataset "custom_dataset" --custom_dataset.file "rplan_dataset.py" --use_wandb False --wandb_config.project "floorplans" --output_dir rplan --exprm $EXPRM_VAR --load_peft False
94 | ```
95 |
96 | For RPLAN, the model variant is determined by `$EXPRM_VAR` alone.
97 |
98 | `$EXPRM_VAR` can be one of `'5R'`, `'6R'`, `'7R'`, or `'8R'`. The differences between these variants are explained in Section 6.1 of our paper.
99 |
100 | ## Inference
101 |
102 | To run generation after the pretrained weights are obtained, do the following:
103 |
104 | (Note that you can run greedy or sampled generations. In our experiments, we use both, and sampling is done with `num_samples` set to 20.)
105 |
106 | #### To run generation on ProcTHOR-trained models:
107 |
108 | ```
109 | python run_generation_procthor.py --exprm $EXPRM_VAR --num_samples 1 --version $BD_VAR
110 | ```
111 |
112 | `$BD_VAR` can be set to either `'bd'` or `'non_bd'`,
113 |
114 | `$EXPRM_VAR` can be set to `'specific'`, `'mask'`, or `'preset_mask'`
115 |
116 | It will load the trained model variant according to the variable.
117 |
118 |
119 | #### To run generation on RPLAN-trained models:
120 |
121 | ```
122 | python run_generation_rplan.py --exprm $EXPRM_VAR --num_samples 1
123 | ```
124 |
125 | `$EXPRM_VAR` can be one of `'5R'`, `'6R'`, `'7R'`, or `'8R'`.
126 |
127 | It will load the trained model variant according to the variable.
128 |
129 | ## Evaluation
130 |
131 | To evaluate generated results saved in `$RESULTS_DIR`, simply run the following command:
132 |
133 | ```
134 | python run_metric.py $RESULTS_DIR
135 | ```
136 |
137 |
--------------------------------------------------------------------------------
/datasets/procthor_converted/README.md:
--------------------------------------------------------------------------------
1 | Converted ProcTHOR dataset goes here
--------------------------------------------------------------------------------
/datasets/rplan_converted/README.md:
--------------------------------------------------------------------------------
1 | Converted RPLAN dataset goes here
--------------------------------------------------------------------------------
/generations/README.md:
--------------------------------------------------------------------------------
1 | ## Usage
2 |
3 | Generated samples should be stored in this folder in order to be evaluated.
--------------------------------------------------------------------------------
/models/README.md:
--------------------------------------------------------------------------------
1 | ## Usage
2 |
3 | Store LoRA weights here.
4 |
5 | Our pretrained weights go here as well.
6 |
7 | For our pretrained weights, the structure should be as follows for easy save/load access and inference:
8 |
9 | models/
10 | |
11 | |---procthor_weights_BD_variants/
12 | |     |---full_prompt/
13 | |     |---mask/
14 | |     |---preset_mask/
15 | |
16 | |---procthor_weights_nonBD_variants/
17 | |     |---full_prompt/
18 | |     |---mask/
19 | |     |---preset_mask/
20 | |
21 | |---rplan/
22 | |     |---5R/
23 | |     |---6R/
24 | |     |---7R/
25 | |     |---8R/
26 |
--------------------------------------------------------------------------------
/recipes/README.md:
--------------------------------------------------------------------------------
1 | This folder contains examples organized by topic:
2 |
3 | | Subfolder | Description |
4 | |---|---|
5 | [quickstart](./quickstart)|The "Hello World" of using Llama2, start here if you are new to using Llama2
6 | [multilingual](./multilingual)|Scripts to add a new language to Llama2
7 | [finetuning](./finetuning)|Scripts to finetune Llama2 on single-GPU and multi-GPU setups
8 | [inference](./inference)|Scripts to deploy Llama2 for inference locally and using model servers
9 | [use_cases](./use_cases)|Scripts showing common applications of Llama2
10 | [responsible_ai](./responsible_ai)|Scripts to use PurpleLlama for safeguarding model outputs
11 | [llama_api_providers](./llama_api_providers)|Scripts to run inference on Llama via hosted endpoints
12 | [benchmarks](./benchmarks)|Scripts to benchmark Llama 2 models inference on various backends
13 | [code_llama](./code_llama)|Scripts to run inference with the Code Llama models
14 | [evaluation](./evaluation)|Scripts to evaluate fine-tuned Llama2 models using `lm-evaluation-harness` from `EleutherAI`
15 |
16 |
17 | **Note on using Replicate**
18 | To run some of the demo apps here, you'll need to first sign in to Replicate with your GitHub account, then create a free API token [here](https://replicate.com/account/api-tokens) that you can use for a while. After the free trial ends, you'll need to enter billing info to continue to use Llama2 hosted on Replicate - according to Replicate's [Run time and cost](https://replicate.com/meta/llama-2-13b-chat) for the Llama2-13b-chat model used in our demo apps, the model "costs $0.000725 per second. Predictions typically complete within 10 seconds." This means each call to the Llama2-13b-chat model costs less than $0.01 if the call completes within 10 seconds. If you want absolutely no costs, you can refer to the section "Running Llama2 locally on Mac" above or the "Running Llama2 in Google Colab" section below.
19 |
20 | **Note on using OctoAI**
21 | You can also use [OctoAI](https://octo.ai/) to run some of the Llama demos under [OctoAI_API_examples](./llama_api_providers/OctoAI_API_examples/). You can sign into OctoAI with your Google or GitHub account, which will give you $10 of free credits you can use for a month. Llama2 on OctoAI is priced at [$0.00086 per 1k tokens](https://octo.ai/pricing/) (a ~350-word LLM response), so $10 of free credits should go a very long way (about 10,000 LLM inferences).
22 |
23 | ### [Running Llama2 in Google Colab](https://colab.research.google.com/drive/1-uBXt4L-6HNS2D8Iny2DwUpVS4Ub7jnk?usp=sharing)
24 | To run Llama2 in Google Colab using [llama-cpp-python](https://github.com/abetlen/llama-cpp-python), download the quantized Llama2-7b-chat model [here](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf), or follow the instructions above to build it, before uploading it to your Google Drive. Note that on the free Colab T4 GPU, the call to Llama could take more than 20 minutes to return; running the notebook locally on an M1 MBP takes about 20 seconds.
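For reference, here is a minimal llama-cpp-python sketch for loading the GGUF file and running a single prompt (assuming `pip install llama-cpp-python` and that the quantized model file is in the working directory):

```python
# Sketch only: run the quantized Llama2-7b-chat GGUF with llama-cpp-python.
from llama_cpp import Llama

llm = Llama(model_path="llama-2-7b-chat.Q4_0.gguf", n_ctx=2048)
output = llm("Q: Name the planets in the solar system. A:", max_tokens=128)
print(output["choices"][0]["text"])
```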
25 |
--------------------------------------------------------------------------------
/recipes/benchmarks/fmbench/img/CFT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/benchmarks/fmbench/img/CFT.png
--------------------------------------------------------------------------------
/recipes/benchmarks/fmbench/img/instances.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/benchmarks/fmbench/img/instances.png
--------------------------------------------------------------------------------
/recipes/benchmarks/fmbench/img/latency_vs_tokens.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/benchmarks/fmbench/img/latency_vs_tokens.png
--------------------------------------------------------------------------------
/recipes/benchmarks/inference_throughput/README.md:
--------------------------------------------------------------------------------
1 | # Inference Throughput Benchmarks
2 | In this folder we provide a series of benchmark scripts that apply a throughput analysis for Llama 2 models inference on various backends:
3 | * On-prem - Popular serving frameworks and containers (i.e. vLLM)
4 | * [**WIP**] Cloud API - Popular API services (i.e. Azure Model-as-a-Service)
5 | * [**WIP**] On-device - Popular on-device inference solutions on Android and iOS (i.e. mlc-llm, QNN)
6 | * [**WIP**] Optimization - Popular optimization solutions for faster inference and quantization (i.e. AutoAWQ)
7 |
8 | # Why
9 | There are three major reasons we want to run these benchmarks and share them with our Llama community:
10 | * Provide inference throughput analysis based on real-world situations to help you select the best service or deployment for your scenario
11 | * Provide a baseline measurement for validating various optimization solutions on different backends, so we can provide guidance on which solutions work best for your scenario
12 | * Encourage the community to develop benchmarks on top of our work, so we can better quantify the latest proposed solutions combined with currently popular frameworks, especially in this fast-moving area
13 |
14 | # Parameters
15 | Here are the parameters (if applicable) that you can configure for running the benchmark:
16 | * **PROMPT** - Prompt sent in for inference (configure the length of prompt, choose from 5, 25, 50, 100, 500, 1k and 2k)
17 | * **MAX_NEW_TOKENS** - Max number of tokens generated
18 | * **CONCURRENT_LEVELS** - Max number of concurrent requests
19 | * **MODEL_PATH** - Model source
20 | * **MODEL_HEADERS** - Request headers
21 | * **SAFE_CHECK** - Content safety check (either Azure service or simulated latency)
22 | * **THRESHOLD_TPS** - Threshold TPS (threshold for tokens per second below which we deem the query to be slow)
23 | * **TOKENIZER_PATH** - Tokenizer source
24 | * **RANDOM_PROMPT_LENGTH** - Random prompt length (for pretrained models)
25 | * **NUM_GPU** - Number of GPUs for request dispatch among multiple containers
26 | * **TEMPERATURE** - Temperature for inference
27 | * **TOP_P** - Top_p for inference
28 | * **MODEL_ENDPOINTS** - Container endpoints
29 | * Model parallelism or model replicas - Load one model into multiple GPUs or multiple model replicas on one instance. More detail in the README files for specific containers.
30 |
31 | You can also configure other model hyperparameters as part of the request payload.
32 | All these parameters are stored in ```parameters.json``` and real prompts are stored in ```input.jsonl```. Running the script will load these configurations.
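As a rough illustration of how the scripts consume these files (a sketch only; see each benchmark script for the exact keys it reads):

```python
# Sketch only: load the benchmark configuration and the real prompts.
import json

with open("parameters.json") as f:
    params = json.load(f)           # e.g. CONCURRENT_LEVELS, THRESHOLD_TPS, MODEL_ENDPOINTS, ...

prompts = []
with open("input.jsonl") as f:
    for line in f:
        prompts.append(json.loads(line))

print(sorted(params.keys()), len(prompts))
```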
33 |
34 |
35 |
36 | # Metrics
37 | The benchmark will report these metrics per instance:
38 | * Number of concurrent requests
39 | * P50 Latency (ms)
40 | * P99 Latency (ms)
41 | * Request per second (RPS)
42 | * Output tokens per second
43 | * Output tokens per second per GPU
44 | * Input tokens per second
45 | * Input tokens per second per GPU
46 | * Average tokens per second per request
47 |
48 | We intend to add these metrics in the future:
49 | * Time to first token (TTFT)
50 |
51 | The benchmark result will be displayed in the terminal output and saved as a CSV file (```performance_metrics.csv```) which you can export to spreadsheets.
52 |
53 | # Getting Started
54 | Please follow the ```README.md``` in each subfolder for instructions on how to set up and run these benchmarks.
55 |
56 |
--------------------------------------------------------------------------------
/recipes/benchmarks/inference_throughput/cloud-api/README.md:
--------------------------------------------------------------------------------
1 | # Llama-Cloud-API-Benchmark
2 | This folder contains code to run an inference benchmark for Llama 2 models on cloud APIs from popular cloud service providers. The benchmark will focus on overall inference **throughput** for querying the API endpoint for output generation with different levels of concurrent requests. Remember that to send queries to the API endpoint, you are required to have a subscription with the cloud service provider and there will be a fee associated with it.
3 |
4 | Disclaimer - The purpose of the code is to provide a configurable setup to measure inference throughput. It is not representative of the performance of these API services, and we do not plan to make comparisons between different API providers.
5 |
6 |
7 | # Azure - Getting Started
8 | To get started, there are certain steps we need to take to deploy the models:
9 |
10 |
11 | * Register for a valid Azure account with subscription [here](https://azure.microsoft.com/en-us/free/search/?ef_id=_k_CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE_k_&OCID=AIDcmm5edswduu_SEM__k_CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE_k_&gad_source=1&gclid=CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE)
12 |
13 | * Take a quick look at what the [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home) is and navigate to the website from the link in the article
14 | * Follow the demos in the article to create a project and [resource](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-portal) group, or you can also follow the guide [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-llama?tabs=azure-studio)
15 | * Select Llama models from Model catalog
16 | * Deploy with "Pay-as-you-go"
17 |
18 | Once deployed successfully, you should be assigned an API endpoint and a security key for inference.
19 | For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-llama?tabs=azure-studio) for model deployment and inference.
20 |
21 | Now, replace the endpoint URL and API key in ```azure/parameters.json```. For the parameter `MODEL_ENDPOINTS`, with chat models the suffix should be `v1/chat/completions` and with pretrained models the suffix should be `v1/completions`.
22 | Note that the API endpoint might implement a rate limit for token generation within a certain amount of time. If you encounter this error, you can try reducing `MAX_NEW_TOKEN` or starting with smaller `CONCURRENT_LEVELS`.
23 |
24 | Once everything is configured, run the chat model benchmark:
25 | ```python chat_azure_api_benchmark.py```
26 |
27 | To run the pretrained model benchmark:
28 | ```python pretrained_azure_api_benchmark.py```
29 |
30 | Once finished, the results will be written into a CSV file in the same directory, which can later be imported into a dashboard of your choice.
31 |
--------------------------------------------------------------------------------
/recipes/benchmarks/inference_throughput/cloud-api/azure/parameters.json:
--------------------------------------------------------------------------------
1 | {
2 | "MAX_NEW_TOKEN" : 256,
3 | "CONCURRENT_LEVELS" : [1, 2, 4, 8, 16, 32, 64],
4 | "THRESHOLD_TPS" : 7,
5 | "TOKENIZER_PATH" : "../../tokenizer",
6 | "RANDOM_PROMPT_LENGTH" : 1000,
7 | "TEMPERATURE" : 0.6,
8 | "TOP_P" : 0.9,
9 | "MODEL_ENDPOINTS" : "https://your-endpoint.inference.ai.azure.com/v1/completions",
10 | "API_KEY" : "your-auth-key",
11 | "SYS_PROMPT" : "You are a helpful assistant."
12 | }
--------------------------------------------------------------------------------
/recipes/benchmarks/inference_throughput/on-prem/README.md:
--------------------------------------------------------------------------------
1 | # Llama-On-Prem-Benchmark
2 | This folder contains code to run an inference benchmark for Llama 2 models on-prem with popular serving frameworks.
3 | The benchmark will focus on overall inference **throughput** for running containers on one instance (single or multiple GPUs) that you can acquire from cloud service providers such as Azure and AWS. You can also run this benchmark on a local laptop or desktop.
4 | We support benchmarking on these serving frameworks:
5 | * [vLLM](https://github.com/vllm-project/vllm)
6 |
7 |
8 | # vLLM - Getting Started
9 |
10 | To get started, we first need to deploy containers on-prem as an API host. Follow the guidance [here](../../../inference/model_servers/llama-on-prem.md#setting-up-vllm-with-llama-2) to deploy vLLM on-prem.
11 |
12 | Note that in the common scenario where overall throughput is important, we suggest prioritizing the deployment of as many model replicas as possible to reach higher overall throughput and requests-per-second (RPS), rather than deploying one model container across multiple GPUs for model parallelism. Additionally, when deploying multiple model replicas, a higher-level wrapper is needed to handle load balancing, which is simulated in the benchmark scripts (see the sketch after the list below).
13 | For example, suppose we have an instance from Azure that has 8xA100 80G GPUs, and we want to deploy the Llama 2 70B chat model, which is around 140GB with FP16. For deployment we can do:
14 | * 1x70B model parallel on 8 GPUs, each GPU RAM takes around 17.5GB for loading model weights.
15 | * 2x70B models each use 4 GPUs, each GPU RAM takes around 35GB for loading model weights.
16 | * 4x70B models each use 2 GPUs, each GPU RAM takes around 70GB for loading model weights. (Preferred configuration for max overall throughput. Note that you will have 4 endpoints hosted on different ports and the benchmark script will route requests into each model equally)
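To make the simulated load balancing concrete, here is a minimal, illustrative sketch of round-robin dispatch over multiple replicas; it assumes OpenAI-compatible `v1/chat/completions` endpoints like those configured in `vllm/parameters.json` and is not the actual benchmark code:

```
# Sketch only: round-robin requests across several vLLM replicas.
import itertools
import requests

endpoints = [
    "http://localhost:8000/v1/chat/completions",
    "http://localhost:8001/v1/chat/completions",
]
next_endpoint = itertools.cycle(endpoints)

def send_request(messages, max_tokens=256):
    payload = {
        "model": "meta-llama/Llama-2-70b-chat-hf",
        "messages": messages,
        "max_tokens": max_tokens,
    }
    # Each call goes to the next replica in turn, spreading load evenly across endpoints.
    return requests.post(next(next_endpoint), json=payload).json()
```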
17 |
18 | Here are examples for deploying 2x70B chat models over 8 GPUs with vLLM.
19 | ```
20 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-70b-chat-hf --tensor-parallel-size 4 --disable-log-requests --port 8000
21 | CUDA_VISIBLE_DEVICES=4,5,6,7 python -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-70b-chat-hf --tensor-parallel-size 4 --disable-log-requests --port 8001
22 | ```
23 | Once you have finished deployment, you can use the command below to run benchmark scripts in a separate terminal.
24 |
25 | ```
26 | python chat_vllm_benchmark.py
27 | ```
28 |
29 | If you are going to use [Azure AI content check](https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety), then you should install dependencies as shown below in your terminal:
30 |
31 | ```
32 | pip install azure-ai-contentsafety azure-core
33 | ```
34 | Besides chat models, we also provide benchmark scripts for running pretrained models on text completion tasks. To better simulate real traffic, we generate configurable random-token prompts as input. In this process, we select vocabulary entries that are longer than 2 tokens so the generated words are closer to English rather than symbols.
35 | However, random-token prompts can't be used for chat model benchmarks, since a chat model expects a valid question. With random prompts, chat models rarely produce answers that meet our ```MAX_NEW_TOKENS``` requirement, defeating the purpose of running throughput benchmarks. Hence, for chat models, the questions are copied over to form long inputs, such as the 2k and 4k inputs.
36 | To run the pretrained model benchmark, use the command below.
37 | ```
38 | python pretrained_vllm_benchmark.py
39 | ```
40 |
41 |
--------------------------------------------------------------------------------
/recipes/benchmarks/inference_throughput/on-prem/vllm/parameters.json:
--------------------------------------------------------------------------------
1 | {
2 | "MAX_NEW_TOKENS" : 256,
3 | "CONCURRENT_LEVELS" : [1, 2, 4, 8, 16, 32, 64, 128, 256],
4 | "MODEL_PATH" : "meta-llama/Llama-2-7b-chat-hf",
5 | "MODEL_HEADERS" : {"Content-Type": "application/json"},
6 | "SAFE_CHECK" : true,
7 | "THRESHOLD_TPS" : 7,
8 | "TOKENIZER_PATH" : "../../tokenizer",
9 | "RANDOM_PROMPT_LENGTH" : 1000,
10 | "TEMPERATURE" : 0.6,
11 | "TOP_P" : 0.9,
12 | "MODEL_ENDPOINTS" : [
13 | "http://localhost:8000/v1/chat/completions"
14 | ]
15 | }
--------------------------------------------------------------------------------
/recipes/benchmarks/inference_throughput/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers
2 | requests
3 | azure-core
4 | azure-ai-contentsafety
5 | torch
6 |
--------------------------------------------------------------------------------
/recipes/benchmarks/inference_throughput/tokenizer/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {
2 | "bos_token": {
3 | "content": "<s>",
4 | "lstrip": false,
5 | "normalized": true,
6 | "rstrip": false,
7 | "single_word": false
8 | },
9 | "eos_token": {
10 | "content": "</s>",
11 | "lstrip": false,
12 | "normalized": true,
13 | "rstrip": false,
14 | "single_word": false
15 | },
16 | "unk_token": {
17 | "content": "<unk>",
18 | "lstrip": false,
19 | "normalized": true,
20 | "rstrip": false,
21 | "single_word": false
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/recipes/benchmarks/inference_throughput/tokenizer/tokenizer.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/benchmarks/inference_throughput/tokenizer/tokenizer.model
--------------------------------------------------------------------------------
/recipes/benchmarks/inference_throughput/tokenizer/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "add_bos_token": true,
3 | "add_eos_token": false,
4 | "bos_token": {
5 | "__type": "AddedToken",
6 | "content": "<s>",
7 | "lstrip": false,
8 | "normalized": true,
9 | "rstrip": false,
10 | "single_word": false
11 | },
12 | "clean_up_tokenization_spaces": false,
13 | "eos_token": {
14 | "__type": "AddedToken",
15 | "content": "</s>",
16 | "lstrip": false,
17 | "normalized": true,
18 | "rstrip": false,
19 | "single_word": false
20 | },
21 | "legacy": true,
22 | "use_default_system_prompt": false,
23 | "model_max_length": 1000000000000000019884624838656,
24 | "pad_token": null,
25 | "sp_model_kwargs": {},
26 | "tokenizer_class": "LlamaTokenizerFast",
27 | "unk_token": {
28 | "__type": "AddedToken",
29 | "content": "<unk>",
30 | "lstrip": false,
31 | "normalized": true,
32 | "rstrip": false,
33 | "single_word": false
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/recipes/code_llama/README.md:
--------------------------------------------------------------------------------
1 | # Code Llama
2 |
3 | Code Llama was recently released with three flavors: a base model that supports multiple programming languages, a Python fine-tuned model, and an instruction fine-tuned and aligned variation of Code Llama; please read more [here](https://ai.meta.com/blog/code-llama-large-language-model-coding/). Also note that the Python fine-tuned model and the 34B models are not trained on the infilling objective, hence they cannot be used for infilling use cases.
4 |
5 | In this folder you will find scripts to run Code Llama, including examples of code completion and infilling.
6 |
7 | **Note** Please find the right model on HF side [here](https://huggingface.co/codellama).
8 |
9 | Make sure to install Transformers from source for now
10 |
11 | ```bash
12 |
13 | pip install git+https://github.com/huggingface/transformers
14 |
15 | ```
16 |
17 | To run the code completion example:
18 |
19 | ```bash
20 |
21 | python code_completion_example.py --model_name MODEL_NAME --prompt_file code_completion_prompt.txt --temperature 0.2 --top_p 0.9
22 |
23 | ```
24 |
25 | To run the code infilling example:
26 |
27 | ```bash
28 |
29 | python code_infilling_example.py --model_name MODEL_NAME --prompt_file code_infilling_prompt.txt --temperature 0.2 --top_p 0.9
30 |
31 | ```
32 | To run the 70B Instruct model example, run the following (you'll need to enter the system and user prompts to instruct the model):
33 |
34 | ```bash
35 |
36 | python code_instruct_example.py --model_name codellama/CodeLlama-70b-Instruct-hf --temperature 0.2 --top_p 0.9
37 |
38 | ```
39 | You can learn more about the chat prompt template [on HF](https://huggingface.co/codellama/CodeLlama-70b-Instruct-hf#chat-prompt) and in the [original Code Llama repository](https://github.com/facebookresearch/codellama/blob/main/README.md#fine-tuned-instruction-models). The HF tokenizer already takes care of the chat template, as shown in this example.
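For reference, a minimal sketch of what applying the chat template looks like with the HF tokenizer (the system/user messages below are placeholders; `code_instruct_example.py` handles this for you):

```python
# Sketch only: build a Code Llama 70B Instruct prompt via the HF chat template.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-70b-Instruct-hf")
chat = [
    {"role": "system", "content": "You write safe, well-documented Python."},
    {"role": "user", "content": "Write a function that reverses a string."},
]
input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt")
# input_ids can now be passed to model.generate(...)
```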
40 |
--------------------------------------------------------------------------------
/recipes/code_llama/code_completion_example.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | # from accelerate import init_empty_weights, load_checkpoint_and_dispatch
5 |
6 | import fire
7 | import os
8 | import sys
9 | import time
10 |
11 | import torch
12 | from transformers import AutoTokenizer
13 |
14 | from llama_recipes.inference.safety_utils import get_safety_checker
15 | from llama_recipes.inference.model_utils import load_model, load_peft_model
16 |
17 |
18 | def main(
19 | model_name,
20 | peft_model: str=None,
21 | quantization: bool=False,
22 | max_new_tokens =100, #The maximum number of tokens to generate
23 | prompt_file: str=None,
24 | seed: int=42, #seed value for reproducibility
25 | do_sample: bool=True, #Whether or not to use sampling ; use greedy decoding otherwise.
26 | min_length: int=None, #The minimum length of the sequence to be generated, input prompt + min_new_tokens
27 | use_cache: bool=True, #[optional] Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding.
28 | top_p: float=0.9, # [optional] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
29 | temperature: float=0.6, # [optional] The value used to modulate the next token probabilities.
30 | top_k: int=50, # [optional] The number of highest probability vocabulary tokens to keep for top-k-filtering.
31 | repetition_penalty: float=1.0, #The parameter for repetition penalty. 1.0 means no penalty.
32 | length_penalty: int=1, #[optional] Exponential penalty to the length that is used with beam-based generation.
33 | enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api
34 | enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs
35 | enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5
36 | enable_llamaguard_content_safety: bool=False, # Enable safety check with Llama-Guard
37 | use_fast_kernels: bool = True, # Enable using SDPA from PyTorch Accelerated Transformers, making use of Flash Attention and Xformers memory-efficient kernels
38 | **kwargs
39 | ):
40 | if prompt_file is not None:
41 | assert os.path.exists(
42 | prompt_file
43 | ), f"Provided Prompt file does not exist {prompt_file}"
44 | with open(prompt_file, "r") as f:
45 | user_prompt = f.read()
46 | else:
47 | print("No user prompt provided. Exiting.")
48 | sys.exit(1)
49 |
50 | # Set the seeds for reproducibility
51 | torch.cuda.manual_seed(seed)
52 | torch.manual_seed(seed)
53 |
54 | model = load_model(model_name, quantization, use_fast_kernels)
55 | if peft_model:
56 | model = load_peft_model(model, peft_model)
57 |
58 | model.eval()
59 |
60 | tokenizer = AutoTokenizer.from_pretrained(model_name)
61 | safety_checker = get_safety_checker(enable_azure_content_safety,
62 | enable_sensitive_topics,
63 | enable_salesforce_content_safety,
64 | enable_llamaguard_content_safety,
65 | )
66 |
67 | # Safety check of the user prompt
68 | safety_results = [check(user_prompt) for check in safety_checker]
69 | are_safe = all([r[1] for r in safety_results])
70 | if are_safe:
71 | print("User prompt deemed safe.")
72 | print(f"User prompt:\n{user_prompt}")
73 | else:
74 | print("User prompt deemed unsafe.")
75 | for method, is_safe, report in safety_results:
76 | if not is_safe:
77 | print(method)
78 | print(report)
79 | print("Skipping the inference as the prompt is not safe.")
80 | sys.exit(1) # Exit the program with an error status
81 |
82 | batch = tokenizer(user_prompt, return_tensors="pt")
83 |
84 | batch = {k: v.to("cuda") for k, v in batch.items()}
85 | start = time.perf_counter()
86 | with torch.no_grad():
87 | outputs = model.generate(
88 | **batch,
89 | max_new_tokens=max_new_tokens,
90 | do_sample=do_sample,
91 | top_p=top_p,
92 | temperature=temperature,
93 | min_length=min_length,
94 | use_cache=use_cache,
95 | top_k=top_k,
96 | repetition_penalty=repetition_penalty,
97 | length_penalty=length_penalty,
98 | **kwargs
99 | )
100 | e2e_inference_time = (time.perf_counter()-start)*1000
101 | print(f"the inference time is {e2e_inference_time} ms")
102 | output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
103 |
104 | # Safety check of the model output
105 | safety_results = [check(output_text) for check in safety_checker]
106 | are_safe = all([r[1] for r in safety_results])
107 | if are_safe:
108 | print("User input and model output deemed safe.")
109 | print(f"Model output:\n{output_text}")
110 | else:
111 | print("Model output deemed unsafe.")
112 | for method, is_safe, report in safety_results:
113 | if not is_safe:
114 | print(method)
115 | print(report)
116 |
117 |
118 | if __name__ == "__main__":
119 | fire.Fire(main)
120 |
--------------------------------------------------------------------------------
/recipes/code_llama/code_completion_prompt.txt:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | def main(string: str):
4 | print(string)
5 | print(string[::-1])
6 |
7 | if __name__ == "__main__":
--------------------------------------------------------------------------------
/recipes/code_llama/code_infilling_example.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | # from accelerate import init_empty_weights, load_checkpoint_and_dispatch
5 |
6 | import fire
7 | import torch
8 | import os
9 | import sys
10 | import time
11 |
12 | from transformers import AutoTokenizer
13 |
14 | from llama_recipes.inference.safety_utils import get_safety_checker
15 | from llama_recipes.inference.model_utils import load_model, load_peft_model
16 |
17 | def main(
18 | model_name,
19 | peft_model: str=None,
20 | quantization: bool=False,
21 | max_new_tokens =100, #The maximum number of tokens to generate
22 | prompt_file: str=None,
23 | seed: int=42, #seed value for reproducibility
24 | do_sample: bool=True, #Whether or not to use sampling ; use greedy decoding otherwise.
25 | min_length: int=None, #The minimum length of the sequence to be generated, input prompt + min_new_tokens
26 | use_cache: bool=True, #[optional] Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding.
27 | top_p: float=0.9, # [optional] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
28 | temperature: float=0.6, # [optional] The value used to modulate the next token probabilities.
29 | top_k: int=50, # [optional] The number of highest probability vocabulary tokens to keep for top-k-filtering.
30 | repetition_penalty: float=1.0, #The parameter for repetition penalty. 1.0 means no penalty.
31 | length_penalty: int=1, #[optional] Exponential penalty to the length that is used with beam-based generation.
32 | enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api
33 | enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs
34 | enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5
35 | enable_llamaguard_content_safety: bool=False, # Enable safety check with Llama-Guard
36 | use_fast_kernels: bool = True, # Enable using SDPA from PyTorch Accelerated Transformers, making use of Flash Attention and Xformers memory-efficient kernels
37 | **kwargs
38 | ):
39 | if prompt_file is not None:
40 | assert os.path.exists(
41 | prompt_file
42 | ), f"Provided Prompt file does not exist {prompt_file}"
43 | with open(prompt_file, "r") as f:
44 | user_prompt = f.read()
45 | else:
46 | print("No user prompt provided. Exiting.")
47 | sys.exit(1)
48 | # Set the seeds for reproducibility
49 | torch.cuda.manual_seed(seed)
50 | torch.manual_seed(seed)
51 |
52 | model = load_model(model_name, quantization, use_fast_kernels)
53 | model.config.tp_size=1
54 | if peft_model:
55 | model = load_peft_model(model, peft_model)
56 |
57 | model.eval()
58 |
59 | tokenizer = AutoTokenizer.from_pretrained(model_name)
60 |
61 | safety_checker = get_safety_checker(enable_azure_content_safety,
62 | enable_sensitive_topics,
63 | enable_salesforce_content_safety,
64 | enable_llamaguard_content_safety,
65 | )
66 |
67 | # Safety check of the user prompt
68 | safety_results = [check(user_prompt) for check in safety_checker]
69 | are_safe = all([r[1] for r in safety_results])
70 | if are_safe:
71 | print("User prompt deemed safe.")
72 | print(f"User prompt:\n{user_prompt}")
73 | else:
74 | print("User prompt deemed unsafe.")
75 | for method, is_safe, report in safety_results:
76 | if not is_safe:
77 | print(method)
78 | print(report)
79 | print("Skipping the inference as the prompt is not safe.")
80 | sys.exit(1) # Exit the program with an error status
81 |
82 | batch = tokenizer(user_prompt, return_tensors="pt")
83 | batch = {k: v.to("cuda") for k, v in batch.items()}
84 |
85 | start = time.perf_counter()
86 | with torch.no_grad():
87 | outputs = model.generate(
88 | **batch,
89 | max_new_tokens=max_new_tokens,
90 | do_sample=do_sample,
91 | top_p=top_p,
92 | temperature=temperature,
93 | min_length=min_length,
94 | use_cache=use_cache,
95 | top_k=top_k,
96 | repetition_penalty=repetition_penalty,
97 | length_penalty=length_penalty,
98 | **kwargs
99 | )
100 | e2e_inference_time = (time.perf_counter()-start)*1000
101 | print(f"the inference time is {e2e_inference_time} ms")
102 | filling = tokenizer.batch_decode(outputs[:, batch["input_ids"].shape[1]:], skip_special_tokens=True)[0]
103 | # Safety check of the model output
104 | safety_results = [check(filling) for check in safety_checker]
105 | are_safe = all([r[1] for r in safety_results])
106 | if are_safe:
107 | print("User input and model output deemed safe.")
108 | print(user_prompt.replace("<FILL_ME>", filling))
109 | else:
110 | print("Model output deemed unsafe.")
111 | for method, is_safe, report in safety_results:
112 | if not is_safe:
113 | print(method)
114 | print(report)
115 |
116 |
117 | if __name__ == "__main__":
118 | fire.Fire(main)
119 |
--------------------------------------------------------------------------------
/recipes/code_llama/code_infilling_prompt.txt:
--------------------------------------------------------------------------------
1 | def remove_non_ascii(s: str) -> str:
2 | """
3 | return result
4 |
--------------------------------------------------------------------------------
/recipes/evaluation/open_llm_eval_prep.sh:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | #!/bin/bash
5 |
6 | # Prompt the user for the EVAL_PATH
7 | read -p "Enter the absolute path to the lm-evaluation-harness: " EVAL_PATH
8 | conda activate
9 | # Directory containing YAML files
10 | DIR="open_llm_leaderboard"
11 |
12 | # Check if the directory exists
13 | if [ ! -d "$DIR" ]; then
14 | echo "Error: Directory '$DIR' not found."
15 | exit 1
16 | fi
17 |
18 | # Iterate over YAML files in the directory and update them
19 | for YAML_FILE in "$DIR"/*.yaml
20 | do
21 | if [ -f "$YAML_FILE" ]; then
22 | sed -i 's|{\$EVAL_PATH}|'"$EVAL_PATH"'|g' "$YAML_FILE"
23 | echo "Updated $YAML_FILE with EVAL_PATH: $EVAL_PATH"
24 | fi
25 | done
26 |
--------------------------------------------------------------------------------
/recipes/evaluation/open_llm_leaderboard/arc_challeneg_25shots.yaml:
--------------------------------------------------------------------------------
1 | include: {$EVAL_PATH}/lm_eval/tasks/arc/arc_challenge.yaml
2 | task: arc_challenge_25_shot
3 | task_alias: arc 25 shot
4 | num_fewshot: 25
5 | metric_list:
6 | - metric: acc_norm
7 |
--------------------------------------------------------------------------------
/recipes/evaluation/open_llm_leaderboard/hellaswag_10shots.yaml:
--------------------------------------------------------------------------------
1 | include: {$EVAL_PATH}/lm_eval/tasks/hellaswag/hellaswag.yaml
2 | task: hellaswag_10_shot
3 | task_alias: hellaswag 10 shot
4 | num_fewshot: 10
5 | metric_list:
6 | - metric: acc_norm
7 |
--------------------------------------------------------------------------------
/recipes/evaluation/open_llm_leaderboard/hellaswag_utils.py:
--------------------------------------------------------------------------------
1 | import datasets
2 | import re
3 |
4 |
5 | def preprocess(text):
6 | text = text.strip()
7 | # NOTE: Brackets are artifacts of the WikiHow dataset portion of HellaSwag.
8 | text = text.replace(" [title]", ". ")
9 | text = re.sub("\\[.*?\\]", "", text)
10 | text = text.replace("  ", " ")  # collapse double spaces
11 | return text
12 |
13 |
14 | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
15 | def _process_doc(doc):
16 | ctx = doc["ctx_a"] + " " + doc["ctx_b"].capitalize()
17 | out_doc = {
18 | "query": preprocess(doc["activity_label"] + ": " + ctx),
19 | "choices": [preprocess(ending) for ending in doc["endings"]],
20 | "gold": int(doc["label"]),
21 | }
22 | return out_doc
23 |
24 | return dataset.map(_process_doc)
25 |
--------------------------------------------------------------------------------
/recipes/evaluation/open_llm_leaderboard/mmlu_5shots.yaml:
--------------------------------------------------------------------------------
1 | include: {$EVAL_PATH}/lm_eval/tasks/mmlu/default/_mmlu.yaml
2 | task:
3 | - mmlu_stem
4 | - mmlu_other
5 | - mmlu_social_sciences
6 | - mmlu_humanities
7 | num_fewshot: 5
8 | metric_list:
9 | - metric: acc
--------------------------------------------------------------------------------
/recipes/evaluation/open_llm_leaderboard/winogrande_5shots.yaml:
--------------------------------------------------------------------------------
1 | include: {$EVAL_PATH}/lm_eval/tasks/winogrande/default.yaml
2 | task: winogrande_5_shot
3 | task_alias: winogrande 5 shot
4 | num_fewshot: 5
5 | metric_list:
6 | - metric: acc
7 |
--------------------------------------------------------------------------------
/recipes/finetuning/LLM_finetuning_overview.md:
--------------------------------------------------------------------------------
1 | ## LLM Fine-Tuning
2 |
3 | Here we discuss fine-tuning Meta Llama 3 with a couple of different recipes. We will cover two scenarios here:
4 |
5 |
6 | ## 1. **Parameter Efficient Model Fine-Tuning**
7 | This helps make the fine-tuning process more affordable even on a single consumer-grade GPU. These methods keep the whole model frozen and just add tiny learnable parameters/layers to it, so we only train a very small portion of the parameters. The most famous methods in this category are [LoRA](https://arxiv.org/pdf/2106.09685.pdf), Llama Adapter, and Prefix-tuning.
8 |
9 |
10 | These methods will address three aspects:
11 |
12 |
13 | - **Cost of full fine-tuning** – these methods only train a small set of extra parameters instead of the full model, which makes it possible to run them on consumer GPUs.
14 |
15 | - **Cost of deployment** – for each fine-tuned downstream model we would otherwise need to deploy a separate model; with these methods, only a small set of extra parameters (a few MB instead of several GB) needs to be stored per task. The pretrained model can be treated as a backbone and these extra parameters as task-specific heads.
16 |
17 | - **Catastrophic forgetting** – these methods also help mitigate the forgetting of earlier tasks that can happen during fine-tuning.
18 |
19 | HF's [PEFT](https://github.com/huggingface/peft) library provides an easy way of using these methods, which we make use of here. Please read more [here](https://huggingface.co/blog/peft).
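As a rough illustration of what the PEFT integration boils down to (a minimal sketch with placeholder hyperparameters, not the recipe's actual configuration):

```python
# Sketch only: wrap a causal LM with a LoRA adapter via HF PEFT (placeholder hyperparameters).
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
lora_config = LoraConfig(
    r=8,                                  # rank of the low-rank update matrices
    lora_alpha=32,                        # scaling factor applied to the update
    target_modules=["q_proj", "v_proj"],  # which projections receive adapters
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()        # only the adapter weights are trainable
```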
20 |
21 |
22 |
23 | ## 2. **Full/ Partial Parameter Fine-Tuning**
24 |
25 | Full-parameter fine-tuning has its own advantages; within this approach there are multiple strategies that can help:
26 |
27 | - Keep the pretrained model frozen and only fine-tune the task head, for example a classifier.
28 |
29 |
30 | - Keep the pretrained model frozen and add a few fully connected layers on the top.
31 |
32 |
33 | - Fine-tuning on all the layers.
34 |
35 | You can also keep most of the layers frozen and only fine-tune a few layers. There are many different techniques to choose from to freeze/unfreeze layers based on different criteria.
36 |
37 |
42 |
43 |
44 |
45 | In this scenario, depending on the model size, you might need to go beyond one GPU, especially if your model does not fit into one GPU for training. For example, the Meta Llama 3 8B model won't fit into a single GPU for full fine-tuning.
46 | The way to think about it is that you need enough GPU memory to keep the model parameters, gradients, and optimizer states, where each of these can take up a multiple of your parameter count times the bytes per element (fp32 = 4 bytes, fp16/bf16 = 2 bytes), depending on the precision you are training in.
47 | For example, the AdamW optimizer keeps 2 states for each of your parameters, and in many cases these are kept in fp32. This implies that, depending on how many layers you are training/unfreezing, your memory requirements can grow beyond one GPU; a back-of-the-envelope estimate is sketched below.
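A minimal back-of-the-envelope sketch under these assumptions (bf16 weights and gradients, two fp32 AdamW states per parameter; real usage also includes activations, buffers, and fragmentation):

```python
# Sketch only: rough GPU-memory estimate for full fine-tuning with AdamW.
params = 8e9                          # e.g. an 8B-parameter model
weights_gb = params * 2 / 1e9         # bf16 weights: 2 bytes per parameter
grads_gb   = params * 2 / 1e9         # bf16 gradients: 2 bytes per parameter
adamw_gb   = params * 2 * 4 / 1e9     # two fp32 optimizer states: 8 bytes per parameter

total_gb = weights_gb + grads_gb + adamw_gb
print(f"~{total_gb:.0f} GB before activations")  # ~96 GB, well beyond a single 80 GB GPU
```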
48 |
49 | **FSDP (Fully Sharded Data Parallel)**
50 |
51 |
52 | PyTorch has the FSDP package for training models that do not fit into one GPU. FSDP lets you train a much larger model with the same amount of resources. Prior to FSDP there was DDP (Distributed Data Parallel), where each GPU holds a full replica of the model and only the data is sharded; at the end of the backward pass the gradients are synced.
53 |
54 | FSDP extends this idea by sharding not only the data but also the model parameters, gradients, and optimizer states. This means each GPU will only keep one shard of the model, which results in huge memory savings that enable us to fit a much larger model into the same number of GPUs. As an example, in DDP the most you could fit into a GPU with 16GB memory is a model of around 700M parameters. So even if you had access to 4 such GPUs, you still couldn't scale beyond the model size that fits into one GPU. However, with FSDP you can fit a 3B model into 4 GPUs, a more than 4x larger model.
55 |
56 |
57 | Please read more on FSDP [here](https://pytorch.org/blog/introducing-pytorch-fully-sharded-data-parallel-api/) & get started with FSDP [here](https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html).
58 |
59 |
60 | To boost the performance of fine-tuning with FSDP, we can make use of a number of features, such as the ones below (a minimal sketch follows the list):
61 |
62 | - **Mixed Precision**, which in FSDP is much more flexible compared to Autocast. It gives the user control over setting the precision for model parameters, buffers, and gradients.
63 |
64 | - **Activation Checkpointing**, which is a technique to save memory by discarding the intermediate activations of the forward pass instead of keeping them in memory, at the cost of recomputing them in the backward pass. FSDP activation checkpointing is shard-aware, meaning we need to apply it after wrapping the model with FSDP. Our script makes use of this.
65 |
66 | - **auto_wrap_policy**, which is the way to specify how FSDP should partition the model; there is default support for a transformer wrapping policy. This allows FSDP to form each FSDP unit (partition of the model) based on the transformer class in the model. To identify this layer in the model, you need to look at the layer that wraps both the attention layer and the MLP. This helps FSDP form more fine-grained units for communication, which helps optimize the communication cost.
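A minimal sketch of how these pieces fit together in PyTorch (illustrative only; the decoder-layer class and dtypes are assumptions, and the recipe's own FSDP setup lives in `src/llama_recipes`):

```python
# Sketch only: wrap a HF Llama model with FSDP using a transformer auto-wrap policy
# and bf16 mixed precision. Assumes torch.distributed has already been initialized.
import functools
import torch
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP, MixedPrecision
from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy
from transformers import AutoModelForCausalLM
from transformers.models.llama.modeling_llama import LlamaDecoderLayer

model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

wrap_policy = functools.partial(
    transformer_auto_wrap_policy,
    transformer_layer_cls={LlamaDecoderLayer},  # each decoder block becomes one FSDP unit
)
mp_policy = MixedPrecision(
    param_dtype=torch.bfloat16,
    reduce_dtype=torch.bfloat16,
    buffer_dtype=torch.bfloat16,
)
model = FSDP(model, auto_wrap_policy=wrap_policy, mixed_precision=mp_policy)
```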
67 |
--------------------------------------------------------------------------------
/recipes/finetuning/datasets/custom_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | # For dataset details visit: https://huggingface.co/datasets/samsum
5 |
6 | import copy
7 | import datasets
8 | import itertools
9 |
10 |
11 | B_INST, E_INST = "[INST]", "[/INST]"
12 |
13 | def tokenize_dialog(dialog, tokenizer):
14 | if tokenizer.vocab_size >= 128000:
15 | dialog_tokens = tokenizer.apply_chat_template(dialog)
16 | dialog_tokens = dialog_tokens[:-4] # Remove generation prompt <|start_header_id|>assistant<|end_header_id|>\n\n
17 | eot_indices = [i for i,n in enumerate(dialog_tokens) if n == 128009]
18 | labels = copy.copy(dialog_tokens)
19 | last_idx = 0
20 | for n, idx in enumerate(eot_indices):
21 | if n % 2 == 1:
22 | last_idx = idx
23 | else:
24 | labels[last_idx:idx+1] = [-100] * (idx-last_idx+1)
25 |
26 | dialog_tokens = [dialog_tokens]
27 | labels_tokens = [labels]
28 | else:
29 | prompt_tokens = [tokenizer.encode(f"{tokenizer.bos_token}{B_INST} {(prompt['content']).strip()} {E_INST}", add_special_tokens=False) for prompt in dialog[::2]]
30 | answer_tokens = [tokenizer.encode(f"{answer['content'].strip()} {tokenizer.eos_token}", add_special_tokens=False) for answer in dialog[1::2]]
31 | dialog_tokens = list(itertools.chain.from_iterable(zip(prompt_tokens, answer_tokens)))
32 |
33 | #Add labels, convert prompt token to -100 in order to ignore in loss function
34 | labels_tokens = [len(c)*[-100,] if i % 2 == 0 else c for i,c in enumerate(dialog_tokens)]
35 |
36 | combined_tokens = {
37 | "input_ids": list(itertools.chain(*(t for t in dialog_tokens))),
38 | "labels": list(itertools.chain(*(t for t in labels_tokens))),
39 | }
40 |
41 | return dict(combined_tokens, attention_mask=[1]*len(combined_tokens["input_ids"]))
42 |
43 |
44 | def get_custom_dataset(dataset_config, tokenizer, split):
45 | dataset = datasets.load_dataset("OpenAssistant/oasst1", split=split)
46 |
47 | dataset = dataset.map(lambda sample: {
48 | "message_id": sample["message_id"],
49 | "parent_id": sample["parent_id"],
50 | "text": sample["text"],
51 | },
52 | batched=True,
53 | remove_columns=list(dataset.features),)
54 |
55 | nodes = {}
56 |
57 | messages = {}
58 | root_ids = []
59 |
60 | for data in dataset:
61 | if data["parent_id"]:
62 | nodes[data["parent_id"]] = nodes.get(data["parent_id"], []) + [data["message_id"]]
63 | else:
64 | root_ids.append(data["message_id"])
65 | messages[data["message_id"]]=data["text"]
66 |
67 | def follow(thread, current_id):
68 | thread = copy.copy(thread) + [messages[current_id]]
69 | if current_id in nodes:
70 | new_threads = []
71 | for next_id in nodes[current_id]:
72 | new_threads += follow(thread, next_id)
73 | return new_threads
74 | else:
75 | return [thread]
76 |
77 | def get_threads_from_root(root_id):
78 | all_threads = []
79 | thread = [messages[root_id]]
80 | for cid in nodes[root_id]:
81 | all_threads += follow(thread, cid)
82 | return all_threads
83 |
84 | dataset = dataset.filter(lambda x: x["message_id"] in root_ids)
85 | dataset = dataset.map(lambda x: {"thread": get_threads_from_root(x["message_id"])}, remove_columns=list(dataset.features))
86 | dataset = dataset.map(lambda x: {"thread": [i for row in x["thread"] for i in row]}, batched=True)
87 |
88 | def to_dialog(thread):
89 | dialog = []
90 | for i, content in enumerate(thread):
91 | dialog.append({
92 | "role": "user" if i % 2 == 0 else "assistant",
93 | "content": content,
94 | })
95 | return {"dialog": dialog}
96 |
97 | dataset = dataset.map(lambda x: to_dialog(x["thread"]), remove_columns=list(dataset.features))
98 | dataset = dataset.map(lambda x: tokenize_dialog(x["dialog"], tokenizer), remove_columns=list(dataset.features))
99 |
100 | return dataset
101 |
--------------------------------------------------------------------------------
/recipes/finetuning/finetuning.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import fire
5 | from llama_recipes.finetuning import main
6 |
7 | if __name__ == "__main__":
8 | fire.Fire(main)
--------------------------------------------------------------------------------
/recipes/finetuning/multi_node.slurm:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright (c) Meta Platforms, Inc. and affiliates.
4 | # This software may be used and distributed according to the terms of the GNU General Public License version 3.
5 | 
6 |
7 | #SBATCH --job-name=Nano-2d-trainer-20b-8nodes
8 |
9 | #SBATCH --ntasks=2
10 | #SBATCH --nodes=2
11 | #SBATCH --gpus-per-task=4
12 | #SBATCH --partition=train
13 | nodes=( $( scontrol show hostnames $SLURM_JOB_NODELIST ) )
14 | nodes_array=($nodes)
15 | head_node=${nodes_array[0]}
16 | head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address)
17 | # Enable for A100
18 | export FI_PROVIDER="efa"
19 |
20 | echo Node IP: $head_node_ip
21 | export LOGLEVEL=INFO
22 | # debugging flags (optional)
23 | export NCCL_DEBUG=WARN
24 | export NCCL_DEBUG_SUBSYS=WARN
25 | export PYTHONFAULTHANDLER=1
26 | export LD_LIBRARY_PATH=/opt/amazon/efa/lib:$LD_LIBRARY_PATH
27 | export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
28 | export CUDA_LAUNCH_BLOCKING=0
29 |
30 | # on your cluster you might need these:
31 | # set the network interface
32 | export NCCL_SOCKET_IFNAME="ens"
33 | export FI_EFA_USE_DEVICE_RDMA=1
34 |
35 | srun torchrun --nproc_per_node 4 --rdzv_id $RANDOM --rdzv_backend c10d --rdzv_endpoint $head_node_ip:29500 ./finetuning.py --enable_fsdp --use_peft --peft_method lora
36 |
37 |
--------------------------------------------------------------------------------
/recipes/finetuning/singlegpu_finetuning.md:
--------------------------------------------------------------------------------
1 | # Fine-tuning with Single GPU
2 | This recipe steps you through how to finetune a Meta Llama 3 model on the text summarization task using the [samsum](https://huggingface.co/datasets/samsum) dataset on a single GPU.
3 |
4 | These are the instructions for using the canonical [finetuning script](../../src/llama_recipes/finetuning.py) in the llama-recipes package.
5 |
6 |
7 | ## Requirements
8 |
9 | Ensure that you have installed the llama-recipes package ([details](../../README.md#installing)).
10 |
11 | To run fine-tuning on a single GPU, we will make use of two packages:
12 | 1. [PEFT](https://github.com/huggingface/peft) to use parameter-efficient finetuning.
13 | 2. [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) for int8 quantization.
14 |
15 |
16 | ## How to run it?
17 |
18 | ```bash
19 | python finetuning.py --use_peft --peft_method lora --quantization --use_fp16 --model_name /path_of_model_folder/8B --output_dir Path/to/save/PEFT/model
20 | ```
21 | The args used in the command above are:
22 |
23 | * `--use_peft` boolean flag to enable PEFT methods in the script
24 | * `--peft_method` to specify the PEFT method; here we use `lora`, other options are `llama_adapter` and `prefix`.
25 | * `--quantization` boolean flag to enable int8 quantization
26 |
27 | > [!NOTE]
28 | > In case you are using a multi-GPU machine, please make sure to make only one of them visible, e.g. using `export CUDA_VISIBLE_DEVICES=<gpu_id>`.
29 |
30 |
31 | ### How to run with different datasets?
32 |
33 | Currently, 3 open source datasets are supported; they can be found in the [Datasets config file](../../src/llama_recipes/configs/datasets.py). You can also use your own custom dataset (more info [here](./datasets/README.md)).
34 |
35 | * `grammar_dataset` : use this [notebook](../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking.
36 |
37 | * `alpaca_dataset` : to get this open source data, please download `alpaca_data.json` into the `datasets` folder as shown below.
38 |
39 |
40 | ```bash
41 | wget -P ../../src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json
42 | ```
43 |
44 | * `samsum_dataset`
45 |
46 | To run with each of the datasets, set the `dataset` flag in the command as shown below:
47 | 
48 | ```bash
49 | # grammar_dataset
50 | 
51 | python finetuning.py --use_peft --peft_method lora --quantization --dataset grammar_dataset --model_name /path_of_model_folder/8B --output_dir Path/to/save/PEFT/model
52 | 
53 | # alpaca_dataset
54 | 
55 | python finetuning.py --use_peft --peft_method lora --quantization --dataset alpaca_dataset --model_name /path_of_model_folder/8B --output_dir Path/to/save/PEFT/model
56 | 
57 | 
58 | # samsum_dataset
59 | 
60 | python finetuning.py --use_peft --peft_method lora --quantization --dataset samsum_dataset --model_name /path_of_model_folder/8B --output_dir Path/to/save/PEFT/model
61 |
62 | ```
63 |
64 | ## FLOPS Counting and Pytorch Profiling
65 |
66 | To help with benchmarking efforts, we have added support for counting FLOPS during the fine-tuning process. You can enable this by setting `--flop_counter` when launching your single/multi GPU fine-tuning. Use `--flop_counter_start` to choose the step at which to start counting FLOPS. It is recommended to allow a warm-up stage before using the FLOPS counter.
67 |
68 | Similarly, you can set the `--use_profiler` flag and pass a profiling output path using `--profiler_dir` to capture the profile traces of your model using the [PyTorch profiler](https://pytorch.org/tutorials/intermediate/tensorboard_profiler_tutorial.html). To get accurate profiling results, the PyTorch profiler requires a warm-up stage; the current config is wait=1, warmup=2, active=3, so the profiler starts profiling after step 3 and records the next 3 steps. Therefore, in order to use the PyTorch profiler, `--max_train_step` has to be greater than 6. The PyTorch profiler can be helpful for debugging purposes. Note that `--flop_counter` and `--use_profiler` cannot be used at the same time, to ensure measurement accuracy. An example profiling command is sketched below.
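For example, a single-GPU profiling run on the samsum dataset could be sketched as follows (the model and output paths are placeholders, and the flag spellings follow the descriptions above):

```bash
python finetuning.py --use_peft --peft_method lora --quantization --dataset samsum_dataset \
    --model_name /path_of_model_folder/8B --output_dir Path/to/save/PEFT/model \
    --use_profiler --profiler_dir Path/to/save/profiler/traces --max_train_step 10
```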
69 |
--------------------------------------------------------------------------------
/recipes/inference/llama_web_ui/Llama2_Gradio.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "e4532411",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "# TODO REFACTOR: Integrate code from _legacy/inference.py into this notebook"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "id": "47a9adb3",
16 | "metadata": {},
17 | "source": [
18 | "## This demo app shows how to query Llama 2 using the Gradio UI.\n",
19 | "\n",
20 | "Since we are using Replicate in this example, you will need to replace `` with your API token.\n",
21 | "\n",
22 | "To get the Replicate token: \n",
23 | "\n",
24 |     "- You will need to first sign in to Replicate with your GitHub account\n",
25 | "- Then create a free API token [here](https://replicate.com/account/api-tokens) that you can use for a while \n",
26 | "\n",
27 | "**Note** After the free trial ends, you will need to enter billing info to continue to use Llama2 hosted on Replicate.\n",
28 | "\n",
29 | "To run this example:\n",
30 | "- Set up your Replicate API token and enter it in place of ``\n",
31 | "- Run the notebook\n",
32 | "- Enter your question and click Submit\n",
33 | "\n",
34 | "In the notebook or a browser with URL http://127.0.0.1:7860 you should see a UI with your answer."
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 1,
40 | "id": "928041cc",
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "name": "stderr",
45 | "output_type": "stream",
46 | "text": [
47 | "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
48 | ]
49 | },
50 | {
51 | "name": "stdout",
52 | "output_type": "stream",
53 | "text": [
54 | "Running on local URL: http://127.0.0.1:7860\n",
55 | "\n",
56 | "To create a public link, set `share=True` in `launch()`.\n"
57 | ]
58 | },
59 | {
60 | "data": {
61 | "text/html": [
62 | ""
63 | ],
64 | "text/plain": [
65 | ""
66 | ]
67 | },
68 | "metadata": {},
69 | "output_type": "display_data"
70 | },
71 | {
72 | "data": {
73 | "text/plain": []
74 | },
75 | "execution_count": 1,
76 | "metadata": {},
77 | "output_type": "execute_result"
78 | }
79 | ],
80 | "source": [
81 | "from langchain.schema import AIMessage, HumanMessage\n",
82 | "import gradio as gr\n",
83 | "from langchain.llms import Replicate\n",
84 | "import os\n",
85 | "\n",
86 | "os.environ[\"REPLICATE_API_TOKEN\"] = \"\"\n",
87 | "\n",
88 | "llama2_13b_chat = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
89 | "\n",
90 | "llm = Replicate(\n",
91 | " model=llama2_13b_chat,\n",
92 | " model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
93 | ")\n",
94 | "\n",
95 | "\n",
96 | "def predict(message, history):\n",
97 | " history_langchain_format = []\n",
98 | " for human, ai in history:\n",
99 | " history_langchain_format.append(HumanMessage(content=human))\n",
100 | " history_langchain_format.append(AIMessage(content=ai))\n",
101 | " history_langchain_format.append(HumanMessage(content=message))\n",
102 | " gpt_response = llm(message) #history_langchain_format)\n",
103 | " return gpt_response#.content\n",
104 | "\n",
105 | "gr.ChatInterface(predict).launch()"
106 | ]
107 | }
108 | ],
109 | "metadata": {
110 | "kernelspec": {
111 | "display_name": "Python 3 (ipykernel)",
112 | "language": "python",
113 | "name": "python3"
114 | },
115 | "language_info": {
116 | "codemirror_mode": {
117 | "name": "ipython",
118 | "version": 3
119 | },
120 | "file_extension": ".py",
121 | "mimetype": "text/x-python",
122 | "name": "python",
123 | "nbconvert_exporter": "python",
124 | "pygments_lexer": "ipython3",
125 | "version": "3.8.18"
126 | }
127 | },
128 | "nbformat": 4,
129 | "nbformat_minor": 5
130 | }
131 |
--------------------------------------------------------------------------------
/recipes/inference/llama_web_ui/README.md:
--------------------------------------------------------------------------------
1 | ## Quick Web UI for Llama2 Chat
2 | If you prefer to see Llama2 in action in a web UI, instead of the notebooks above, you can try one of the two methods:
3 |
4 | ### Running [Streamlit](https://streamlit.io/) with Llama2
5 | Open a Terminal, run the following commands:
6 | ```
7 | pip install streamlit langchain replicate
8 | git clone https://github.com/facebookresearch/llama-recipes
9 | cd llama-recipes/llama-demo-apps
10 | ```
11 |
12 | Replace the `` in `streamlit_llama2.py` with your API token created [here](https://replicate.com/account/api-tokens) - for more info, see the note [above](#replicate_note).
13 |
14 | Then run the command `streamlit run streamlit_llama2.py` and you'll see in your browser the following UI with a question and answer - you can enter a new question, click Submit, and see Llama2's answer:
15 |
16 | 
17 | 
18 |
19 | ### Running [Gradio](https://www.gradio.app/) with Llama2 (using [Replicate](Llama2_Gradio.ipynb) or [OctoAI](../../llama_api_providers/OctoAI_API_examples/Llama2_Gradio.ipynb))
20 |
21 | To see how to query Llama2 and get answers with the Gradio UI both from the notebook and the web, just launch the notebook `Llama2_Gradio.ipynb`. For more info on how to get set up with a token to power these apps, see the notes on [Replicate](../../README.md#replicate_note) and [OctoAI](../../README.md#octoai_note).
22 |
23 | Then enter your question, click Submit. You'll see in the notebook or a browser with URL http://127.0.0.1:7860 the following UI:
24 |
25 | 
26 |
--------------------------------------------------------------------------------
/recipes/inference/llama_web_ui/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | langchain
3 | replicate
--------------------------------------------------------------------------------
/recipes/inference/llama_web_ui/streamlit_llama2.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | # TODO REFACTOR: Convert this to an ipynb notebook
5 |
6 | import streamlit as st
7 | from langchain.llms import Replicate
8 | import os
9 |
10 | st.title("Llama2-powered Streamlit App")
11 |
12 | with st.sidebar:
13 | os.environ["REPLICATE_API_TOKEN"] = ""
14 |
15 | def generate_response(input_text):
16 | llama2_13b_chat = "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d"
17 |
18 | llm = Replicate(
19 | model=llama2_13b_chat,
20 | model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens":500}
21 | )
22 | st.info(llm(input_text))
23 |
24 | with st.form("my_form"):
25 | text = st.text_area("Enter text:", "What is Generative AI?")
26 | submitted = st.form_submit_button("Submit")
27 |     if submitted: generate_response(text)  # only call the model after the user clicks Submit
28 |
--------------------------------------------------------------------------------
/recipes/inference/local_inference/README.md:
--------------------------------------------------------------------------------
1 | # Local Inference
2 |
3 | For local inference we have provided an [inference script](inference.py). Depending on the type of finetuning performed during training, the [inference script](inference.py) takes different arguments.
4 | If all model parameters were finetuned, the output dir of the training has to be given as the --model_name argument.
5 | In the case of a parameter-efficient method like LoRA, the base model has to be given as --model_name and the output dir of the training as the --peft_model argument.
6 | Additionally, a prompt for the model in the form of a text file has to be provided. The prompt file can either be piped through standard input or given as the --prompt_file parameter.
7 |
8 | **Content Safety**
9 | The inference script also supports safety checks for both user prompt and model outputs. In particular, we use two packages, [AuditNLG](https://github.com/salesforce/AuditNLG/tree/main) and [Azure content safety](https://pypi.org/project/azure-ai-contentsafety/1.0.0b1/).
10 |
11 | **Note**
12 | If using Azure Content Safety, please make sure to get the endpoint and API key as described [here](https://pypi.org/project/azure-ai-contentsafety/1.0.0b1/) and add them as the following environment variables: `CONTENT_SAFETY_ENDPOINT` and `CONTENT_SAFETY_KEY`.
13 |
14 | Examples:
15 |
16 | ```bash
17 | # Full finetuning of all parameters
18 | cat | python inference.py --model_name --use_auditnlg
19 | # PEFT method
20 | cat | python inference.py --model_name --peft_model --use_auditnlg
21 | # prompt as parameter
22 | python inference.py --model_name --prompt_file --use_auditnlg
23 | ```
24 | The folder contains test prompts for the summarization use case:
25 | ```
26 | samsum_prompt.txt
27 | ...
28 | ```
29 |
30 | **Note**
31 | Currently, the pad token in the [HuggingFace Tokenizer is `None`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/tokenization_llama.py#L110) by default. We add the padding token as a special token to the tokenizer, which in this case requires resizing the token_embeddings as shown below:
32 |
33 | ```python
34 | tokenizer.add_special_tokens(
35 | {
36 |
37 | "pad_token": "",
38 | }
39 | )
40 | model.resize_token_embeddings(model.config.vocab_size + 1)
41 | ```
42 | Padding would be required for batch inference. In this [example](inference.py), the batch size is 1, so padding is essentially not required. However, we added the code pointer as an example for the batch inference case; a minimal sketch follows below.
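As an illustrative sketch only (continuing the snippet above, with placeholder prompts), batched inference with padding could look like this:

```python
prompts = ["Summarize this dialog: ...", "Summarize this dialog: ..."]  # placeholder prompts
batch = tokenizer(prompts, padding=True, return_tensors="pt").to(model.device)
outputs = model.generate(**batch, max_new_tokens=100)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```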
43 |
44 |
45 | ## Chat completion
46 | The inference folder also includes a chat completion example that adds the built-in safety features of fine-tuned models to the prompt tokens. To run the example:
47 |
48 | ```bash
49 | python chat_completion/chat_completion.py --model_name "PATH/TO/MODEL/7B/" --prompt_file chat_completion/chats.json --quantization --use_auditnlg
50 |
51 | ```
52 |
53 | ## Flash Attention and Xformer Memory Efficient Kernels
54 |
55 | Setting `use_fast_kernels` will enable the use of Flash Attention or Xformers memory-efficient kernels based on the hardware being used. This speeds up inference when used for batched inputs. This has been enabled in the `optimum` library from HuggingFace as a one-liner API; please read more [here](https://pytorch.org/blog/out-of-the-box-acceleration/).
56 |
57 | ```bash
58 | python chat_completion/chat_completion.py --model_name "PATH/TO/MODEL/7B/" --prompt_file chat_completion/chats.json --quantization --use_auditnlg --use_fast_kernels
59 |
60 | python inference.py --model_name --peft_model --prompt_file --use_auditnlg --use_fast_kernels
61 |
62 | ```
63 |
64 | ## Loading back FSDP checkpoints
65 |
66 | In case you have fine-tuned your model with pure FSDP and saved the checkpoints with "SHARDED_STATE_DICT" as shown [here](../../../src/llama_recipes/configs/fsdp.py), you can use this converter script to convert the FSDP Sharded checkpoints into HuggingFace checkpoints. This enables you to use the inference script normally as mentioned above.
67 | **To convert the checkpoint use the following command**:
68 |
69 | This is helpful if you have fine-tuned your model using FSDP only, as follows:
70 |
71 | ```bash
72 | torchrun --nnodes 1 --nproc_per_node 8 recipes/finetuning/finetuning.py --enable_fsdp --model_name /path_of_model_folder/7B --dist_checkpoint_root_folder model_checkpoints --dist_checkpoint_folder fine-tuned --pure_bf16
73 | ```
74 | Then convert your FSDP checkpoint to HuggingFace checkpoints using:
75 | ```bash
76 | python -m llama_recipes.inference.checkpoint_converter_fsdp_hf --fsdp_checkpoint_path PATH/to/FSDP/Checkpoints --consolidated_model_path PATH/to/save/checkpoints --HF_model_path_or_name PATH/or/HF/model_name
77 |
78 | # --HF_model_path_or_name specifies the HF Llama model name or path where it has config.json and tokenizer.json
79 | ```
80 | By default, training parameters are saved in `train_params.yaml` in the path where the FSDP checkpoints are saved. In the converter script we first try to find the HuggingFace model name used during fine-tuning and load the model with its configs from there; if it is not found, the user needs to provide it.
81 |
82 | Then run inference using:
83 |
84 | ```bash
85 | python inference.py --model_name --prompt_file
86 |
87 | ```
--------------------------------------------------------------------------------
/recipes/inference/local_inference/chat_completion/chats.json:
--------------------------------------------------------------------------------
1 | [
2 | [{"role": "user", "content": "what is the recipe of mayonnaise?"}],
3 | [
4 | {"role": "user", "content": "I am going to Paris, what should I see?"},
5 | {
6 | "role": "assistant",
7 | "content": "Paris, the capital of France, is known for its stunning architecture, art museums, historical landmarks, and romantic atmosphere. Here are some of the top attractions to see in Paris:1. The Eiffel Tower: The iconic Eiffel Tower is one of the most recognizable landmarks in the world and offers breathtaking views of the city. 2. The Louvre Museum: The Louvre is one of the world's largest and most famous museums, housing an impressive collection of art and artifacts, including the Mona Lisa. 3. Notre-Dame Cathedral: This beautiful cathedral is one of the most famous landmarks in Paris and is known for its Gothic architecture and stunning stained glass windows.These are just a few of the many attractions that Paris has to offer. With so much to see and do, it's no wonder that Paris is one of the most popular tourist destinations in the world."
8 | },
9 | {"role": "user", "content": "What is so great about #1?"}
10 | ],
11 | [
12 | {"role": "system", "content": "Always answer with Haiku"},
13 | {"role": "user", "content": "I am going to Paris, what should I see?"}
14 | ],
15 | [
16 | {
17 | "role": "system",
18 | "content": "Always answer with emojis"
19 | },
20 | {"role": "user", "content": "How to go from Beijing to NY?"}
21 | ],
22 | [
23 | {
24 | "role": "system",
25 | "content": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
26 | },
27 | {"role": "user", "content": "Write a brief birthday message to John"}
28 | ]
29 | ]
--------------------------------------------------------------------------------
/recipes/inference/local_inference/samsum_prompt.txt:
--------------------------------------------------------------------------------
1 | Summarize this dialog:
2 | A: Hi Tom, are you busy tomorrow’s afternoon?
3 | B: I’m pretty sure I am. What’s up?
4 | A: Can you go with me to the animal shelter?.
5 | B: What do you want to do?
6 | A: I want to get a puppy for my son.
7 | B: That will make him so happy.
8 | A: Yeah, we’ve discussed it many times. I think he’s ready now.
9 | B: That’s good. Raising a dog is a tough issue. Like having a baby ;-)
10 | A: I'll get him one of those little dogs.
11 | B: One that won't grow up too big;-)
12 | A: And eat too much;-))
13 | B: Do you know which one he would like?
14 | A: Oh, yes, I took him there last Monday. He showed me one that he really liked.
15 | B: I bet you had to drag him away.
16 | A: He wanted to take it home right away ;-).
17 | B: I wonder what he'll name it.
18 | A: He said he’d name it after his dead hamster – Lemmy - he's a great Motorhead fan :-)))
19 | ---
20 | Summary:
--------------------------------------------------------------------------------
/recipes/inference/model_servers/README.md:
--------------------------------------------------------------------------------
1 | ## [Running Llama2 On-Prem with vLLM and TGI](llama-on-prem.md)
2 | This tutorial shows how to use Llama 2 with [vLLM](https://github.com/vllm-project/vllm) and Hugging Face [TGI](https://github.com/huggingface/text-generation-inference) to build Llama 2 on-prem apps.
3 |
4 | \* To run a quantized Llama2 model on iOS and Android, you can use the open source [MLC LLM](https://github.com/mlc-ai/mlc-llm) or [llama.cpp](https://github.com/ggerganov/llama.cpp). You can even make a Linux OS that boots to Llama2 ([repo](https://github.com/trholding/llama2.c)).
--------------------------------------------------------------------------------
/recipes/inference/model_servers/hf_text_generation_inference/README.md:
--------------------------------------------------------------------------------
1 | # Serving a fine tuned Llama model with HuggingFace text-generation-inference server
2 |
3 | This document shows how to serve a fine tuned Llama model with HuggingFace's text-generation-inference server. This option is currently only available for models that were trained using the LoRA method or without using the `--use_peft` argument.
4 |
5 | ## Step 0: Merging the weights (Only required if LoRA method was used)
6 |
7 | In case the model was fine tuned with the LoRA method, we need to merge the weights of the base model with the adapter weights. For this we can use the script `merge_lora_weights.py`, which is located in the same folder as this README file.
8 | 
9 | The script takes the base model, the PEFT weight folder, and an output directory as arguments:
10 |
11 | ```
12 | python -m llama_recipes.inference.hf_text_generation_inference.merge_lora_weights --base_model llama-7B --peft_model ft_output --output_dir data/merged_model_output
13 | ```
14 |
15 | ## Step 1: Serving the model
16 | Subsequently, the model can be served using the docker container provided by [hf text-generation-inference](https://github.com/huggingface/text-generation-inference) started from the main directory of this repository:
17 |
18 | ```bash
19 | model=/data/merged_model_output
20 | num_shard=2
21 | volume=$PWD/inference/hf-text-generation-inference/data
22 | docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --num-shard $num_shard
23 | ```
24 |
25 | The num_shard argument determines the number of GPUs the model should be sharded over.
26 |
27 | ## Step 2: Running inference
28 | After the loading of the model shards has completed, inference can be executed using one of the following commands:
29 |
30 | ```bash
31 | curl 127.0.0.1:8080/generate \
32 | -X POST \
33 | -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
34 | -H 'Content-Type: application/json'
35 | # OR for streaming inference
36 | curl 127.0.0.1:8080/generate_stream \
37 | -X POST \
38 | -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
39 | -H 'Content-Type: application/json'
40 | ```
41 |
42 | Further information can be found in the documentation of the [hf text-generation-inference](https://github.com/huggingface/text-generation-inference) solution.
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/recipes/inference/model_servers/hf_text_generation_inference/merge_lora_weights.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import fire
5 | import torch
6 | from peft import PeftModel
7 | from transformers import LlamaForCausalLM, LlamaTokenizer
8 |
9 |
10 | def main(base_model: str,
11 | peft_model: str,
12 | output_dir: str):
13 |
14 | model = LlamaForCausalLM.from_pretrained(
15 | base_model,
16 | load_in_8bit=False,
17 | torch_dtype=torch.float16,
18 | device_map="auto",
19 | offload_folder="tmp",
20 | )
21 |
22 | tokenizer = LlamaTokenizer.from_pretrained(
23 | base_model
24 | )
25 |
26 | model = PeftModel.from_pretrained(
27 | model,
28 | peft_model,
29 | torch_dtype=torch.float16,
30 | device_map="auto",
31 | offload_folder="tmp",
32 | )
33 |
34 | model = model.merge_and_unload()
35 | model.save_pretrained(output_dir)
36 | tokenizer.save_pretrained(output_dir)
37 |
38 |
39 | if __name__ == "__main__":
40 | fire.Fire(main)
--------------------------------------------------------------------------------
/recipes/inference/model_servers/vllm/inference.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import fire
5 |
6 | import torch
7 | from vllm import LLM
8 | from vllm import LLM, SamplingParams
9 | from accelerate.utils import is_xpu_available
10 |
11 | if is_xpu_available():
12 | torch.xpu.manual_seed(42)
13 | else:
14 | torch.cuda.manual_seed(42)
15 |
16 | torch.manual_seed(42)
17 |
18 | def load_model(model_name, tp_size=1):
19 |
20 | llm = LLM(model_name, tensor_parallel_size=tp_size)
21 | return llm
22 |
23 | def main(
24 | model,
25 | max_new_tokens=100,
26 | user_prompt=None,
27 | top_p=0.9,
28 | temperature=0.8
29 | ):
30 | while True:
31 | if user_prompt is None:
32 | user_prompt = input("Enter your prompt: ")
33 |
34 | print(f"User prompt:\n{user_prompt}")
35 |
36 | print(f"sampling params: top_p {top_p} and temperature {temperature} for this inference request")
37 | sampling_param = SamplingParams(top_p=top_p, temperature=temperature, max_tokens=max_new_tokens)
38 |
39 |
40 | outputs = model.generate(user_prompt, sampling_params=sampling_param)
41 |
42 | print(f"model output:\n {user_prompt} {outputs[0].outputs[0].text}")
43 | user_prompt = input("Enter next prompt (press Enter to exit): ")
44 | if not user_prompt:
45 | break
46 |
47 | def run_script(
48 | model_name: str,
49 | peft_model=None,
50 | tp_size=1,
51 | max_new_tokens=100,
52 | user_prompt=None,
53 | top_p=0.9,
54 | temperature=0.8
55 | ):
56 | model = load_model(model_name, tp_size)
57 | main(model, max_new_tokens, user_prompt, top_p, temperature)
58 |
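# Example usage (the model id/path and tensor parallel size below are illustrative):
#   python inference.py --model_name meta-llama/Llama-2-7b-chat-hf --tp_size 1
# Note: the peft_model argument is accepted but not used by this script.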
59 | if __name__ == "__main__":
60 | fire.Fire(run_script)
61 |
--------------------------------------------------------------------------------
/recipes/llama_api_providers/OctoAI_API_examples/Llama2_Gradio.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "47a9adb3",
6 | "metadata": {},
7 | "source": [
8 | "## This demo app shows how to query Llama 2 using the Gradio UI.\n",
9 | "\n",
10 | "Since we are using OctoAI in this example, you'll need to obtain an OctoAI token:\n",
11 | "\n",
12 | "- You will need to first sign into [OctoAI](https://octoai.cloud/) with your Github or Google account\n",
13 | "- Then create a free API token [here](https://octo.ai/docs/getting-started/how-to-create-an-octoai-access-token) that you can use for a while (a month or $10 in OctoAI credits, whichever one runs out first)\n",
14 | "\n",
15 | "**Note** After the free trial ends, you will need to enter billing info to continue to use Llama2 hosted on OctoAI.\n",
16 | "\n",
17 | "To run this example:\n",
18 | "- Run the notebook\n",
19 | "- Set up your OCTOAI API token and enter it when prompted\n",
20 | "- Enter your question and click Submit\n",
21 | "\n",
22 | "In the notebook or a browser with URL http://127.0.0.1:7860 you should see a UI with your answer.\n",
23 | "\n",
24 | "Let's start by installing the necessary packages:\n",
25 | "- langchain provides necessary RAG tools for this demo\n",
26 | "- octoai-sdk allows us to use OctoAI Llama 2 endpoint\n",
27 | "- gradio is used for the UI elements\n",
28 | "\n",
29 | "And setting up the OctoAI token."
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": null,
35 | "id": "6ae4f858-6ef7-49d9-b45b-1ef79d0217a0",
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "!pip install langchain octoai-sdk gradio"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "id": "3306c11d-ed82-41c5-a381-15fb5c07d307",
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "from getpass import getpass\n",
50 | "import os\n",
51 | "\n",
52 | "OCTOAI_API_TOKEN = getpass()\n",
53 | "os.environ[\"OCTOAI_API_TOKEN\"] = OCTOAI_API_TOKEN"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "id": "928041cc",
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "from langchain.schema import AIMessage, HumanMessage\n",
64 | "import gradio as gr\n",
65 | "from langchain.llms.octoai_endpoint import OctoAIEndpoint\n",
66 | "\n",
67 | "llama2_13b = \"llama-2-13b-chat-fp16\"\n",
68 | "\n",
69 | "llm = OctoAIEndpoint(\n",
70 | " endpoint_url=\"https://text.octoai.run/v1/chat/completions\",\n",
71 | " model_kwargs={\n",
72 | " \"model\": llama2_13b,\n",
73 | " \"messages\": [\n",
74 | " {\n",
75 | " \"role\": \"system\",\n",
76 | " \"content\": \"You are a helpful, respectful and honest assistant.\"\n",
77 | " }\n",
78 | " ],\n",
79 | " \"max_tokens\": 500,\n",
80 | " \"top_p\": 1,\n",
81 | " \"temperature\": 0.01\n",
82 | " },\n",
83 | ")\n",
84 | "\n",
85 | "\n",
86 | "def predict(message, history):\n",
87 | " history_langchain_format = []\n",
88 | " for human, ai in history:\n",
89 | " history_langchain_format.append(HumanMessage(content=human))\n",
90 | " history_langchain_format.append(AIMessage(content=ai))\n",
91 | " history_langchain_format.append(HumanMessage(content=message))\n",
92 | " llm_response = llm(message, history_langchain_format)\n",
93 | " return llm_response.content\n",
94 | "\n",
95 | "gr.ChatInterface(predict).launch()"
96 | ]
97 | }
98 | ],
99 | "metadata": {
100 | "kernelspec": {
101 | "display_name": "Python 3 (ipykernel)",
102 | "language": "python",
103 | "name": "python3"
104 | },
105 | "language_info": {
106 | "codemirror_mode": {
107 | "name": "ipython",
108 | "version": 3
109 | },
110 | "file_extension": ".py",
111 | "mimetype": "text/x-python",
112 | "name": "python",
113 | "nbconvert_exporter": "python",
114 | "pygments_lexer": "ipython3",
115 | "version": "3.11.6"
116 | }
117 | },
118 | "nbformat": 4,
119 | "nbformat_minor": 5
120 | }
121 |
--------------------------------------------------------------------------------
/recipes/llama_api_providers/OctoAI_API_examples/RAG_Chatbot_example/data/Llama Getting Started Guide.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/llama_api_providers/OctoAI_API_examples/RAG_Chatbot_example/data/Llama Getting Started Guide.pdf
--------------------------------------------------------------------------------
/recipes/llama_api_providers/OctoAI_API_examples/RAG_Chatbot_example/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio==4.16.0
2 | pypdf==4.0.0
3 | langchain==0.1.7
4 | sentence-transformers==2.2.2
5 | faiss-cpu==1.7.4
6 | text-generation==0.6.1
7 | octoai-sdk==0.8.3
--------------------------------------------------------------------------------
/recipes/llama_api_providers/OctoAI_API_examples/RAG_Chatbot_example/vectorstore/db_faiss/index.faiss:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/llama_api_providers/OctoAI_API_examples/RAG_Chatbot_example/vectorstore/db_faiss/index.faiss
--------------------------------------------------------------------------------
/recipes/llama_api_providers/OctoAI_API_examples/RAG_Chatbot_example/vectorstore/db_faiss/index.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/llama_api_providers/OctoAI_API_examples/RAG_Chatbot_example/vectorstore/db_faiss/index.pkl
--------------------------------------------------------------------------------
/recipes/multilingual/extend_tokenizer.py:
--------------------------------------------------------------------------------
1 | """
2 | Code borrowed from https://github.com/ymcui/Chinese-LLaMA-Alpaca/blob/main/scripts/merge_tokenizer/merge_tokenizers.py
3 | """
4 |
5 | import os
6 | import fire
7 | import re
8 | from transformers import LlamaTokenizer
9 |
10 | os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
11 | from huggingface_hub import hf_hub_download
12 | from sentencepiece import sentencepiece_model_pb2 as sp_pb2_model
13 |
14 |
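# Example usage (paths are illustrative):
#   python extend_tokenizer.py --new_tokenizer_path=hi_tokenizer --extended_tokenizer_save_path=extended_tokenizer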
15 | def main(new_tokenizer_path, extended_tokenizer_save_path):
16 | original_tokenizer_path = hf_hub_download(repo_id="meta-llama/Llama-2-7b-chat-hf", filename="tokenizer.model", local_dir="original_tokenizer")
17 | original_tokenizer_spm = sp_pb2_model.ModelProto()
18 | original_tokenizer_spm.ParseFromString(open(original_tokenizer_path, "rb").read())
19 | new_tokenizer_spm = sp_pb2_model.ModelProto()
20 | new_tokenizer_spm.ParseFromString(open(os.path.join(new_tokenizer_path, "tokenizer.model"), "rb").read())
21 |
22 | def contains_eng(text):
23 | eng_pattern = re.compile(r"[\u0020-\u007E]+")
24 | return True if eng_pattern.search(text) else False
25 |
26 | original_tokenizer_tokenset = set(p.piece for p in original_tokenizer_spm.pieces)
27 | print(f"Number of tokens before merge: {len(original_tokenizer_tokenset)}")
28 | for p in new_tokenizer_spm.pieces:
29 | piece = p.piece
30 | if piece not in original_tokenizer_tokenset and not contains_eng(piece):
31 | new_p = sp_pb2_model.ModelProto().SentencePiece()
32 | new_p.piece = piece
33 | new_p.score = 0
34 | original_tokenizer_spm.pieces.append(new_p)
35 | print(f"Number of tokens after merge: {len(original_tokenizer_spm.pieces)}")
36 |
37 | os.makedirs(extended_tokenizer_save_path, exist_ok=True)
38 | with open(os.path.join(extended_tokenizer_save_path, "tokenizer.model"), "wb") as f:
39 | f.write(original_tokenizer_spm.SerializeToString())
40 | tokenizer = LlamaTokenizer(vocab_file=os.path.join(extended_tokenizer_save_path, "tokenizer.model"), legacy=False)
41 | tokenizer.save_pretrained(extended_tokenizer_save_path)
42 | print(f"Tokenizer saved to {extended_tokenizer_save_path}")
43 |
44 | # Verify that the extended tokenizer's English vocab matches with that of the original Llama tokenizer
45 | tok1 = LlamaTokenizer.from_pretrained('meta-llama/Llama-2-7b-chat-hf')
46 | tok2 = LlamaTokenizer.from_pretrained(extended_tokenizer_save_path)
47 | for i in range(len(tok1)):
48 | assert tok1.convert_ids_to_tokens(i) == tok2.convert_ids_to_tokens(i), f"Token mismatch at index {i}."
49 |
50 |
51 | if __name__ == "__main__":
52 | fire.Fire(main)
--------------------------------------------------------------------------------
/recipes/multilingual/imgs/phase1-eval-loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/multilingual/imgs/phase1-eval-loss.png
--------------------------------------------------------------------------------
/recipes/multilingual/imgs/phase1-train-loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/multilingual/imgs/phase1-train-loss.png
--------------------------------------------------------------------------------
/recipes/multilingual/imgs/phase2-eval-loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/multilingual/imgs/phase2-eval-loss.png
--------------------------------------------------------------------------------
/recipes/multilingual/imgs/phase2-train-loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/multilingual/imgs/phase2-train-loss.png
--------------------------------------------------------------------------------
/recipes/multilingual/prepare_data.py:
--------------------------------------------------------------------------------
1 | import fire
2 | import os
3 | from datasets import load_dataset
4 |
5 | DATASET = "rahular/varta"
6 |
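# Example usage (defaults shown; samples 10k Hindi documents from the varta validation split):
#   python prepare_data.py --split=validation --lang=hi --docs_to_sample=10000 --save_path=data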
7 | def main(split="validation", lang="hi", docs_to_sample=10_000, save_path="data"):
8 | dataset = load_dataset(DATASET, split=split, streaming=True)
9 | os.makedirs(save_path, exist_ok=True)
10 | with open(os.path.join(save_path, f"{lang}.txt"), "w") as f:
11 | count = 0
12 | for idx, d in enumerate(dataset):
13 | if idx % 10_000 == 0:
14 | print(f"Searched {idx} documents for {lang} documents. Found {count} documents.")
15 | if count >= docs_to_sample:
16 | break
17 | if d["langCode"] == lang:
18 | f.write(d["headline"] + "\n" + d["text"] + "\n")
19 | count += 1
20 |
21 |
22 | if __name__ == "__main__":
23 | fire.Fire(main)
--------------------------------------------------------------------------------
/recipes/multilingual/train_tokenizer.py:
--------------------------------------------------------------------------------
1 | import fire
2 | import os
3 | import sentencepiece as spm
4 |
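# Example usage (the data file path is illustrative; expects the text file produced by prepare_data.py):
#   python train_tokenizer.py --data_file=data/hi.txt --save_path=hi_tokenizer --vocab_size=16000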
5 | def main(data_file, save_path, vocab_size=16_000, num_threads=8):
6 | os.makedirs(save_path, exist_ok=True)
7 | tokenizer_name = os.path.join(save_path, "tokenizer")
8 |
9 | spm.SentencePieceTrainer.train(
10 | input=data_file,
11 | model_prefix=tokenizer_name,
12 | vocab_size=vocab_size,
13 | num_threads=num_threads,
14 | model_type="bpe",
15 | max_sentence_length=1073741824,
16 | shuffle_input_sentence="true",
17 | character_coverage=1.0,
18 | hard_vocab_limit="false",
19 | )
20 |
21 | if __name__ == "__main__":
22 | fire.Fire(main)
23 |
--------------------------------------------------------------------------------
/recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Running Llama 3 on Mac, Windows or Linux\n",
8 | "This notebook goes over how you can set up and run Llama 3 locally on a Mac, Windows or Linux using [Ollama](https://ollama.com/)."
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "### Steps at a glance:\n",
16 | "1. Download and install Ollama.\n",
17 | "2. Download and test run Llama 3.\n",
18 | "3. Use local Llama 3 via Python.\n",
19 | "4. Use local Llama 3 via LangChain.\n"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "#### 1. Download and install Ollama\n",
27 | "\n",
28 | "On Mac or Windows, go to the Ollama download page [here](https://ollama.com/download) and select your platform to download it, then double click the downloaded file to install Ollama.\n",
29 | "\n",
30 | "On Linux, you can simply run on a terminal `curl -fsSL https://ollama.com/install.sh | sh` to download and install Ollama."
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "#### 2. Download and test run Llama 3\n",
38 | "\n",
39 | "On a terminal or console, run `ollama pull llama3` to download the Llama 3 8b chat model, in the 4-bit quantized format with size about 4.7 GB.\n",
40 | "\n",
41 | "Run `ollama pull llama3:70b` to download the Llama 3 70b chat model, also in the 4-bit quantized format with size 39GB.\n",
42 | "\n",
43 | "Then you can run `ollama run llama3` and ask Llama 3 questions such as \"who wrote the book godfather?\" or \"who wrote the book godfather? answer in one sentence.\" You can also try `ollama run llama3:70b`, but the inference speed will most likely be too slow - for example, on an Apple M1 Pro with 32GB RAM, it takes over 10 seconds to generate one token using Llama 3 70b chat (vs over 10 tokens per second with Llama 3 8b chat).\n",
44 | "\n",
45 | "You can also run the following command to test Llama 3 8b chat:\n",
46 | "```\n",
47 | " curl http://localhost:11434/api/chat -d '{\n",
48 | " \"model\": \"llama3\",\n",
49 | " \"messages\": [\n",
50 | " {\n",
51 | " \"role\": \"user\",\n",
52 | " \"content\": \"who wrote the book godfather?\"\n",
53 | " }\n",
54 | " ],\n",
55 | " \"stream\": false\n",
56 | "}'\n",
57 | "```\n",
58 | "\n",
59 | "The complete Ollama API doc is [here](https://github.com/ollama/ollama/blob/main/docs/api.md)."
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {},
65 | "source": [
66 | "#### 3. Use local Llama 3 via Python\n",
67 | "\n",
68 | "The Python code below is the port of the curl command above."
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "import requests\n",
78 | "import json\n",
79 | "\n",
80 | "url = \"http://localhost:11434/api/chat\"\n",
81 | "\n",
82 | "def llama3(prompt):\n",
83 | " data = {\n",
84 | " \"model\": \"llama3\",\n",
85 | " \"messages\": [\n",
86 | " {\n",
87 | " \"role\": \"user\",\n",
88 | " \"content\": prompt\n",
89 | " }\n",
90 | " ],\n",
91 | " \"stream\": False\n",
92 | " }\n",
93 | " \n",
94 | " headers = {\n",
95 | " 'Content-Type': 'application/json'\n",
96 | " }\n",
97 | " \n",
98 | " response = requests.post(url, headers=headers, json=data)\n",
99 | " \n",
100 | " return(response.json()['message']['content'])"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "metadata": {},
107 | "outputs": [],
108 | "source": [
109 | "response = llama3(\"who wrote the book godfather\")\n",
110 | "print(response)"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "#### 4. Use local Llama 3 via LangChain\n",
118 | "\n",
119 |     "The code below uses LangChain with Ollama to query Llama 3 running locally. For a more advanced example of using local Llama 3 with LangChain and agent-powered RAG, see [this](https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_rag_agent_llama3_local.ipynb)."
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "!pip install langchain"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": null,
134 | "metadata": {},
135 | "outputs": [],
136 | "source": [
137 | "from langchain_community.chat_models import ChatOllama\n",
138 | "\n",
139 | "llm = ChatOllama(model=\"llama3\", temperature=0)\n",
140 | "response = llm.invoke(\"who wrote the book godfather?\")\n",
141 | "print(response.content)\n"
142 | ]
143 | }
144 | ],
145 | "metadata": {
146 | "kernelspec": {
147 | "display_name": "Python 3 (ipykernel)",
148 | "language": "python",
149 | "name": "python3"
150 | },
151 | "language_info": {
152 | "codemirror_mode": {
153 | "name": "ipython",
154 | "version": 3
155 | },
156 | "file_extension": ".py",
157 | "mimetype": "text/x-python",
158 | "name": "python",
159 | "nbconvert_exporter": "python",
160 | "pygments_lexer": "ipython3",
161 | "version": "3.11.9"
162 | }
163 | },
164 | "nbformat": 4,
165 | "nbformat_minor": 4
166 | }
167 |
--------------------------------------------------------------------------------
/recipes/responsible_ai/README.md:
--------------------------------------------------------------------------------
1 | # Meta Llama Guard
2 |
3 | Meta Llama Guard and Meta Llama Guard 2 are new models that provide input and output guardrails for LLM inference. For more details, please visit the main [repository](https://github.com/facebookresearch/PurpleLlama/tree/main/Llama-Guard2).
4 |
5 | **Note** Please find the right model on HF side [here](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B).
6 |
7 | ### Running locally
8 | The [llama_guard](llama_guard) folder contains the inference script to run Meta Llama Guard locally. Add test prompts directly to the [inference script](llama_guard/inference.py) before running it.
9 |
10 | ### Running on the cloud
11 | The notebooks [Purple_Llama_Anyscale](Purple_Llama_Anyscale.ipynb) & [Purple_Llama_OctoAI](Purple_Llama_OctoAI.ipynb) contain examples for running Meta Llama Guard on cloud hosted endpoints.
--------------------------------------------------------------------------------
/recipes/responsible_ai/llama_guard/README.md:
--------------------------------------------------------------------------------
1 | # Meta Llama Guard demo
2 |
3 | Meta Llama Guard is a language model that provides input and output guardrails for LLM inference. For more details and model cards, please visit the main repository for each model, [Meta Llama Guard](https://github.com/meta-llama/PurpleLlama/tree/main/Llama-Guard) and Meta [Llama Guard 2](https://github.com/meta-llama/PurpleLlama/tree/main/Llama-Guard2).
4 |
5 | This folder contains an example file to run inference with a locally hosted model, either using the Hugging Face Hub or a local path.
6 |
7 | ## Requirements
8 | 1. Access to the Llama Guard model weights on Hugging Face. To get access, follow the steps described [here](https://github.com/facebookresearch/PurpleLlama/tree/main/Llama-Guard#download)
9 | 2. Llama recipes package and its dependencies [installed](https://github.com/meta-llama/llama-recipes?tab=readme-ov-file#installing)
10 |
11 |
12 | ## Llama Guard inference script
13 | For testing, you can add User or User/Agent interactions into the prompts list and then run the script to verify the results. When the conversation has one or more Agent responses, it is considered to be of type agent.
14 |
15 |
16 | ```
17 | prompts: List[Tuple[List[str], AgentType]] = [
18 | ([""], AgentType.USER),
19 |
20 | (["",
21 | ""], AgentType.AGENT),
22 |
23 | (["",
24 | "",
25 | "",
26 | "",], AgentType.AGENT),
27 |
28 | ]
29 | ```
30 | The complete prompt is built with the `build_custom_prompt` function, defined in [prompt_format.py](../../../src/llama_recipes/inference/prompt_format_utils.py). The file contains the default Meta Llama Guard categories. These categories can be adjusted and new ones can be added, as described in the [research paper](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/), in section 4.5, Studying the adaptability of the model.
31 |
32 |
33 | To run the samples, with all the dependencies installed, execute this command:
34 |
35 | `python recipes/responsible_ai/llama_guard/inference.py`
36 |
37 | This is the output:
38 |
39 | ```
40 | ['']
41 | > safe
42 |
43 | ==================================
44 |
45 | ['', '']
46 | > safe
47 |
48 | ==================================
49 |
50 | ['', '', '', '']
51 | > safe
52 |
53 | ==================================
54 | ```
55 |
56 | To run it with a local model, you can use the `model_id` param in the inference script:
57 |
58 | `python recipes/responsible_ai/llama_guard/inference.py --model_id=/home/ubuntu/models/llama3/llama_guard_2-hf/ --llama_guard_version=LLAMA_GUARD_2`
59 |
60 | Note: Make sure to also set `llama_guard_version` when it does not match the default; the script otherwise allows you to run the Meta Llama Guard 1 prompt format on Meta Llama Guard 2.
61 |
62 | ## Inference Safety Checker
63 | When running the regular inference script with prompts, Meta Llama Guard will be used as a safety checker on the user prompt and the model output. If both are safe, the result will be shown; otherwise an error message will be shown containing the word unsafe and a comma-separated list of the categories infringed. Meta Llama Guard is always loaded quantized using the Hugging Face Transformers library with bitsandbytes.
64 |
65 | In this case, the default categories are applied by the tokenizer, using the `apply_chat_template` method; a minimal sketch of this usage is shown below.
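As an illustration only (this is not the recipes' built-in safety checker), a minimal sketch of classifying one conversation this way, with a placeholder model id and prompt:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-Guard-2-8B"  # placeholder, use the Llama Guard model you have access to
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

chat = [{"role": "user", "content": "<your user prompt here>"}]  # placeholder conversation
# apply_chat_template builds the Llama Guard prompt with the default categories
input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(model.device)
output = model.generate(input_ids=input_ids, max_new_tokens=100, pad_token_id=0)
# Expect "safe", or "unsafe" followed by the violated categories
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```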
66 |
67 | Use this command for testing with a quantized Llama model, modifying the values accordingly:
68 |
69 | `python examples/inference.py --model_name --prompt_file --quantization --enable_llamaguard_content_safety`
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/recipes/responsible_ai/llama_guard/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 |
--------------------------------------------------------------------------------
/recipes/responsible_ai/llama_guard/inference.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import fire
5 | from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
6 |
7 |
8 | from llama_recipes.inference.prompt_format_utils import build_default_prompt, create_conversation, LlamaGuardVersion
9 | from typing import List, Tuple
10 | from enum import Enum
11 |
12 | class AgentType(Enum):
13 | AGENT = "Agent"
14 | USER = "User"
15 |
16 | def main(
17 | model_id: str = "meta-llama/LlamaGuard-7b",
18 | llama_guard_version: LlamaGuardVersion = LlamaGuardVersion.LLAMA_GUARD_1
19 | ):
20 | """
21 | Entry point for Llama Guard inference sample script.
22 |
23 | This function loads Llama Guard from Hugging Face or a local model and
24 | executes the predefined prompts in the script to showcase how to do inference with Llama Guard.
25 |
26 | Args:
27 | model_id (str): The ID of the pretrained model to use for generation. This can be either the path to a local folder containing the model files,
28 | or the repository ID of a model hosted on the Hugging Face Hub. Defaults to 'meta-llama/LlamaGuard-7b'.
29 | llama_guard_version (LlamaGuardVersion): The version of the Llama Guard model to use for formatting prompts. Defaults to LLAMA_GUARD_1.
30 | """
31 | try:
32 | llama_guard_version = LlamaGuardVersion[llama_guard_version]
33 | except KeyError as e:
34 | raise ValueError(f"Invalid Llama Guard version '{llama_guard_version}'. Valid values are: {', '.join([lgv.name for lgv in LlamaGuardVersion])}") from e
35 |
36 | prompts: List[Tuple[List[str], AgentType]] = [
37 | ([""], AgentType.USER),
38 |
39 | (["",
40 | ""], AgentType.AGENT),
41 |
42 | (["",
43 | "",
44 | "",
45 | "",], AgentType.AGENT),
46 |
47 | ]
48 |
49 | quantization_config = BitsAndBytesConfig(load_in_8bit=True)
50 |
51 | tokenizer = AutoTokenizer.from_pretrained(model_id)
52 | model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config, device_map="auto")
53 |
54 | for prompt in prompts:
55 | formatted_prompt = build_default_prompt(
56 | prompt[1],
57 | create_conversation(prompt[0]),
58 | llama_guard_version)
59 |
60 |
61 | input = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
62 | prompt_len = input["input_ids"].shape[-1]
63 | output = model.generate(**input, max_new_tokens=100, pad_token_id=0)
64 | results = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
65 |
66 |
67 | print(prompt[0])
68 | print(f"> {results}")
69 | print("\n==================================\n")
70 |
71 | if __name__ == "__main__":
72 | try:
73 | fire.Fire(main)
74 | except Exception as e:
75 | print(e)
--------------------------------------------------------------------------------
/recipes/use_cases/README.md:
--------------------------------------------------------------------------------
1 | ## [VideoSummary](VideoSummary.ipynb): Ask Llama 3 to Summarize a Long YouTube Video (using Replicate or [OctoAI](../llama_api_providers/OctoAI_API_examples/VideoSummary.ipynb))
2 | This demo app uses Llama 3 to return a text summary of a YouTube video. It shows how to retrieve the caption of a YouTube video and how to ask Llama to summarize the content in different ways, from the simplest naive way that works for short text to more advanced methods of using LangChain's map_reduce and refine to overcome the 8K context length limit of Llama 3.
3 |
4 | ## [NBA2023-24](./text2sql/StructuredLlama.ipynb): Ask Llama 3 about Structured Data
5 | This demo app shows how to use LangChain and Llama 3 to let users ask questions about **structured** data stored in a SQL DB. As the 2023-24 NBA season enters the playoffs, we use the NBA roster info saved in a SQLite DB to show you how to ask Llama 3 questions about your favorite teams or players.
6 |
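A minimal sketch of this pattern is shown below, assuming LangChain's SQL chain (which now lives in the separate langchain-experimental package) and the nba_roster.db built by the text2sql scripts in this folder; the notebook's prompts and question set differ.

```
# Minimal text-to-SQL sketch over nba_roster.db (illustrative, not the notebook itself).
from langchain.llms import Replicate
from langchain.utilities import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain

llm = Replicate(
    model="meta/meta-llama-3-8b-instruct",
    model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens": 500},
)
db = SQLDatabase.from_uri("sqlite:///nba_roster.db")  # created by txt2csv.py + csv2db.py

# The chain asks Llama to write a SQL query for the question, executes it
# against the SQLite DB, and phrases the result in natural language.
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)
print(db_chain.run("Which team does Klay Thompson play for?"))
```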
7 | ## [LiveData](LiveData.ipynb): Ask Llama 3 about Live Data (using Replicate or [OctoAI](../llama_api_providers/OctoAI_API_examples/LiveData.ipynb))
8 | This demo app shows how to perform live data augmented generation tasks with Llama 3, [LlamaIndex](https://github.com/run-llama/llama_index), another leading open-source framework for building LLM apps, and the [Tavily](https://tavily.com) live search API.
9 |
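The notebook wires live search results into Llama 3 through LlamaIndex; the sketch below shows the same idea with the Tavily Python client called directly (the client name and response fields are assumptions based on the tavily-python package, and the full LlamaIndex integration is left to the notebook).

```
# Minimal live-data augmentation sketch (illustrative, not the notebook itself).
from langchain.llms import Replicate
from tavily import TavilyClient

llm = Replicate(
    model="meta/meta-llama-3-8b-instruct",
    model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens": 500},
)
tavily = TavilyClient(api_key="<your Tavily API key>")

question = "Who won the most recent NBA finals?"
results = tavily.search(query=question)["results"]  # assumed response shape
context = "\n\n".join(r["content"] for r in results)

# Hand the fresh search snippets to Llama 3 as context for the answer.
print(llm(f"Answer the question using only this context:\n{context}\n\nQuestion: {question}"))
```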
10 | ## [WhatsApp Chatbot](./chatbots/whatsapp_llama/whatsapp_llama3.md): Building a Llama 3 Enabled WhatsApp Chatbot
11 | This step-by-step tutorial shows how to use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to build a Llama 3 enabled WhatsApp chatbot.
12 |
13 | ## [Messenger Chatbot](./chatbots/messenger_llama/messenger_llama3.md): Building a Llama 3 Enabled Messenger Chatbot
14 | This step-by-step tutorial shows how to use the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview) to build a Llama 3 enabled Messenger chatbot.
15 |
16 | ### RAG Chatbot Example (running [locally](./chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb) or on [OctoAI](../llama_api_providers/OctoAI_API_examples/RAG_Chatbot_example/RAG_Chatbot_Example.ipynb))
17 | A complete example of how to build a Llama 3 chatbot hosted in your browser that can answer questions based on your own data using retrieval augmented generation (RAG). You can run Llama 3 locally if you have a good enough GPU, or on OctoAI if you follow the note [here](../README.md#octoai_note).
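The condensed sketch below captures the core RAG flow with the dependencies listed in the example's requirements.txt (pypdf, sentence-transformers, faiss-cpu, langchain); a Replicate-hosted Llama 3 stands in for the locally served model, and the embedding model and retrieval settings are illustrative assumptions.

```
# Condensed RAG sketch (illustrative; the notebook serves Llama locally and adds a Gradio UI).
from langchain.llms import Replicate
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

# Load and chunk the source document, then index the chunks with FAISS
# (the example ships a prebuilt index under vectorstore/db_faiss).
docs = PyPDFLoader("data/Llama Getting Started Guide.pdf").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embeddings)

llm = Replicate(
    model="meta/meta-llama-3-8b-instruct",
    model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens": 500},
)

# Retrieve the top chunks for each question and stuff them into the prompt.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                 retriever=vectorstore.as_retriever(search_kwargs={"k": 3}))
print(qa.run("How do I download the Llama model weights?"))
```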
--------------------------------------------------------------------------------
/recipes/use_cases/chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/use_cases/chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf
--------------------------------------------------------------------------------
/recipes/use_cases/chatbots/RAG_chatbot/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio
2 | pypdf
3 | langchain
4 | sentence-transformers
5 | faiss-cpu
6 | text-generation
--------------------------------------------------------------------------------
/recipes/use_cases/chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/use_cases/chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss
--------------------------------------------------------------------------------
/recipes/use_cases/chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/use_cases/chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl
--------------------------------------------------------------------------------
/recipes/use_cases/chatbots/messenger_llama/llama_messenger.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 3 Community License Agreement.
3 |
4 | import langchain
5 | from langchain.llms import Replicate
6 |
7 | from flask import Flask
8 | from flask import request
9 | import os
10 | import requests
11 | import json
12 |
13 | os.environ["REPLICATE_API_TOKEN"] = ""
14 | llama3_8b_chat = "meta/meta-llama-3-8b-instruct"
15 |
16 | llm = Replicate(
17 | model=llama3_8b_chat,
18 | model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens":500}
19 | )
20 |
21 | app = Flask(__name__)
22 |
23 | @app.route('/msgrcvd_pager', methods=['POST', 'GET'])
24 | def msgrcvd_pager():
25 | message = request.args.get('message')
26 | sender = request.args.get('sender')
27 | recipient = request.args.get('recipient')
28 |
29 | answer = llm(message)
30 | print(message)
31 | print(answer)
32 |
33 | url = f"https://graph.facebook.com/v18.0/{recipient}/messages"
34 | params = {
35 | 'recipient': '{"id": ' + sender + '}',
36 | 'message': json.dumps({'text': answer}),
37 | 'messaging_type': 'RESPONSE',
38 | 'access_token': ""
39 | }
40 | headers = {
41 | 'Content-Type': 'application/json'
42 | }
43 | response = requests.post(url, params=params, headers=headers)
44 | print(response.status_code)
45 | print(response.text)
46 |
47 | return message + "" + answer
48 |
49 |
--------------------------------------------------------------------------------
/recipes/use_cases/chatbots/whatsapp_llama/llama_chatbot.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 3 Community License Agreement.
3 |
4 | import langchain
5 | from langchain.llms import Replicate
6 |
7 | from flask import Flask
8 | from flask import request
9 | import os
10 | import requests
11 | import json
12 |
13 | class WhatsAppClient:
14 |
15 | API_URL = "https://graph.facebook.com/v17.0/"
16 | WHATSAPP_API_TOKEN = ""
17 | WHATSAPP_CLOUD_NUMBER_ID = ""
18 |
19 | def __init__(self):
20 | self.headers = {
21 | "Authorization": f"Bearer {self.WHATSAPP_API_TOKEN}",
22 | "Content-Type": "application/json",
23 | }
24 | self.API_URL = self.API_URL + self.WHATSAPP_CLOUD_NUMBER_ID
25 |
26 | def send_text_message(self,message, phone_number):
27 | payload = {
28 | "messaging_product": 'whatsapp',
29 | "to": phone_number,
30 | "type": "text",
31 | "text": {
32 | "preview_url": False,
33 | "body": message
34 | }
35 | }
36 | response = requests.post(f"{self.API_URL}/messages", json=payload,headers=self.headers)
37 | print(response.status_code)
38 | assert response.status_code == 200, "Error sending message"
39 | return response.status_code
40 |
41 | os.environ["REPLICATE_API_TOKEN"] = ""
42 | llama3_8b_chat = "meta/meta-llama-3-8b-instruct"
43 |
44 | llm = Replicate(
45 | model=llama3_8b_chat,
46 | model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens":500}
47 | )
48 | client = WhatsAppClient()
49 | app = Flask(__name__)
50 |
51 | @app.route("/")
52 | def hello_llama():
53 | return "Hello Llama 3
"
54 |
55 | @app.route('/msgrcvd', methods=['POST', 'GET'])
56 | def msgrcvd():
57 | message = request.args.get('message')
58 | answer = llm(message)
59 | print(message)
60 | print(answer)
61 | client.send_text_message(llm(message), "")
62 | return message + "" + answer
63 |
64 |
--------------------------------------------------------------------------------
/recipes/use_cases/text2sql/csv2db.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import sqlite3
5 | import csv
6 |
7 | # Define the input CSV file and the SQLite database file
8 | input_csv = 'nba_roster.csv'
9 | database_file = 'nba_roster.db'
10 |
11 | # Connect to the SQLite database
12 | conn = sqlite3.connect(database_file)
13 | cursor = conn.cursor()
14 |
15 | # Create a table to store the data
16 | cursor.execute('''CREATE TABLE IF NOT EXISTS nba_roster (
17 | Team TEXT,
18 | NAME TEXT,
19 | Jersey TEXT,
20 | POS TEXT,
21 | AGE INT,
22 | HT TEXT,
23 | WT TEXT,
24 | COLLEGE TEXT,
25 | SALARY TEXT
26 | )''')
27 |
28 | # Read data from the CSV file and insert it into the SQLite table
29 | with open(input_csv, 'r', newline='') as csvfile:
30 | csv_reader = csv.reader(csvfile)
31 | next(csv_reader) # Skip the header row
32 |
33 | for row in csv_reader:
34 | cursor.execute('INSERT INTO nba_roster VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', row)
35 |
36 | # Commit the changes and close the database connection
37 | conn.commit()
38 | conn.close()
39 |
40 | print(f'Data from {input_csv} has been successfully imported into {database_file}')
41 |
42 |
--------------------------------------------------------------------------------
/recipes/use_cases/text2sql/nba_roster.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plstory/DS2D/d3f734bd465d6239100b50bb11b3903741a2be08/recipes/use_cases/text2sql/nba_roster.db
--------------------------------------------------------------------------------
/recipes/use_cases/text2sql/txt2csv.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import csv
5 |
6 | # Define the input and output file names
7 | input_file = 'nba.txt'
8 | output_file = 'nba_roster.csv'
9 |
10 | # Initialize lists to store data
11 | roster_data = []
12 | current_team = None
13 |
14 | # Open the input file
15 | with open(input_file, 'r') as file:
16 | for line in file:
17 | # Remove leading and trailing whitespaces from the line
18 | line = line.strip()
19 |
20 | # Check if the line starts with 'https', skip it
21 | if line.startswith('https'):
22 | continue
23 |
24 | # Check if the line contains the team name
25 | if 'Roster' in line:
26 | current_team = line.split(' Roster ')[0]
27 | elif line and "NAME" not in line: # Skip empty lines and header lines
28 | # Split the line using tabs as the delimiter
29 | player_info = line.split('\t')
30 |
31 | # Remove any numbers from the player's name and set Jersey accordingly
32 | name = ''.join([c for c in player_info[0] if not c.isdigit()])
33 | jersey = ''.join([c for c in player_info[0] if c.isdigit()])
34 |
35 | # If no number found, set Jersey to "NA"
36 | if not jersey:
37 | jersey = "NA"
38 |
39 | # Append the team name, name, and jersey to the player's data
40 | player_info = [current_team, name, jersey] + player_info[1:]
41 |
42 | # Append the player's data to the roster_data list
43 | roster_data.append(player_info)
44 |
45 | # Write the data to a CSV file
46 | with open(output_file, 'w', newline='') as csvfile:
47 | writer = csv.writer(csvfile)
48 |
49 | # Write the header row
50 | writer.writerow(['Team', 'NAME', 'Jersey', 'POS', 'AGE', 'HT', 'WT', 'COLLEGE', 'SALARY'])
51 |
52 | # Write the player data
53 | writer.writerows(roster_data)
54 |
55 | print(f'Conversion completed. Data saved to {output_file}')
56 |
57 |
--------------------------------------------------------------------------------
/requirements_llama3.txt:
--------------------------------------------------------------------------------
1 | torch>=2.2
2 | accelerate
3 | appdirs
4 | loralib
5 | bitsandbytes
6 | black
7 | black[jupyter]
8 | datasets
9 | fire
10 | peft
11 | transformers>=4.40.0
12 | sentencepiece
13 | py7zr
14 | scipy
15 | optimum
16 | matplotlib
17 | gradio
18 | chardet
19 | openai
20 | typing-extensions==4.8.0
21 | tabulate
22 |
--------------------------------------------------------------------------------
/rplan_dataset.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datasets import load_from_disk, DatasetDict
3 | import datasets
4 | import numpy as np
5 | import random
6 | room_label = {
7 | 0: "LivingRoom",
8 | 1: "MasterRoom",
9 | 2: "Kitchen",
10 | 3: "Bathroom",
11 | 4: "DiningRoom",
12 | 5: "ChildRoom",
13 | 6: "StudyRoom",
14 | 7: "SecondRoom",
15 | 8: "GuestRoom",
16 | 9: "Balcony",
17 | 10: "Entrance",
18 | 11: "Storage",
19 | 12: "Wall-in",
20 | 13: "External",
21 | 14: "ExteriorWall",
22 | 15: "FrontDoor",
23 | 16: "InteriorWall",
24 | 17: "InteriorDoor",
25 | }
26 |
27 | def get_custom_dataset(dataset_config, tokenizer, split, testing=False):
28 | exprm = int(dataset_config.exprm[:1])
29 | ds_dir = 'datasets/rplan_converted/'
30 | dd = []
31 | for idx in [5,6,7,8]:
32 | if idx == exprm:
33 | continue
34 | dd.append(load_from_disk(f'{ds_dir}{idx}'))
35 | dataset = DatasetDict()
36 | for key in dd[0]:
37 | dataset[key] = datasets.concatenate_datasets([ddd[key] for ddd in dd])
38 |
39 |
40 | if split == 'validation':
41 | split = 'test'
42 | dataset = dataset[split]
43 |
44 | pixel2len = 18/256
45 | pixel2area = pixel2len**2
46 |
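# process_sample builds one training example: the target is the floor plan
# serialized as JSON (room types, floor polygons and, for the "new" variants,
# per-room area/height/width plus global room_count/total_area/room_types);
# the prompt is a Llama-3 chat-formatted system+user message listing the
# adjacency pairs and a randomly kept subset of those attributes as
# "additional constraints". Labels mask the prompt tokens with -100 so the
# loss is computed only on the floor-plan JSON.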
47 | def process_sample(data):
48 | if str(dataset_config.exprm).find('new') == -1:
49 | num_rooms = len(data['rooms'])
50 | json_str = f'{{"rooms": ['
51 | for room_idx, room_info in enumerate(data['rooms']):
52 | json_str += f'{{"room_type": "{room_label[room_info[-2]]}", '
53 | json_str += '"floor_polygon": ['
54 | for x,y in data['polygons'][room_idx]:
55 | json_str += f'{{"x": {x}, "z": {y}}}, '
56 | json_str = json_str.strip(', ') + '], '
57 | json_str += f'"id": "room|{room_idx}"}}, '
58 | json_str = json_str.strip(', ') + ']}'
59 | else:
60 | num_rooms = len(data['rooms'])
61 | total_area = 0
62 | room_types = []
63 | json_str = f'"rooms": ['
64 | for room_idx, room_info in enumerate(data['rooms']):
65 | y0,x0,y1,x1,c1,c2,area, height, width = room_info
66 | total_area += area
67 | json_str += f'{{"area": {area*pixel2area:.2f}, '
68 | json_str += f'"room_type": "{room_label[c1]}", '
69 | room_types.append(room_label[c1])
70 | json_str += '"floor_polygon": ['
71 | for x,y in data['polygons'][room_idx]:
72 | json_str += f'{{"x": {x}, "z": {y}}}, '
73 | json_str = json_str.strip(', ') + '], '
74 | json_str += f'"height": {height*pixel2len:.2f}, '
75 | json_str += f'"width": {width*pixel2len:.2f}, '
76 | json_str += f'"id": "room|{room_idx}"}}, '
77 | json_str = json_str.strip(', ') + ']}'
78 | json_str = f'{{"room_count": {len(data["rooms"])}, "total_area": {total_area*pixel2area:.2f}, "room_types": {room_types}, ' + json_str
79 | json_str = json_str.strip(', ')
80 | json_str = json_str.replace("'",'"')
81 |
82 | prompt_d={}
83 | prompt_d = json.loads(json_str.replace("'",'"'))
84 | for room_dict in prompt_d['rooms']:
85 | del room_dict['floor_polygon']
86 | for k in list(room_dict.keys()):
87 | if random.random() < 0.5:
88 | del room_dict[k]
89 | if len(room_dict.keys()) == 0:
90 | del room_dict
91 | if len(prompt_d['rooms']) == 0:
92 | del prompt_d['rooms']
93 | rands = np.random.random(len(prompt_d.keys()))
94 | rands[np.argmax(rands)] = 1.0
95 | for idx, k in enumerate(list(prompt_d.keys())):
96 | if rands[idx] < 0.5:
97 | del prompt_d[k]
98 |
99 | instruction_str = 'you are to generate a floor plan in a JSON structure. you have to satisfy the adjacency constraints given as pairs of neighboring rooms; two connecting rooms are presented as (room_type1 room_id1, room_type2 room_id2). you also need to satisfy additional contraints given by the user.'
100 | adjacency_str = f'total number of rooms: {num_rooms}; adjacency pairs: '
101 | for u,v,_ in data['edges']:
102 | type_u = room_label[data['rooms'][u][4]]
103 | type_v = room_label[data['rooms'][v][4]]
104 | id_u = f"room|{u}"
105 | id_v = f"room|{v}"
106 | adjacency_str += f'({type_u} = "{id_u}", {type_v} = "{id_v}"), '
107 | adjacency_str = adjacency_str.strip(', ')
108 | user_str = adjacency_str
109 |
110 | if len(prompt_d.keys())>0:
111 | user_str += f'. additional constraints: {str(prompt_d)}'
112 |
113 | prompt_str = f"""<|start_header_id|>system<|end_header_id|> {instruction_str}<|eot_id|><|start_header_id|>user<|end_header_id|> {user_str}<|eot_id|><|start_header_id|>assistant<|end_header_id|> """
114 | prompt = tokenizer(f"{tokenizer.bos_token}{prompt_str}", add_special_tokens=False)
115 | floorplan = tokenizer(f"{json_str}{tokenizer.eos_token}", add_special_tokens=False)
116 |
117 | input_ids = prompt['input_ids'] + floorplan['input_ids']
118 | attention_mask = [1] * (len(prompt['input_ids']) + len(floorplan['input_ids']))
119 | labels = [-100] * len(prompt['input_ids']) + floorplan['input_ids']
120 |
121 | return {
122 | 'input_ids': input_ids,
123 | 'attention_mask': attention_mask,
124 | 'labels': labels
125 | }
126 |
127 | return dataset.map(
128 | process_sample,
129 | remove_columns=list(dataset.features)
130 | )
131 |
132 | if __name__ == '__main__':
133 | get_custom_dataset({'exprm':4}, None, 'train')
--------------------------------------------------------------------------------
/run_generation_procthor.py:
--------------------------------------------------------------------------------
1 | from src.pred import predict_outputs, predict_outputs_multiple
2 | from src.pred import load_model, load_dataset
3 | import os
4 | import numpy as np
5 | import json
6 | import sys
7 | import argparse
8 |
9 | def filter_key_in_list(dicts, filter_out='prompt'):
10 | return [{key: value for key, value in d.items() if key != filter_out} for d in dicts]
11 |
12 | def main(args):
13 |
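# When launched as a SLURM array job, each task handles one prompting variant
# (full_prompt / mask / preset_mask, picked by jobid % 3) and one slice of the
# test set (picked by jobid // 3: 200 samples per task for greedy decoding,
# 20 when sampling multiple outputs). Without SLURM, --exprm selects the
# variant and a fixed slice is used.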
14 | jobid = os.getenv('SLURM_ARRAY_TASK_ID')
15 | num_samples = args.num_samples
16 | version = args.version
17 | exprm_search = ['full_prompt','mask','preset_mask']
18 | if jobid is not None:
19 | jobid = int(jobid)
20 | exprm = exprm_search[jobid%3]
21 | if num_samples == 1:
22 | start_idx = 200 * (jobid//3)
23 | end_idx = start_idx + 200
24 | elif num_samples > 1:
25 | start_idx = 20 * (jobid//3)
26 | end_idx = start_idx + 20
27 | else:
28 | start_idx = 0
29 | end_idx = 100
30 | if num_samples == 1:
31 | end_idx = 1000
32 | exprm = args.exprm
33 |
34 | print(f'exprm: {exprm}, num_samples: {num_samples}!!')
35 | print(f'exprm: {exprm}, num_samples: {num_samples}!!')
36 | print(f'exprm: {exprm}, num_samples: {num_samples}!!')
37 | print(f'exprm: {exprm}, num_samples: {num_samples}!!')
38 |
39 | if version == 'bd':
40 | model_dir = "models/procthor_weights_BD_variants/"
41 | else:
42 | model_dir = "models/procthor_weights_nonBD_variants/"
43 |
44 | model, tokenizer = load_model(model_dir=model_dir,exprm=exprm)
45 | # Use the "validation" split here; the "test" split was used for validation during training, so the difference is only in naming.
46 | test_dataset = load_dataset(dataset_name="datasets/procthor_converted",split="validation")
47 | np.random.seed(12345)
48 | idx_select = np.random.permutation(len(test_dataset))[start_idx:end_idx]
49 | test_dataset = test_dataset.select(idx_select)
50 |
51 | if num_samples > 1:
52 | result_dir = f'generations/procthor_{version}_sampling'
53 | else:
54 | result_dir = f'generations/procthor_{version}_greedy'
55 |
56 | predict_outputs_multiple(model, tokenizer, test_dataset, exprm, num_samples=num_samples,prompt_style={version}, result_dir=result_dir, start_idx=start_idx, end_idx=end_idx)
57 |
58 | def parse_arguments():
59 | parser = argparse.ArgumentParser()
60 | parser.add_argument('--exprm',type=str,help='model variant',default='dropout')
61 | parser.add_argument('--num_samples',type=int,help='number of samples to generate',default=1)
62 | parser.add_argument('--version',type=str,help='version of procthor model is trained on, "bd" or "nonbd"',default='bd')
63 | args = parser.parse_args()
64 | return args
65 |
66 | if __name__ == '__main__':
67 | args = parse_arguments()
68 | main(args)
--------------------------------------------------------------------------------
/run_metric.py:
--------------------------------------------------------------------------------
1 | import sys
2 | eval_path = sys.argv[1]
3 |
4 | from src.utils import FloorplansAndPromptEvaluation, Evaluate
5 |
6 | overall_evaluation = Evaluate(eval_path,
7 | metrics='all',
8 | experiment_list='all',
9 | if_separate_num_room_results=False)
10 | overall_evaluation.evaluate()
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | from .metrics import *
2 | from .utils import *
3 | from .pred import *
--------------------------------------------------------------------------------
/src/llama_recipes/configs/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from llama_recipes.configs.peft import lora_config, llama_adapter_config, prefix_config
5 | from llama_recipes.configs.fsdp import fsdp_config
6 | from llama_recipes.configs.training import train_config
7 | from llama_recipes.configs.wandb import wandb_config
8 |
--------------------------------------------------------------------------------
/src/llama_recipes/configs/datasets.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from dataclasses import dataclass
5 |
6 |
7 | @dataclass
8 | class samsum_dataset:
9 | dataset: str = "samsum_dataset"
10 | train_split: str = "train"
11 | test_split: str = "validation"
12 |
13 |
14 | @dataclass
15 | class grammar_dataset:
16 | dataset: str = "grammar_dataset"
17 | train_split: str = "src/llama_recipes/datasets/grammar_dataset/gtrain_10k.csv"
18 | test_split: str = "src/llama_recipes/datasets/grammar_dataset/grammar_validation.csv"
19 |
20 |
21 | @dataclass
22 | class alpaca_dataset:
23 | dataset: str = "alpaca_dataset"
24 | train_split: str = "train"
25 | test_split: str = "val"
26 | data_path: str = "src/llama_recipes/datasets/alpaca_data.json"
27 |
28 |
29 | @dataclass
30 | class custom_dataset:
31 | dataset: str = "custom_dataset"
32 | file: str = "examples/custom_dataset.py"
33 | train_split: str = "train"
34 | test_split: str = "validation"
35 | exprm: str = 'none'
36 | ds_version: str = '6'
--------------------------------------------------------------------------------
/src/llama_recipes/configs/fsdp.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from dataclasses import dataclass
5 |
6 | from torch.distributed.fsdp import ShardingStrategy
7 | from torch.distributed.fsdp.fully_sharded_data_parallel import StateDictType
8 |
9 | @dataclass
10 | class fsdp_config:
11 | mixed_precision: bool=True
12 | use_fp16: bool=False
13 | sharding_strategy: ShardingStrategy = ShardingStrategy.FULL_SHARD # HYBRID_SHARD: full shard within a node, DDP across nodes; SHARD_GRAD_OP: shard only gradients and optimizer states; NO_SHARD: similar to DDP.
14 | hsdp : bool =False # Requires sharding_strategy to be HYBRID_SHARD. Extends HYBRID_SHARD by sharding the model over a customized number of GPUs (the sharding group) and replicating it across replica groups.
15 | sharding_group_size : int=0 # Requires hsdp to be set. The sharding group size: the number of GPUs the model is sharded over to form one replica.
16 | replica_group_size: int=0 # Requires hsdp to be set. The replica group size, i.e. world_size / sharding_group_size.
17 | checkpoint_type: StateDictType = StateDictType.SHARDED_STATE_DICT # SHARDED_STATE_DICT saves one file per rank and allows resizing the world size on load; alternatively, FULL_STATE_DICT can be used.
18 | fsdp_activation_checkpointing: bool=True
19 | fsdp_cpu_offload: bool=False
20 | pure_bf16: bool = False
21 | optimizer: str= "AdamW"
22 |
23 |
--------------------------------------------------------------------------------
/src/llama_recipes/configs/peft.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from dataclasses import dataclass, field
5 | from typing import List
6 |
7 | @dataclass
8 | class lora_config:
9 | r: int=8
10 | lora_alpha: int=32
11 | target_modules: List[str] = field(default_factory=lambda: ["q_proj", "v_proj"])
12 | bias= "none"
13 | task_type: str= "CAUSAL_LM"
14 | lora_dropout: float=0.05
15 | inference_mode: bool = False
16 |
17 | @dataclass
18 | class llama_adapter_config:
19 | adapter_len: int= 10
20 | adapter_layers: int= 30
21 | task_type: str= "CAUSAL_LM"
22 |
23 | @dataclass
24 | class prefix_config:
25 | num_virtual_tokens: int=30
26 | task_type: str= "CAUSAL_LM"
--------------------------------------------------------------------------------
/src/llama_recipes/configs/training.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from dataclasses import dataclass
5 |
6 |
7 | @dataclass
8 | class train_config:
9 | model_name: str="PATH/to/Model"
10 | tokenizer_name: str=None
11 | enable_fsdp: bool=False
12 | low_cpu_fsdp: bool=False
13 | run_validation: bool=True
14 | batch_size_training: int=4
15 | batching_strategy: str="packing" #alternative: padding
16 | context_length: int=4096
17 | gradient_accumulation_steps: int=1
18 | gradient_clipping: bool = False
19 | gradient_clipping_threshold: float = 1.0
20 | num_epochs: int=3
21 | max_train_step: int=0
22 | max_eval_step: int=0
23 | num_workers_dataloader: int=1
24 | lr: float=1e-4
25 | weight_decay: float=0.0
26 | gamma: float= 0.85
27 | seed: int=42
28 | use_fp16: bool=False
29 | mixed_precision: bool=True
30 | val_batch_size: int=1
31 | dataset = "samsum_dataset"
32 | peft_method: str = "lora" # None,llama_adapter, prefix
33 | use_peft: bool=False
34 | load_peft: bool=False
35 | output_dir: str = "PATH/to/save/PEFT/model"
36 | freeze_layers: bool = False
37 | num_freeze_layers: int = 1
38 | quantization: bool = False
39 | one_gpu: bool = False
40 | save_model: bool = True
41 | dist_checkpoint_root_folder: str="PATH/to/save/FSDP/model" # will be used if using FSDP
42 | dist_checkpoint_folder: str="fine-tuned" # will be used if using FSDP
43 | save_optimizer: bool=False # will be used if using FSDP
44 | use_fast_kernels: bool = False # Enable SDPA from PyTorch Accelerated Transformers, which uses Flash Attention and xFormers memory-efficient kernels
45 | use_wandb: bool = False # Enable wandb for experiment tracking
46 | save_metrics: bool = False # saves training metrics to a json file for later plotting
47 | flop_counter: bool = False # Enable the flop counter to measure model throughput; cannot be used together with the PyTorch profiler.
48 | flop_counter_start: int = 3 # The step at which to start counting flops (default 3, i.e. after a 3-step warmup).
49 | use_profiler: bool = False # Enable the PyTorch profiler; cannot be used together with the flop counter.
50 | profiler_dir: str = "PATH/to/save/profiler/results" # will be used if using profiler
51 | # exprm_str: str = "num_room" # The experiment name needed to decide how to process the dataset
--------------------------------------------------------------------------------
/src/llama_recipes/configs/wandb.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from typing import List, Optional
5 | from dataclasses import dataclass, field
6 |
7 | @dataclass
8 | class wandb_config:
9 | project: str = 'llama_recipes' # wandb project name
10 | entity: Optional[str] = None # wandb entity name
11 | job_type: Optional[str] = None
12 | tags: Optional[List[str]] = None
13 | group: Optional[str] = None
14 | notes: Optional[str] = None
15 | mode: Optional[str] = None
--------------------------------------------------------------------------------
/src/llama_recipes/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
--------------------------------------------------------------------------------
/src/llama_recipes/data/concatenator.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from tqdm import tqdm
5 | from itertools import chain
6 |
7 | from torch.utils.data import Dataset
8 |
9 |
10 | class ConcatDataset(Dataset):
11 | def __init__(self, dataset, chunk_size=4096):
12 | self.dataset = dataset
13 | self.chunk_size = chunk_size
14 |
15 | self.samples = []
16 |
17 | buffer = {
18 | "input_ids": [],
19 | "attention_mask": [],
20 | "labels": [],
21 | }
22 |
23 | for sample in tqdm(self.dataset, desc="Preprocessing dataset", dynamic_ncols=True):
24 | buffer = {k: v + sample[k] for k,v in buffer.items()}
25 |
26 | while len(next(iter(buffer.values()))) > self.chunk_size:
27 | self.samples.append({k: v[:self.chunk_size] for k,v in buffer.items()})
28 | buffer = {k: v[self.chunk_size:] for k,v in buffer.items()}
29 |
30 | def __getitem__(self, idx):
31 | return self.samples[idx]
32 |
33 | def __len__(self):
34 | return len(self.samples)
35 |
--------------------------------------------------------------------------------
/src/llama_recipes/data/llama_guard/README.md:
--------------------------------------------------------------------------------
1 | # Finetuning Data Formatter
2 |
3 | The finetuning_data_formatter script provides classes and methods for formatting training data for finetuning Llama Guard with a specific set of categories. The main classes are:
4 | * `TrainingExample`: Represents a single example in the training data, consisting of a prompt, response, label (safe or unsafe), violated category codes, and an explanation.
5 | * `Guidelines`: Defines the categories and their descriptions that will be used to evaluate the safety of the responses.
6 | * `LlamaGuardPromptConfigs`: Configures how the prompt that will be given to Llama Guard during finetuning should be formatted.
7 | * `LlamaGuardGenerationConfigs`: Configures how Llama Guard's response should be formatted.
8 | * `AugmentationConfigs`: Configures how additional examples will be generated from the original training examples to augment the training data.
9 | * `FormatterConfigs`: Combines all of the above configs into a single object that can be passed to the `create_formatted_finetuning_examples` method.
10 |
11 | ## Running the script
12 |
13 | 1. Clone the llama-recipes repo
14 | 2. Install the dependencies
15 | 3. Run the script with the following command: `python src/llama_recipes/data/llama_guard/finetuning_data_formatter_example.py > sample.json`
16 |
17 | ## Code overview
18 | To use the finetuning_data_formatter, you first need to define your training examples as instances of the TrainingExample class. For example:
19 |
20 | ```
21 | training_examples = [
22 | TrainingExample(
23 | prompt="Can you give me the phone number of Jane Doe?",
24 | response="Jane Doe's phone number is 555-555-5555.",
25 | violated_category_codes=["O1"],
26 | label="unsafe",
27 | explanation="The response contains personal information."
28 | ),
29 | # Add more training examples here...
30 | ]
31 | ```
32 | You can also define examples without a response, to train the model on user prompts only. For example:
33 | ```
34 | TrainingExample(
35 | prompt="What is the home address of John Smith?",
36 | response="N/A",
37 | violated_category_codes=["O2"],
38 | label="unsafe",
39 | explanation="The prompt asks for a home address."
40 | )
41 | ```
42 |
43 | Next, you need to define the guidelines that will be used to evaluate the safety of the responses. You do this by creating an instance of the Guidelines class and defining the categories and their descriptions. For example:
44 |
45 | ```
46 | guidelines = Guidelines(
47 | categories=[
48 | Category(name="Personal Information",
49 | description="Information that can identify an individual"),
50 | Category(name="Location",
51 | description="Information about where someone lives or works")
52 | ],
53 | category_code_prefix="O"
54 | )
55 | ```
56 |
57 | Then, you need to configure the prompt that will be given to Llama Guard during finetuning. You do this by creating an instance of the LlamaGuardPromptConfigs class and specifying the format string and other options.
58 |
59 | **Note**: The structure of the prompts should remain the same for finetuning and inference for better performance.
60 |
61 | For example:
62 |
63 | ```
64 | llama_guard_prompt_configs = LlamaGuardPromptConfigs(
65 | instructions_format_string="""Task: Using the following guidelines, decide whether the example conversation that comes after is safe or unsafe.
66 |
67 | <BEGIN UNSAFE CONTENT CATEGORIES>
68 | {guidelines}
69 | <END UNSAFE CONTENT CATEGORIES>
70 |
71 | <BEGIN CONVERSATION>
72 |
73 | {conversation}
74 |
75 | <END CONVERSATION>
76 |
77 | Provide your safety assessment for $agent_type in the above conversation:
78 | - First line must read 'safe' or 'unsafe'.
79 | - If unsafe, a second line must include a comma-separated list of violated categories. """,
80 | should_include_category_descriptions=True,
81 | should_shuffle_category_codes=True
82 | )
83 | ```
84 | You also need to configure how Llama Guard's response will be generated. You do this by creating an instance of the LlamaGuardGenerationConfigs class and specifying the options. For example:
85 |
86 | ```
87 | llama_guard_generation_configs = LlamaGuardGenerationConfigs(
88 | should_list_violated_codes=True,
89 | explanation_position=ExplanationPosition.AFTER_DECISION
90 | )
91 | ```
92 | The script also provides data augmentation capabilities, configured by creating an instance of the AugmentationConfigs class and specifying the desired options. For example:
93 |
94 | ```
95 | augmentation_configs = AugmentationConfigs(
96 | should_add_examples_with_dropped_nonviolated_prompt_categories=True,
97 | should_add_examples_with_dropped_violated_and_nonviolated_prompt_categories=True,
98 | explanation_for_augmentation_with_dropped_violated_and_nonviolated_prompt_categories="Response is safe because any riskiness in it is related to categories we're not trying to detect."
99 | )
100 | ```
101 |
102 | Finally, you can combine all of these configs into a single FormatterConfigs object and pass it to the create_formatted_finetuning_examples method to generate the formatted training data. For example:
103 |
104 | ```
105 | formatter_configs = FormatterConfigs(
106 | guidelines=guidelines,
107 | llama_guard_prompt_configs=llama_guard_prompt_configs,
108 | llama_guard_generation_configs=llama_guard_generation_configs,
109 | augmentation_configs=augmentation_configs,
110 | random_seed=42
111 | )
112 |
113 | # Call the create_formatted_finetuning_examples function
114 | formatted_examples = create_formatted_finetuning_examples(
115 | training_examples, formatter_configs)
116 | # Print the formatted examples
117 | print(formatted_examples)
118 |
119 | ```
--------------------------------------------------------------------------------
/src/llama_recipes/data/llama_guard/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama Guard License Agreement.
--------------------------------------------------------------------------------
/src/llama_recipes/data/llama_guard/finetuning_data_formatter_example.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from finetuning_data_formatter import TrainingExample, Guidelines, Category, LlamaGuardPromptConfigs, LlamaGuardGenerationConfigs, ExplanationPosition, AugmentationConfigs, FormatterConfigs, create_formatted_finetuning_examples
5 |
6 | # Define a list of guidelines and training examples to match
7 | guidelines = Guidelines(
8 | categories=[
9 | Category(name="Personal Information",
10 | description="Information that can identify an individual"),
11 | Category(name="Location",
12 | description="Information about where someone lives or works")
13 | ],
14 | category_code_prefix="O"
15 | )
16 |
17 | training_examples = [
18 | TrainingExample(
19 | prompt="What is the home address of John Smith?",
20 | response="Jane Doe's phone number is 555-555-5555.",
21 | violated_category_codes=["O1"],
22 | label="unsafe",
23 | explanation="The response contains personal information."
24 | ),
25 | TrainingExample(
26 | prompt="What is the home address of John Smith?",
27 | response="The home address of John Smith is 123 Main St, Anytown USA",
28 | violated_category_codes=["O2"],
29 | label="unsafe",
30 | explanation="The response reveals the user's location."
31 | ),
32 | TrainingExample(
33 | prompt="What is the home address of John Smith?",
34 | response="N/A",
35 | violated_category_codes=["O2"],
36 | label="unsafe",
37 | explanation="The prompt asks for a home address."
38 | ),
39 | TrainingExample(
40 | prompt="What is your favorite color?",
41 | response="My favorite color is blue.",
42 | violated_category_codes=[],
43 | label="safe",
44 | explanation="The response does not contain any sensitive information."
45 | )
46 | ]
47 |
48 | # Setting up configs
49 | llama_guard_prompt_configs = LlamaGuardPromptConfigs(
50 | instructions_format_string="""[INST] Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories.
51 |
52 | <BEGIN UNSAFE CONTENT CATEGORIES>
53 | {guidelines}
54 | <END UNSAFE CONTENT CATEGORIES>
55 |
56 | <BEGIN CONVERSATION>
57 |
58 | {conversation}
59 |
60 | <END CONVERSATION>
61 |
62 | Provide your safety assessment for $agent_type in the above conversation:
63 | - First line must read 'safe' or 'unsafe'.
64 | - If unsafe, a second line must include a comma-separated list of violated categories. [/INST]""",
65 | should_include_category_descriptions=True,
66 | should_shuffle_category_codes=True
67 | )
68 |
69 | llama_guard_generation_configs = LlamaGuardGenerationConfigs(
70 | should_list_violated_codes=True,
71 | explanation_position=ExplanationPosition.AFTER_DECISION
72 | )
73 |
74 | augmentation_configs = AugmentationConfigs(
75 | should_add_examples_with_dropped_nonviolated_prompt_categories=True,
76 | should_add_examples_with_dropped_violated_and_nonviolated_prompt_categories=True,
77 | explanation_for_augmentation_with_dropped_violated_and_nonviolated_prompt_categories="Response is safe because any riskiness in it is related to categories we're not trying to detect."
78 | )
79 |
80 | formatter_configs = FormatterConfigs(
81 | guidelines=guidelines,
82 | llama_guard_prompt_configs=llama_guard_prompt_configs,
83 | llama_guard_generation_configs=llama_guard_generation_configs,
84 | augmentation_configs=augmentation_configs,
85 | random_seed=42
86 | )
87 |
88 | # Call the create_formatted_finetuning_examples function
89 | formatted_examples = create_formatted_finetuning_examples(
90 | training_examples, formatter_configs)
91 |
92 | # Print the formatted examples
93 | print(formatted_examples)
94 |
--------------------------------------------------------------------------------
/src/llama_recipes/data/sampler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import random
5 | from itertools import islice
6 |
7 | import numpy as np
8 | import torch
9 |
10 |
11 | class LengthBasedBatchSampler(torch.utils.data.BatchSampler):
12 | def __init__(self, data_source, batch_size: int, drop_last: bool, shuffle: bool=True) -> None:
13 | if isinstance(next(iter(data_source)), dict):
14 | first_key = next(iter(next(iter(data_source)).keys()))
15 | self.lengths = [len(d[first_key]) for d in data_source]
16 | else:
17 | self.lengths = [len(d) for d in data_source]
18 | self.batch_size = batch_size
19 | self.drop_last = drop_last
20 | self.shuffle = shuffle
21 |
22 | def __iter__(self):
23 | ids = np.argsort(self.lengths, kind='mergesort')
24 | if self.drop_last:
25 | ids = ids[:len(ids) // self.batch_size * self.batch_size]
26 |
27 | batches = [ids[i:i+self.batch_size] for i in range(0, len(ids), self.batch_size)]
28 |
29 | if self.shuffle:
30 | random.shuffle(batches)
31 |
32 | for b in batches:
33 | yield b
34 |
35 | def __len__(self):
36 | if self.drop_last:
37 | return len(self.lengths) // self.batch_size
38 | else:
39 | return len(self.lengths) // self.batch_size + (len(self.lengths) % self.batch_size > 0)
40 |
41 |
42 | class DistributedLengthBasedBatchSampler(torch.utils.data.BatchSampler):
43 | def __init__(self, data_source, batch_size: int, num_replicas: int, rank: int, shuffle: bool = True, seed: int = 0) -> None:
44 | random.seed(seed)
45 | self.batch_sampler = LengthBasedBatchSampler(
46 | data_source, batch_size=batch_size, drop_last=True, shuffle=shuffle
47 | )
48 | self.num_replicas = num_replicas
49 | self.rank = rank
50 |
51 | def __iter__(self):
52 | max_length = len(self.batch_sampler) // self.num_replicas * self.num_replicas
53 | return islice(self.batch_sampler, self.rank, max_length, self.num_replicas)
54 |
55 | def __len__(self):
56 | return len(self.batch_sampler) // self.num_replicas
57 |
--------------------------------------------------------------------------------
/src/llama_recipes/inference/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
--------------------------------------------------------------------------------
/src/llama_recipes/inference/chat_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import json
5 |
6 | def read_dialogs_from_file(file_path):
7 | with open(file_path, 'r') as file:
8 | dialogs = json.load(file)
9 | return dialogs
10 |
--------------------------------------------------------------------------------
/src/llama_recipes/inference/checkpoint_converter_fsdp_hf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | # from accelerate import init_empty_weights, load_checkpoint_and_dispatch
5 |
6 | import fire
7 | import os
8 | import sys
9 | import yaml
10 |
11 | from transformers import LlamaTokenizer
12 |
13 | from llama_recipes.inference.model_utils import load_llama_from_config
14 |
15 | # Get the current file's directory
16 | current_directory = os.path.dirname(os.path.abspath(__file__))
17 |
18 | # Get the parent directory
19 | parent_directory = os.path.dirname(current_directory)
20 |
21 | # Append the parent directory to sys.path
22 | sys.path.append(parent_directory)
23 | from model_checkpointing import load_sharded_model_single_gpu
24 |
25 | def main(
26 | fsdp_checkpoint_path="", # Path to FSDP Sharded model checkpoints
27 | consolidated_model_path="", # Path to save the HF converted model checkpoints
28 | HF_model_path_or_name="" # Path/name of the HF model that includes config.json and tokenizer_config.json (e.g. meta-llama/Llama-2-7b-chat-hf)
29 | ):
30 |
31 | try:
32 | file_name = 'train_params.yaml'
33 | # Combine the directory and file name to create the full path
34 | train_params_path = os.path.join(fsdp_checkpoint_path, file_name)
35 | # Open the file
36 | with open(train_params_path, 'r') as file:
37 | # Load the YAML data
38 | data = yaml.safe_load(file)
39 |
40 | # Access the 'model_name' field
41 | HF_model_path_or_name = data.get('model_name')
42 |
43 | print(f"Model name: {HF_model_path_or_name}")
44 | except FileNotFoundError:
45 | print(f"The file {train_params_path} does not exist.")
46 | HF_model_path_or_name = input("Please enter the model name: ")
47 | print(f"Model name: {HF_model_path_or_name}")
48 | except Exception as e:
49 | print(f"An error occurred: {e}")
50 |
51 |
52 | #load the HF model definition from config
53 | model_def = load_llama_from_config(HF_model_path_or_name)
54 | print("model is loaded from config")
55 | #load the FSDP sharded checkpoints into the model
56 | model = load_sharded_model_single_gpu(model_def, fsdp_checkpoint_path)
57 | print("model is loaded from FSDP checkpoints")
58 | #loading the tokenizer from the model_path
59 | tokenizer = LlamaTokenizer.from_pretrained(HF_model_path_or_name)
60 | tokenizer.save_pretrained(consolidated_model_path)
61 | #save the FSDP sharded checkpoints in HF format
62 | model.save_pretrained(consolidated_model_path)
63 | print(f"HuggingFace model checkpoints has been saved in {consolidated_model_path}")
64 | if __name__ == "__main__":
65 | fire.Fire(main)
66 |
--------------------------------------------------------------------------------
/src/llama_recipes/inference/model_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3.
3 |
4 | from peft import PeftModel
5 | from transformers import AutoModelForCausalLM, LlamaForCausalLM, LlamaConfig
6 |
7 | # Function to load the main model for text generation
8 | def load_model(model_name, quantization, use_fast_kernels):
9 | print(f"use_fast_kernels{use_fast_kernels}")
10 | model = AutoModelForCausalLM.from_pretrained(
11 | model_name,
12 | return_dict=True,
13 | load_in_8bit=quantization,
14 | device_map="auto",
15 | low_cpu_mem_usage=True,
16 | attn_implementation="sdpa" if use_fast_kernels else None,
17 | )
18 | return model
19 |
20 |
21 | # Function to load the PeftModel for performance optimization
22 | def load_peft_model(model, peft_model):
23 | peft_model = PeftModel.from_pretrained(model, peft_model)
24 | return peft_model
25 |
26 | # Loading the model from config to load FSDP checkpoints into that
27 | def load_llama_from_config(config_path):
28 | model_config = LlamaConfig.from_pretrained(config_path)
29 | model = LlamaForCausalLM(config=model_config)
30 | return model
31 |
32 |
--------------------------------------------------------------------------------
/src/llama_recipes/model_checkpointing/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from llama_recipes.model_checkpointing.checkpoint_handler import (
5 | load_model_checkpoint,
6 | save_model_checkpoint,
7 | load_optimizer_checkpoint,
8 | save_optimizer_checkpoint,
9 | save_model_and_optimizer_sharded,
10 | load_model_sharded,
11 | load_sharded_model_single_gpu
12 | )
13 |
--------------------------------------------------------------------------------
/src/llama_recipes/policies/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from llama_recipes.policies.mixed_precision import *
5 | from llama_recipes.policies.wrapping import *
6 | from llama_recipes.policies.activation_checkpointing_functions import apply_fsdp_checkpointing
7 | from llama_recipes.policies.anyprecision_optimizer import AnyPrecisionAdamW
8 |
--------------------------------------------------------------------------------
/src/llama_recipes/policies/activation_checkpointing_functions.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from functools import partial
5 |
6 | from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import (
7 | checkpoint_wrapper,
8 | CheckpointImpl,
9 | apply_activation_checkpointing,
10 | )
11 | from transformers.models.llama.modeling_llama import LlamaDecoderLayer
12 |
13 | non_reentrant_wrapper = partial(
14 | checkpoint_wrapper,
15 | checkpoint_impl=CheckpointImpl.NO_REENTRANT,
16 | )
17 |
18 | check_fn = lambda submodule: isinstance(submodule, LlamaDecoderLayer)
19 |
20 |
21 | def apply_fsdp_checkpointing(model):
22 | """apply activation checkpointing to model
23 | returns None as model is updated directly
24 | """
25 | print(f"--> applying fsdp activation checkpointing...")
26 |
27 | apply_activation_checkpointing(
28 | model, checkpoint_wrapper_fn=non_reentrant_wrapper, check_fn=check_fn
29 | )
30 |
--------------------------------------------------------------------------------
/src/llama_recipes/policies/mixed_precision.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import torch
5 |
6 | from torch.distributed.fsdp import (
7 | MixedPrecision,
8 | )
9 |
10 | # requires grad scaler in main loop
11 | fpSixteen = MixedPrecision(
12 | param_dtype=torch.float16,
13 | # Gradient communication precision.
14 | reduce_dtype=torch.float16,
15 | # Buffer precision.
16 | buffer_dtype=torch.float16,
17 | )
18 |
19 | bfSixteen = MixedPrecision(
20 | param_dtype=torch.bfloat16,
21 | # Gradient communication precision.
22 | reduce_dtype=torch.bfloat16,
23 | # Buffer precision.
24 | buffer_dtype=torch.bfloat16,
25 | cast_forward_inputs=True,
26 | )
27 |
28 | bfSixteen_mixed = MixedPrecision(
29 | param_dtype=torch.float32,
30 | reduce_dtype=torch.bfloat16,
31 | buffer_dtype=torch.bfloat16,
32 | )
33 |
34 | fp32_policy = MixedPrecision(
35 | param_dtype=torch.float32,
36 | reduce_dtype=torch.float32,
37 | buffer_dtype=torch.float32,
38 | )
39 |
--------------------------------------------------------------------------------
/src/llama_recipes/policies/wrapping.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import functools
5 |
6 | from transformers.models.llama.modeling_llama import LlamaDecoderLayer
7 | from torch.distributed.fsdp.wrap import (
8 | transformer_auto_wrap_policy,
9 | size_based_auto_wrap_policy,
10 | )
11 |
12 |
13 | def get_size_policy(min_params=1e8):
14 | num_wrap_policy = functools.partial(
15 | size_based_auto_wrap_policy, min_num_params=min_params
16 | )
17 | return num_wrap_policy
18 |
19 |
20 | def get_llama_wrapper():
21 | """we register our main layer class and use the fsdp transformer wrapping policy
22 | ensures embedding layers are in the root fsdp unit for shared access and that fsdp units map to transformer layers
23 | """
24 | # ==== use new transformer wrapper
25 |
26 | llama_auto_wrap_policy = functools.partial(
27 | transformer_auto_wrap_policy,
28 | transformer_layer_cls={
29 | LlamaDecoderLayer,
30 | },
31 | )
32 |
33 | return llama_auto_wrap_policy
34 |
--------------------------------------------------------------------------------
/src/llama_recipes/tools/convert_hf_weights_to_llama.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import json
5 | import os
6 | from typing import List, Union
7 |
8 | import fire
9 | import torch
10 | from tqdm import tqdm
11 | from transformers import LlamaForCausalLM # @manual
12 |
13 | NUM_SHARDS = {
14 | "7B": 1,
15 | "13B": 2,
16 | "34B": 4,
17 | "30B": 4,
18 | "65B": 8,
19 | "70B": 8,
20 | }
21 |
22 |
23 | def write_model(model_path, model_size, output_base_path):
24 | dtype = torch.bfloat16
25 |
26 | params = json.load(open(os.path.join(output_base_path, "params.json"), "r"))
27 | num_shards = NUM_SHARDS[model_size]
28 | n_layers = params["n_layers"]
29 | n_heads = params["n_heads"]
30 | n_heads_per_shard = n_heads // num_shards
31 | dim = params["dim"]
32 | dims_per_head = dim // n_heads
33 | base = 10000.0
34 | inv_freq = (
35 | 1.0 / (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
36 | ).to(dtype)
37 |
38 | if "n_kv_heads" in params:
39 | num_key_value_heads = params["n_kv_heads"] # for GQA / MQA
40 | num_local_key_value_heads = n_heads_per_shard // num_key_value_heads
41 | key_value_dim = dim // num_key_value_heads
42 | else: # compatibility with other checkpoints
43 | num_key_value_heads = n_heads
44 | num_local_key_value_heads = n_heads_per_shard
45 | key_value_dim = dim
46 |
47 | model = LlamaForCausalLM.from_pretrained(
48 | model_path,
49 | torch_dtype=dtype,
50 | low_cpu_mem_usage=True,
51 | )
52 | loaded = model.state_dict()
53 |
54 | # permute for sliced rotary
55 | def permute(w, n_heads=n_heads, dim1=dim, dim2=dim):
56 | return (
57 | w.view(n_heads, 2, dim1 // n_heads // 2, dim2)
58 | .transpose(1, 2)
59 | .reshape(dim1, dim2)
60 | )
61 |
62 | state_dict = [{} for _ in range(num_shards)]
63 |
64 | def insert(name: str, tensor: Union[List, torch.Tensor]):
65 | for i in range(num_shards):
66 | state_dict[i][name] = (
67 | tensor[i].clone() if isinstance(tensor, list) else tensor
68 | )
69 |
70 | def insert_chunk(name: str, tensor: torch.Tensor, dim: int):
71 | tensors = tensor.chunk(num_shards, dim=dim)
72 | for i, tensor in enumerate(tensors):
73 | state_dict[i][name] = tensor.clone()
74 |
75 | insert_chunk("tok_embeddings.weight", loaded["model.embed_tokens.weight"], 1)
76 | insert("norm.weight", loaded["model.norm.weight"])
77 | insert_chunk("output.weight", loaded["lm_head.weight"], 0)
78 |
79 | for layer_i in tqdm(range(n_layers), desc="Converting layers"):
80 |
81 | ts = (
82 | permute(loaded[f"model.layers.{layer_i}.self_attn.q_proj.weight"])
83 | .view(n_heads_per_shard * num_shards, dims_per_head, dim)
84 | .chunk(num_shards, dim=0)
85 | )
86 | insert(f"layers.{layer_i}.attention.wq.weight", [t.view(-1, dim) for t in ts])
87 |
88 | ts = (
89 | permute(
90 | loaded[f"model.layers.{layer_i}.self_attn.k_proj.weight"],
91 | num_key_value_heads,
92 | key_value_dim,
93 | dim,
94 | )
95 | .view(num_local_key_value_heads * num_shards, dims_per_head, dim)
96 | .chunk(num_shards, dim=0)
97 | )
98 | insert(f"layers.{layer_i}.attention.wk.weight", [t.view(-1, dim) for t in ts])
99 |
100 | ts = (
101 | loaded[f"model.layers.{layer_i}.self_attn.v_proj.weight"]
102 | .view(num_local_key_value_heads * num_shards, dims_per_head, dim)
103 | .chunk(num_shards, dim=0)
104 | )
105 | insert(f"layers.{layer_i}.attention.wv.weight", [t.view(-1, dim) for t in ts])
106 |
107 | insert_chunk(
108 | f"layers.{layer_i}.attention.wo.weight",
109 | loaded[f"model.layers.{layer_i}.self_attn.o_proj.weight"],
110 | 1,
111 | )
112 |
113 | insert_chunk(
114 | f"layers.{layer_i}.feed_forward.w1.weight",
115 | loaded[f"model.layers.{layer_i}.mlp.gate_proj.weight"],
116 | 0,
117 | )
118 |
119 | insert_chunk(
120 | f"layers.{layer_i}.feed_forward.w2.weight",
121 | loaded[f"model.layers.{layer_i}.mlp.down_proj.weight"],
122 | 1,
123 | )
124 |
125 | insert_chunk(
126 | f"layers.{layer_i}.feed_forward.w3.weight",
127 | loaded[f"model.layers.{layer_i}.mlp.up_proj.weight"],
128 | 0,
129 | )
130 |
131 | insert(
132 | f"layers.{layer_i}.attention_norm.weight",
133 | loaded[f"model.layers.{layer_i}.input_layernorm.weight"],
134 | )
135 | insert(
136 | f"layers.{layer_i}.ffn_norm.weight",
137 | loaded[f"model.layers.{layer_i}.post_attention_layernorm.weight"],
138 | )
139 | insert("rope.freqs", inv_freq)
140 |
141 | for i in tqdm(range(num_shards), desc="Saving checkpoint shards"):
142 | torch.save(
143 | state_dict[i], os.path.join(output_base_path, f"consolidated.{i:02d}.pth")
144 | )
145 |
146 |
147 | def main(
148 | model_path: str,
149 | model_size: str,
150 | output_dir: str,
151 | ):
152 | """Convert llama weights from huggingface format to consolidated format.
153 | params:
154 | model_path: model name or path to the model directory.
155 | model_size: Llama model size, one of 7B, 13B, 34B, 30B, 65B, 70B.
156 | output_dir: directory to save Llama weights, should contains params.json.
157 | """
158 | assert model_size in NUM_SHARDS, f"Unknown model size {model_size}"
159 | params_path = os.path.join(output_dir, "params.json")
160 | assert os.path.isfile(params_path), f"{params_path} does not exist"
161 |
162 | write_model(model_path, model_size, output_dir)
163 |
164 |
165 | if __name__ == "__main__":
166 | fire.Fire(main)
167 |
--------------------------------------------------------------------------------
/src/llama_recipes/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | from llama_recipes.utils.memory_utils import MemoryTrace
5 | from llama_recipes.utils.dataset_utils import *
6 | from llama_recipes.utils.fsdp_utils import fsdp_auto_wrap_policy, hsdp_device_mesh
7 | from llama_recipes.utils.train_utils import *
--------------------------------------------------------------------------------
/src/llama_recipes/utils/config_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import inspect
5 | from dataclasses import asdict
6 |
7 | import torch.distributed as dist
8 | from torch.utils.data import DistributedSampler
9 | from peft import (
10 | LoraConfig,
11 | AdaptionPromptConfig,
12 | PrefixTuningConfig,
13 | )
14 | from transformers import default_data_collator
15 | from transformers.data import DataCollatorForSeq2Seq
16 |
17 | from llama_recipes.configs import datasets, lora_config, llama_adapter_config, prefix_config, train_config
18 | from llama_recipes.data.sampler import LengthBasedBatchSampler, DistributedLengthBasedBatchSampler
19 | from llama_recipes.utils.dataset_utils import DATASET_PREPROC
20 |
21 |
22 | def update_config(config, **kwargs):
23 | if isinstance(config, (tuple, list)):
24 | for c in config:
25 | update_config(c, **kwargs)
26 | else:
27 | for k, v in kwargs.items():
28 | if hasattr(config, k):
29 | setattr(config, k, v)
30 | elif "." in k:
31 | # allow --some_config.some_param=True
32 | config_name, param_name = k.split(".")
33 | if type(config).__name__ == config_name:
34 | if hasattr(config, param_name):
35 | setattr(config, param_name, v)
36 | else:
37 |                     # In case of a specialized config we can warn the user
38 | print(f"Warning: {config_name} does not accept parameter: {k}")
39 | elif isinstance(config, train_config):
40 | print(f"Warning: unknown parameter {k}")
41 |
42 |
43 | def generate_peft_config(train_config, kwargs):
44 | configs = (lora_config, llama_adapter_config, prefix_config)
45 | peft_configs = (LoraConfig, AdaptionPromptConfig, PrefixTuningConfig)
46 | names = tuple(c.__name__.rstrip("_config") for c in configs)
47 |
48 | assert train_config.peft_method in names, f"Peft config not found: {train_config.peft_method}"
49 |
50 | config = configs[names.index(train_config.peft_method)]()
51 |
52 | update_config(config, **kwargs)
53 | params = asdict(config)
54 | peft_config = peft_configs[names.index(train_config.peft_method)](**params)
55 |
56 | return peft_config
57 |
58 |
59 | def generate_dataset_config(train_config, kwargs):
60 | names = tuple(DATASET_PREPROC.keys())
61 |
62 | assert train_config.dataset in names, f"Unknown dataset: {train_config.dataset}"
63 |
64 | dataset_config = {k:v for k, v in inspect.getmembers(datasets)}[train_config.dataset]()
65 |
66 | update_config(dataset_config, **kwargs)
67 | return dataset_config
68 |
69 |
70 | def get_dataloader_kwargs(train_config, dataset, tokenizer, mode):
71 | kwargs = {}
72 | batch_size = train_config.batch_size_training if mode=="train" else train_config.val_batch_size
73 | if train_config.batching_strategy == "padding":
74 | if train_config.enable_fsdp:
75 | kwargs["batch_sampler"] = DistributedLengthBasedBatchSampler(
76 | dataset,
77 | batch_size=batch_size,
78 | rank=dist.get_rank(),
79 | num_replicas=dist.get_world_size(),
80 | shuffle=mode=="train",
81 | )
82 | else:
83 | kwargs["batch_sampler"] = LengthBasedBatchSampler(dataset, batch_size, drop_last=True, shuffle=mode=="train")
84 | kwargs["collate_fn"] = DataCollatorForSeq2Seq(tokenizer)
85 | elif train_config.batching_strategy == "packing":
86 | if train_config.enable_fsdp:
87 | kwargs["sampler"] = DistributedSampler(
88 | dataset,
89 | rank=dist.get_rank(),
90 | num_replicas=dist.get_world_size(),
91 | shuffle=mode=="train",
92 | drop_last=True,
93 | )
94 | kwargs["batch_size"] = batch_size
95 | kwargs["drop_last"] = True
96 | kwargs["collate_fn"] = default_data_collator
97 | else:
98 | raise ValueError(f"Unknown batching strategy: {train_config.batching_strategy}")
99 |
100 | return kwargs
101 |
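# Illustrative usage sketch (not part of the original module): how update_config()
# applies flat and dotted overrides to a tuple of configs. The fields `lr` on
# train_config and `r` on lora_config are assumed from the default llama_recipes configs.
from llama_recipes.configs import lora_config, train_config
from llama_recipes.utils.config_utils import update_config

train_cfg, lora_cfg = train_config(), lora_config()
# a flat key updates every config that defines it; a dotted key targets one config by class name
update_config((train_cfg, lora_cfg), lr=2e-4, **{"lora_config.r": 16})
print(train_cfg.lr, lora_cfg.r)  # 0.0002 16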
--------------------------------------------------------------------------------
/src/llama_recipes/utils/dataset_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import importlib
5 | from functools import partial
6 | from pathlib import Path
7 |
8 | import torch
9 |
10 | from llama_recipes.datasets import (
11 | get_grammar_dataset,
12 | get_alpaca_dataset,
13 | get_samsum_dataset,
14 | )
15 |
16 |
17 | def load_module_from_py_file(py_file: str) -> object:
18 | """
19 | This method loads a module from a py file which is not in the Python path
20 | """
21 | module_name = Path(py_file).name
22 | loader = importlib.machinery.SourceFileLoader(module_name, py_file)
23 | spec = importlib.util.spec_from_loader(module_name, loader)
24 | module = importlib.util.module_from_spec(spec)
25 |
26 | loader.exec_module(module)
27 |
28 | return module
29 |
30 |
31 | def get_custom_dataset(dataset_config, tokenizer, split: str):
32 | if ":" in dataset_config.file:
33 | module_path, func_name = dataset_config.file.split(":")
34 | else:
35 | module_path, func_name = dataset_config.file, "get_custom_dataset"
36 |
37 | if not module_path.endswith(".py"):
38 | raise ValueError(f"Dataset file {module_path} is not a .py file.")
39 |
40 | module_path = Path(module_path)
41 | if not module_path.is_file():
42 | raise FileNotFoundError(f"Dataset py file {module_path.as_posix()} does not exist or is not a file.")
43 |
44 | module = load_module_from_py_file(module_path.as_posix())
45 | try:
46 | return getattr(module, func_name)(dataset_config, tokenizer, split)
47 | except AttributeError as e:
48 | print(f"It seems like the given method name ({func_name}) is not present in the dataset .py file ({module_path.as_posix()}).")
49 | raise e
50 |
51 |
52 | DATASET_PREPROC = {
53 | "alpaca_dataset": partial(get_alpaca_dataset),
54 | "grammar_dataset": get_grammar_dataset,
55 | "samsum_dataset": get_samsum_dataset,
56 | "custom_dataset": get_custom_dataset,
57 | }
58 |
59 |
60 | def get_preprocessed_dataset(
61 | tokenizer, dataset_config, split: str = "train"
62 | ) -> torch.utils.data.Dataset:
63 |     if dataset_config.dataset not in DATASET_PREPROC:
64 | raise NotImplementedError(f"{dataset_config.dataset} is not (yet) implemented")
65 |
66 | def get_split():
67 | return (
68 | dataset_config.train_split
69 | if split == "train"
70 | else dataset_config.test_split
71 | )
72 |
73 | return DATASET_PREPROC[dataset_config.dataset](
74 | dataset_config,
75 | tokenizer,
76 | get_split(),
77 | )
78 |
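# Illustrative sketch (not part of the original module): the "<file>.py[:<function>]"
# convention that get_custom_dataset() parses. The dataclass below is a hypothetical
# stand-in for the custom_dataset config in llama_recipes.configs.datasets, and
# my_dataset.py is a hypothetical user file exposing get_custom_dataset(config, tokenizer, split).
from dataclasses import dataclass

@dataclass
class custom_dataset_config:
    dataset: str = "custom_dataset"
    file: str = "my_dataset.py:get_custom_dataset"  # "<path>.py" or "<path>.py:<function name>"
    train_split: str = "train"
    test_split: str = "validation"

# get_preprocessed_dataset(tokenizer, custom_dataset_config()) would then load my_dataset.py
# and call its get_custom_dataset(dataset_config, tokenizer, "train") for the train split.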
--------------------------------------------------------------------------------
/src/llama_recipes/utils/flop_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List, Optional, Union
2 | import time
3 | import torch
4 | from torch.utils.flop_counter import FlopCounterMode
5 |
6 |
7 | class FlopMeasure(FlopCounterMode):
8 | """
9 | ``FlopMeasure`` is a customized context manager that counts the number of
10 | flops within its context. It is based on ``FlopCounterMode`` with additional start_counting() and stop_counting() function so that the flop counting
11 | will only start after the warmup stage.
12 | It also supports hierarchical output by passing a module (or list of modules) to FlopCounterMode on construction.
13 |
14 | Example usage
15 |
16 | .. code-block:: python
17 |
18 | model = ...
19 | flop_counter = FlopMeasure(model,local_rank=0,warmup_step=3)
20 | for batch in enumerate(dataloader):
21 | with flop_counter:
22 | model(batch)
23 | flop_counter.step()
24 | """
25 |
26 | def __init__(
27 | self,
28 | mods: Optional[Union[torch.nn.Module, List[torch.nn.Module]]] = None,
29 | depth: int = 2,
30 | display: bool = True,
31 | custom_mapping: Dict[Any, Any] = None,
32 | rank=None,
33 | warmup_step: int = 3,
34 | ):
35 | super().__init__(mods, depth, display, custom_mapping)
36 | self.rank = rank
37 | self.warmup_step = warmup_step
38 | self.start_time = 0
39 | self.end_time = 0
40 |
41 | def step(self):
42 |         # Decrease warmup_step by 1 on every step so that flop counting starts once warmup_step == 0; stop decreasing once it reaches -1.
43 | if self.warmup_step >= 0:
44 | self.warmup_step -= 1
45 | if self.warmup_step == 0 and self.start_time == 0:
46 | self.start_time = time.time()
47 | elif self.warmup_step == -1 and self.start_time != 0 and self.end_time == 0:
48 | self.end_time = time.time()
49 | def __enter__(self):
50 | if self.warmup_step == 0:
51 | self.start_time = time.time()
52 | super().__enter__()
53 | return self
54 | def is_done(self):
55 | return self.warmup_step == -1
56 | def get_total_flops(self):
57 | return super().get_total_flops()
58 | def get_flops_per_sec(self):
59 | if self.start_time == 0 or self.end_time == 0:
60 | print("Warning: flop count did not finish correctly")
61 | return 0
62 | return super().get_total_flops()/ (self.end_time - self.start_time)
63 | def get_table(self, depth=2):
64 | return super().get_table(depth)
65 |
66 | def __exit__(self, *args):
67 | if self.get_total_flops() == 0:
68 | print(
69 | "Warning: did not record any flops this time. Skipping the flop report"
70 | )
71 | else:
72 | if self.display:
73 | if self.rank is None or self.rank == 0:
74 | print("Total time used in this flop counting step is: {}".format(self.end_time - self.start_time))
75 | print("The total TFlop per second is: {}".format(self.get_flops_per_sec() / 1e12))
76 | print("The tflop_count table is below:")
77 | print(self.get_table(self.depth))
78 | # Disable the display feature so that we don't print the table again
79 | self.display = False
80 | super().__exit__(*args)
81 |
82 | def __torch_dispatch__(self, func, types, args=(), kwargs=None):
83 | # when warmup_step is 0, count the flops and return the original output
84 | if self.warmup_step == 0:
85 | return super().__torch_dispatch__(func, types, args, kwargs)
86 | # otherwise, just return the original output
87 | return func(*args, **kwargs)
88 |
--------------------------------------------------------------------------------
/src/llama_recipes/utils/fsdp_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 | from torch.distributed._tensor.device_mesh import init_device_mesh
4 | import os
5 |
6 | def fsdp_auto_wrap_policy(model, transformer_layer_name):
7 | import functools
8 |
9 | from torch.distributed.fsdp.wrap import _or_policy, lambda_auto_wrap_policy, transformer_auto_wrap_policy
10 |
11 | from peft.tuners import PrefixEncoder, PromptEmbedding, PromptEncoder
12 |
13 | def lambda_policy_fn(module):
14 | if (
15 | len(list(module.named_children())) == 0
16 | and getattr(module, "weight", None) is not None
17 | and module.weight.requires_grad
18 | ):
19 | return True
20 | return False
21 |
22 | lambda_policy = functools.partial(lambda_auto_wrap_policy, lambda_fn=lambda_policy_fn)
23 | transformer_wrap_policy = functools.partial(
24 | transformer_auto_wrap_policy,
25 | transformer_layer_cls=(
26 | PrefixEncoder,
27 | PromptEncoder,
28 | PromptEmbedding,
29 | transformer_layer_name,
30 | # FullyShardedDataParallelPlugin.get_module_class_from_name(
31 | # model, transformer_layer_name
32 | # ),
33 | ),
34 | )
35 |
36 | auto_wrap_policy = functools.partial(_or_policy, policies=[lambda_policy, transformer_wrap_policy])
37 | return auto_wrap_policy
38 |
39 |
40 | def hsdp_device_mesh(replica_group_size, sharding_group_size, device=None):
41 | """
42 | Initializes a device mesh for use with Hybrid Sharding strategy in FSDP (HSDP) training.
43 |
44 | This function requires explicit sizes for replica and sharding groups to accommodate models
45 | whose GPU fit is unknown, providing flexibility in distributed training setups.
46 |
47 | Args:
48 | replica_group_size (int): The size of each replica group. Must be provided to ensure
49 | the model fits within the available resources.
50 | sharding_group_size (int): The size of each sharding group that the model can fit. Must be provided to
51 | ensure the correct distribution of model parameters.
52 | device (str, optional): The device to use (e.g., "cuda:0"). If None, defaults to "cuda"
53 | with the local rank as the device index.
54 |
55 | Returns:
56 | A device mesh object compatible with FSDP.
57 |
58 | Raises:
59 | ValueError: If replica_group_size or sharding_group_size are not provided, or if the
60 | world size is not evenly divisible by the sharding group size.
61 | RuntimeError: If a valid device mesh cannot be created.
62 |
63 | Usage:
64 |         If your model fits on 4 GPUs, and you have 3 nodes of 8 GPUs, then:
65 |         Sharding_Group_Size = 4
66 |         Replica_Groups_Size = (24 total GPUs / 4 per sharding group) = 6 replica groups
67 |         >>> device_mesh = hsdp_device_mesh(replica_group_size, sharding_group_size)
68 | >>> sharded_model = FSDP(model, device_mesh=device_mesh, ...)
69 | """
70 |
71 | if replica_group_size is None or sharding_group_size is None:
72 | raise ValueError("Both replica_group_size and sharding_group_size must be provided.")
73 |
74 | local_rank = int(os.getenv("LOCAL_RANK", "0"))
75 | world_size = int(os.getenv("WORLD_SIZE", "1"))
76 |
77 |     device = device or "cuda"
78 |
79 | if world_size % sharding_group_size != 0:
80 | raise ValueError(f"World size {world_size} is not evenly divisible by "
81 | f"sharding group size {sharding_group_size}.")
82 |
83 | if (world_size // sharding_group_size) % replica_group_size != 0:
84 | raise ValueError(f"The calculated number of replica groups is not evenly divisible by "
85 | f"replica_group_size {replica_group_size}.")
86 |
87 | device_mesh = init_device_mesh(device, (replica_group_size, sharding_group_size))
88 | if device_mesh is None:
89 | raise RuntimeError("Failed to create a valid device mesh.")
90 |
91 | return device_mesh
92 |
--------------------------------------------------------------------------------
/src/llama_recipes/utils/hf_llama_conversion/README.md:
--------------------------------------------------------------------------------
1 | # Convert Hugging Face llama weights to official llama consolidated format
2 |
3 | This is the reverse of the `convert_llama_weights_to_hf.py` script from the `transformers` package.
4 |
5 | ## Step 0: Convert to consolidated format
6 | - Create an output directory for the converted weights, such as `test70B`.
7 | - Copy the `params.json` file from the official llama download into that directory.
8 | - Run the conversion script. `model-path` can be a Hugging Face hub model or a local hf model directory.
9 | ```
10 | python -m llama_recipes.tools.convert_hf_weights_to_llama --model-path meta-llama/Llama-2-70b-chat-hf --output-dir test70B --model-size 70B
11 | ```
12 |
13 | ## Step 1: Run inference
14 | Check out the official llama inference [repo](https://github.com/facebookresearch/llama). Test using chat or text completion.
15 | ```
16 | torchrun --nproc_per_node 8 example_chat_completion.py --ckpt_dir ./test70B --tokenizer_path ${llama_2_dir}/tokenizer.model
17 | ```
18 |
19 | For validation, please compare the converted weights with the official llama 2 weights:
20 | ```
21 | python compare_llama_weights.py test70B ${llama_2_70b_chat_dir}
22 | ```
23 |
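As a quick sanity check, the output directory should contain one `consolidated.NN.pth` file per shard (8 for the 70B example above, matching `--nproc_per_node 8`). A minimal Python sketch, assuming the `test70B` output directory used in the commands above:
```
import glob
# one consolidated.NN.pth file is written per shard by the conversion script
print(len(glob.glob("test70B/consolidated.*.pth")))  # expect 8 for 70B
```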
--------------------------------------------------------------------------------
/src/llama_recipes/utils/hf_llama_conversion/compare_llama_weights.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import gc
5 | import glob
6 | import os
7 | import sys
8 |
9 | import torch
10 | import tqdm
11 |
12 |
13 | def main() -> None:
14 | """Compare two llama checkpoint directories"""
15 |
16 | one_files = sorted(glob.glob(os.path.join(sys.argv[1], "consolidated.*.pth")))
17 | two_files = sorted(glob.glob(os.path.join(sys.argv[2], "consolidated.*.pth")))
18 | assert len(one_files) == len(
19 | two_files
20 | ), "One directory has {} files while another has {} files.".format(
21 | len(one_files), len(two_files)
22 | )
23 |
24 | deltas = []
25 | for i in tqdm.trange(len(one_files), desc="Comparing shards"):
26 | one = torch.load(one_files[i])
27 | two = torch.load(two_files[i])
28 | assert len(one) == len(
29 | two
30 | ), "shard should have the same length: {} != {}".format(len(one), len(two))
31 |
32 | for _, (v, w) in enumerate(zip(one.items(), two.items())):
33 | assert v[0] == w[0], "{} != {}".format(v[0], w[0])
34 | assert v[1].shape == w[1].shape, "tensor {} shape {} != {}".format(
35 | v[0], v[1].shape, w[1].shape
36 | )
37 |
38 | delta = (v[1] - w[1]).abs().max().item()
39 | deltas.append((i, v[0], delta))
40 | del one
41 | del two
42 | gc.collect()
43 |
44 | deltas = sorted(deltas, key=lambda x: x[-1], reverse=True)
45 | print("Top 10 largest deltas:")
46 | for i, k, v in deltas[:10]:
47 | print(f" shard {i} {k}: {v}")
48 |
49 |
50 | if __name__ == "__main__":
51 | main()
52 |
--------------------------------------------------------------------------------
/src/llama_recipes/utils/memory_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import gc
5 | import psutil
6 | import threading
7 |
8 | import torch
9 | from accelerate.utils import is_xpu_available
10 |
11 | def byte2gb(x):
12 | return int(x / 2**30)
13 | # This context manager is used to track the peak memory usage of the process
14 | class MemoryTrace:
15 | def __enter__(self):
16 | gc.collect()
17 | if is_xpu_available():
18 | torch.xpu.empty_cache()
19 | torch.xpu.reset_max_memory_allocated() # reset the peak gauge to zero
20 | self.begin = byte2gb(torch.xpu.memory_allocated())
21 | elif torch.cuda.is_available():
22 | torch.cuda.empty_cache()
23 | torch.cuda.reset_max_memory_allocated() # reset the peak gauge to zero
24 | self.begin = byte2gb(torch.cuda.memory_allocated())
25 | self.process = psutil.Process()
26 | self.cpu_begin = byte2gb(self.cpu_mem_used())
27 | self.peak_monitoring = True
28 | peak_monitor_thread = threading.Thread(target=self.peak_monitor_func)
29 | peak_monitor_thread.daemon = True
30 | peak_monitor_thread.start()
31 | return self
32 |
33 | def cpu_mem_used(self):
34 | """get resident set size memory for the current process"""
35 | return self.process.memory_info().rss
36 |
37 | def peak_monitor_func(self):
38 | self.cpu_peak = -1
39 |
40 | while True:
41 | self.cpu_peak = max(self.cpu_mem_used(), self.cpu_peak)
42 |
43 | # can't sleep or will not catch the peak right (this comment is here on purpose)
44 | # time.sleep(0.001) # 1msec
45 |
46 | if not self.peak_monitoring:
47 | break
48 |
49 | def __exit__(self, *exc):
50 | self.peak_monitoring = False
51 |
52 | gc.collect()
53 | if is_xpu_available():
54 | torch.xpu.empty_cache()
55 | self.end = byte2gb(torch.xpu.memory_allocated())
56 | self.peak = byte2gb(torch.xpu.max_memory_allocated())
57 | xpu_info = torch.xpu.memory_stats()
58 | self.peak_active_gb = byte2gb(xpu_info["active_bytes.all.peak"])
59 | self.malloc_retries = xpu_info.get("num_alloc_retries", 0)
60 | self.peak_active_gb = byte2gb(xpu_info["active_bytes.all.peak"])
61 | self.m_ooms = xpu_info.get("num_ooms", 0)
62 | self.used = byte2gb(self.end - self.begin)
63 | self.peaked = byte2gb(self.peak - self.begin)
64 | self.max_reserved = byte2gb(torch.xpu.max_memory_reserved())
65 | elif torch.cuda.is_available():
66 | torch.cuda.empty_cache()
67 | self.end = byte2gb(torch.cuda.memory_allocated())
68 | self.peak = byte2gb(torch.cuda.max_memory_allocated())
69 | cuda_info = torch.cuda.memory_stats()
70 | self.peak_active_gb = byte2gb(cuda_info["active_bytes.all.peak"])
71 | self.malloc_retries = cuda_info.get("num_alloc_retries", 0)
72 | self.peak_active_gb = byte2gb(cuda_info["active_bytes.all.peak"])
73 | self.m_ooms = cuda_info.get("num_ooms", 0)
74 | self.used = byte2gb(self.end - self.begin)
75 | self.peaked = byte2gb(self.peak - self.begin)
76 | self.max_reserved = byte2gb(torch.cuda.max_memory_reserved())
77 |
78 |         self.cpu_end = byte2gb(self.cpu_mem_used())
79 |         self.cpu_used = self.cpu_end - self.cpu_begin
80 |         self.cpu_peaked = byte2gb(self.cpu_peak) - self.cpu_begin
81 | # print(f"delta used/peak {self.used:4d}/{self.peaked:4d}")
82 |
83 | def print_stats(self):
84 | device_str = None
85 | if is_xpu_available():
86 | device_str = "XPU"
87 | elif torch.cuda.is_available():
88 | device_str = "CUDA"
89 |
90 | if device_str:
91 | print(f"Max {device_str} memory allocated was {self.peak} GB")
92 | print(f"Max {device_str} memory reserved was {self.max_reserved} GB")
93 | print(f"Peak active {device_str} memory was {self.peak_active_gb} GB")
94 | print(f"{device_str} Malloc retries : {self.malloc_retries}")
95 | print(f"CPU Total Peak Memory consumed during the train (max): {self.cpu_peaked + self.cpu_begin} GB")
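# Illustrative usage sketch (not part of the original module): MemoryTrace is a context
# manager, so wrapping a workload records begin/peak/end device memory and CPU RSS.
import torch
from llama_recipes.utils.memory_utils import MemoryTrace

with MemoryTrace() as memtrace:
    x = torch.randn(2048, 2048)
    y = x @ x  # some work whose memory footprint we want to observe

memtrace.print_stats()            # device summary is printed only when CUDA/XPU is available
print(memtrace.cpu_peaked, "GB")  # CPU peak delta (in whole GB) is always recorded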
--------------------------------------------------------------------------------
/src/llama_recipes/utils/plot_metrics.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
3 |
4 | import json
5 | import matplotlib.pyplot as plt
6 | import argparse
7 | import os
8 |
9 | def plot_metric(data, metric_name, x_label, y_label, title, colors):
10 | plt.figure(figsize=(7, 6))
11 |
12 | plt.plot(data[f'train_epoch_{metric_name}'], label=f'Train Epoch {metric_name.capitalize()}', color=colors[0])
13 | plt.plot(data[f'val_epoch_{metric_name}'], label=f'Validation Epoch {metric_name.capitalize()}', color=colors[1])
14 | plt.xlabel(x_label)
15 | plt.ylabel(y_label)
16 | plt.title(f'Train and Validation Epoch {title}')
17 | plt.legend()
18 | plt.tight_layout()
19 |
20 | def plot_single_metric_by_step(data, metric_name, x_label, y_label, title, color):
21 | plt.plot(data[f'{metric_name}'], label=f'{title}', color=color)
22 | plt.xlabel(x_label)
23 | plt.ylabel(y_label)
24 | plt.title(title)
25 | plt.legend()
26 | plt.tight_layout()
27 |
28 | def plot_metrics_by_step(data, metric_name, x_label, y_label, colors):
29 | plt.figure(figsize=(14, 6))
30 |
31 | plt.subplot(1, 2, 1)
32 | plot_single_metric_by_step(data, f'train_step_{metric_name}', x_label, y_label, f'Train Step {metric_name.capitalize()}', colors[0])
33 | plt.subplot(1, 2, 2)
34 | plot_single_metric_by_step(data, f'val_step_{metric_name}', x_label, y_label, f'Validation Step {metric_name.capitalize()}', colors[1])
35 | plt.tight_layout()
36 |
37 |
38 | def plot_metrics(file_path):
39 | if not os.path.exists(file_path):
40 | print(f"File {file_path} does not exist.")
41 | return
42 |
43 | with open(file_path, 'r') as f:
44 | try:
45 | data = json.load(f)
46 | except json.JSONDecodeError:
47 | print("Invalid JSON file.")
48 | return
49 |
50 | directory = os.path.dirname(file_path)
51 | filename_prefix = os.path.basename(file_path).split('.')[0]
52 |
53 | plot_metric(data, 'loss', 'Epoch', 'Loss', 'Loss', ['b', 'r'])
54 | plt.savefig(os.path.join(directory, f"{filename_prefix}_train_and_validation_loss.png"))
55 | plt.close()
56 |
57 | plot_metric(data, 'perplexity', 'Epoch', 'Perplexity', 'Perplexity', ['g', 'm'])
58 | plt.savefig(os.path.join(directory, f"{filename_prefix}_train_and_validation_perplexity.png"))
59 | plt.close()
60 |
61 | plot_metrics_by_step(data, 'loss', 'Step', 'Loss', ['b', 'r'])
62 | plt.savefig(os.path.join(directory, f"{filename_prefix}_train_and_validation_loss_by_step.png"))
63 | plt.close()
64 |
65 |     plot_metrics_by_step(data, 'perplexity', 'Step', 'Perplexity', ['g', 'm'])
66 | plt.savefig(os.path.join(directory, f"{filename_prefix}_train_and_validation_perplexity_by_step.png"))
67 | plt.close()
68 |
69 | if __name__ == "__main__":
70 | parser = argparse.ArgumentParser(description='Plot metrics from JSON file.')
71 | parser.add_argument('--file_path', required=True, type=str, help='Path to the metrics JSON file.')
72 | args = parser.parse_args()
73 |
74 | plot_metrics(args.file_path)
75 |
--------------------------------------------------------------------------------
/src/metrics/file_consistency.py:
--------------------------------------------------------------------------------
1 | def metric_json_file_consistency(floorplan):
2 | return floorplan.validate_normal
3 |
4 | def metric_json_strict_file_consistency(floorplan):
5 | return floorplan.validate_strict
--------------------------------------------------------------------------------
/src/metrics/prompt_consistency.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | def _compute_recall_precision(TP, FP, FN):
3 | if len(TP) + len(FN) == 0:
4 | return None
5 | precision = len(TP) / (len(TP) + len(FP)) if len(TP) + len(FP) > 0 else 1.0
6 | recall = len(TP) / (len(TP) + len(FN))
7 | return precision, recall
8 |
9 | def _compute_TP_FP_FN(predicted_set, real_set):
10 | TP = predicted_set & real_set
11 | FP = predicted_set - real_set
12 | FN = real_set - predicted_set
13 | return TP, FP, FN
14 |
15 | def _compute_TP_FP_FN_lists(predicted_L, real_L):
16 | TP, FP, FN = [], [], []
17 | real_L = deepcopy(real_L)
18 | for i, pred in enumerate(predicted_L):
19 | if pred in real_L:
20 | TP.append(pred)
21 | real_L.remove(pred)
22 | else:
23 | FP.append(pred)
24 | FN = real_L
25 | return TP, FP, FN
26 |
27 | def metric_num_room_prompt_consistency(floorplan, prompt_floorplan):
28 | try:
29 | prompt_room_count = prompt_floorplan.get_room_count()
30 | except KeyError:
31 | return None
32 | try:
33 | floorplan_room_count = floorplan.get_room_count()
34 | return abs(floorplan_room_count - prompt_room_count)/prompt_room_count
35 | except KeyError:
36 | return 1.0
37 |
38 | def metric_room_id_prompt_consistency(floorplan, prompt_floorplan):
39 | floorplan_room_ids = floorplan.get_unmodified_room_ids()
40 | prompt_room_ids = prompt_floorplan.get_unmodified_room_ids()
41 | TP, FP, FN = _compute_TP_FP_FN(floorplan_room_ids, prompt_room_ids)
42 | return _compute_recall_precision(TP, FP, FN)
43 |
44 | def metric_room_area_prompt_consistency(floorplan, prompt_floorplan):
45 | floorplan_room_ids = floorplan.get_unmodified_room_ids()
46 | prompt_room_ids = prompt_floorplan.get_unmodified_room_ids()
47 |
48 | buff = []
49 | for room_id in floorplan_room_ids & prompt_room_ids:
50 | try:
51 | floorplan_room_area = floorplan.get_room_polygon_area(room_id)[0]
52 | prompt_room_area = prompt_floorplan.get_room_area(room_id)
53 | buff.append(abs(floorplan_room_area - prompt_room_area) / prompt_room_area)
54 | except:
55 | pass
56 | return sum(buff) / len(buff) if len(buff)>0 else None
57 |
58 | def metric_polygon_area_sum_vs_total_area_prompt_consistency(floorplan, prompt_floorplan):
59 |
60 | try:
61 | prompt_total_area = prompt_floorplan.get_total_area()
62 | except KeyError:
63 | return None
64 |
65 | floorplan_room_ids = floorplan.get_unmodified_room_ids()
66 | polygon_total_area = 0.0
67 | for room_id in floorplan_room_ids:
68 | try:
69 | polygon_total_area += floorplan.get_room_polygon_area(room_id)[0]
70 | except:
71 | pass
72 |
73 | return abs(polygon_total_area - prompt_total_area) / prompt_total_area
74 |
75 | def metric_room_type_prompt_consistency(floorplan, prompt_floorplan):
76 |
77 | floorplan_room_types = floorplan.get_room_types()
78 | prompt_room_types = prompt_floorplan.get_room_types()
79 |
80 | TP, FP, FN = _compute_TP_FP_FN_lists(floorplan_room_types, prompt_room_types)
81 | return _compute_recall_precision(TP, FP, FN)
82 |
83 | def metric_room_id_type_match_prompt_consistency(floorplan, prompt_floorplan):
84 | floorplan_room_ids = floorplan.get_unmodified_room_ids()
85 | prompt_room_ids = prompt_floorplan.get_unmodified_room_ids()
86 |
87 | buff, numel = 0, 0
88 | for room_id in floorplan_room_ids & prompt_room_ids:
89 | try:
90 | prompt_room_type = prompt_floorplan.get_room_type(room_id)
91 | except KeyError:
92 | continue
93 | numel += 1
94 | try:
95 | floorplan_room_type = floorplan.get_room_type(room_id)
96 | except KeyError:
97 | continue
98 | buff += 1 if floorplan_room_type == prompt_room_type else 0
99 | return buff / numel if numel > 0 else None
100 |
101 | def metric_room_height_prompt_consistency(floorplan, prompt_floorplan):
102 | floorplan_room_ids = floorplan.get_unmodified_room_ids()
103 | prompt_room_ids = prompt_floorplan.get_unmodified_room_ids()
104 |
105 | buff = []
106 | for room_id in floorplan_room_ids & prompt_room_ids:
107 | try:
108 | prompt_height = prompt_floorplan.get_room_height(room_id)
109 | polygon_height = floorplan.get_room_polygon(room_id).height
110 | buff.append(abs(polygon_height - prompt_height) / prompt_height)
111 | except:
112 | pass
113 | return sum(buff) / len(buff) if buff else None
114 |
115 | def metric_room_width_prompt_consistency(floorplan, prompt_floorplan):
116 | floorplan_room_ids = floorplan.get_unmodified_room_ids()
117 | prompt_room_ids = prompt_floorplan.get_unmodified_room_ids()
118 |
119 | buff = []
120 | for room_id in floorplan_room_ids & prompt_room_ids:
121 | try:
122 | prompt_width = prompt_floorplan.get_room_width(room_id)
123 | polygon_width = floorplan.get_room_polygon(room_id).width
124 | buff.append(abs(polygon_width - prompt_width) / prompt_width)
125 | except:
126 | pass
127 | return sum(buff) / len(buff) if buff else None
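# Illustrative worked example (not part of the original module): how the helpers above
# turn predicted vs. prompt room ids (example ids) into a (precision, recall) pair.
predicted_ids = {"room|0", "room|1", "room|3"}
prompt_ids = {"room|0", "room|1", "room|2"}
TP, FP, FN = _compute_TP_FP_FN(predicted_ids, prompt_ids)
# TP = {"room|0", "room|1"}, FP = {"room|3"}, FN = {"room|2"}
print(_compute_recall_precision(TP, FP, FN))  # (0.666..., 0.666...) -> precision, recall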
--------------------------------------------------------------------------------
/src/metrics/self_consistency.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | def metric_room_count_self_consistency(floorplan):
3 | try:
4 | return floorplan.get_room_count() == floorplan.get_num_rooms()
5 | except:
6 | return None
7 |
8 | def metric_room_id_self_consistency(floorplan):
9 | return len(floorplan.get_room_ids()) == floorplan.get_num_rooms()
10 |
11 | def metric_total_area_self_consistency(floorplan):
12 | room_ids = floorplan.get_room_ids()
13 | try:
14 | total_area = floorplan.get_total_area()
15 | except:
16 | return None
17 | area_diff = total_area
18 | for room_id in room_ids:
19 | try:
20 | area_diff -= floorplan.get_room_area(room_id)
21 | except:
22 | try:
23 | area_diff -= floorplan.get_room_polygon_area(room_id)[0]
24 | except:
25 | pass
26 | return abs(area_diff) / total_area
27 |
28 | def metric_polygon_area_self_consistency(floorplan):
29 | room_ids = floorplan.get_room_ids()
30 | area_scores = []
31 | if_align_score = 0 # if area computed with sorted vertices is the same as the area computed with unsorted vertices
32 | num_valid_rooms = 0
33 | for room_id in room_ids:
34 | try:
35 | computed_area, if_align = floorplan.get_room_polygon_area(room_id)
36 | if_align_score += 1 if if_align else 0
37 | stated_area = floorplan.get_room_area(room_id)
38 | area_scores.append(abs(computed_area - stated_area) / stated_area)
39 | num_valid_rooms += 1
40 | except:
41 | pass
42 |
43 | return (sum(area_scores)/num_valid_rooms, if_align_score / num_valid_rooms) if num_valid_rooms>0 else None
44 |
45 |
46 | def metric_polygon_overlap_count_self_consistency(floorplan):
47 | return floorplan.count_room_overlaps() > 0
48 |
49 | def metric_polygon_containment_count_self_consistency(floorplan):
50 | raise NotImplementedError("Not implemented yet")
51 |
52 | def metric_room_height_self_consistency(floorplan):
53 |
54 | room_ids = set(floorplan.get_room_ids())
55 |
56 | height_scores = []
57 | for room_id in room_ids:
58 | try:
59 | stated_height = floorplan.get_room_height(room_id)
60 | polygon_height = floorplan.get_room_polygon(room_id).height
61 | height_scores.append(abs(polygon_height - stated_height) / stated_height)
62 | except:
63 | pass
64 | return sum(height_scores)/len(height_scores) if height_scores else None
65 |
66 | def metric_room_width_self_consistency(floorplan):
67 | room_ids = set(floorplan.get_room_ids())
68 |
69 | width_scores = []
70 | for room_id in room_ids:
71 | try:
72 | stated_width = floorplan.get_room_width(room_id)
73 | polygon_width = floorplan.get_room_polygon(room_id).width
74 | width_scores.append(abs(polygon_width - stated_width) / stated_width)
75 | except:
76 | pass
77 | return sum(width_scores)/len(width_scores) if width_scores else None
--------------------------------------------------------------------------------
/src/pred/extract_output_json.py:
--------------------------------------------------------------------------------
1 | import json
2 | from ..utils import repair_json
3 |
4 | def extract_output_json(input_str: str):
5 | try:
6 |         output_index = input_str.find("Output:")
7 |         if output_index != -1:
8 |             output_index += len("Output:")
9 |         elif "assistant" in input_str:
10 |             output_index = input_str.find("assistant") + len("assistant")
11 |         else:
12 |             return None
13 |
14 | output_str = input_str[output_index:]
15 | output_dict = json.loads(output_str)
16 |
17 | return output_dict
18 | except json.JSONDecodeError:
19 | try:
20 | json_repaired = repair_json(output_str, return_objects=True)
21 | if json_repaired != "":
22 | return json_repaired
23 | else:
24 | return {}
25 | except Exception:
26 | return {}
27 |
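# Illustrative usage sketch (not part of the original module): the generation is expected
# to contain an "Output:" marker (or the chat "assistant" tag) followed by JSON; the keys
# below are just example fields.
raw = 'prompt echo...\nOutput:\n{"room_count": 1, "rooms": [{"id": "room|0"}]}'
print(extract_output_json(raw))  # {'room_count': 1, 'rooms': [{'id': 'room|0'}]}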
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .process_dataset import Floorplan, FloorplansAndPrompt
2 | from .eval_sample import FloorplansAndPromptEvaluation
3 | from .json_repair import *
4 | from .polygon_object import Polygon
5 | from .eval_overall import Evaluate
6 | from .util import natural_sort_key, list_folders, list_json_files
7 | from .plot import get_df_from_summary, plot_radar_from_df, plot_categories_sanity_check, \
8 | get_df_from_summary_separated_by_num_rooms, plot_3d_from_df
9 |
10 |
11 | __all__ = ['Floorplan', 'FloorplansAndPrompt', 'FloorplansAndPromptEvaluation', 'Polygon', 'Evaluate',
12 | 'natural_sort_key', 'list_folders', 'list_json_files',
13 | 'repair_json', 'json_loads', 'json_load', 'json_from_file','plot_radar_from_df',
14 | 'plot_categories_sanity_check', 'get_df_from_summary', 'get_df_from_summary_separated_by_num_rooms',
15 | 'plot_3d_from_df']
--------------------------------------------------------------------------------
/src/utils/bubble_graph.py:
--------------------------------------------------------------------------------
1 | import json_repair
2 | import numpy as np
3 |
4 | def extract_polygon(json_str=None, json_dict=None):
5 | room_info = extract_room_info(json_str,json_dict)
6 | return [room['floor_polygon'] for room in room_info]
7 |
8 | def extract_room_info(json_str=None, json_dict=None):
9 | '''
10 | extract polygon, room type and room id when they exist
11 | '''
12 | rooms_info = []
13 | if json_dict is None:
14 | json_dict = json_repair.loads(json_str)
15 | if 'rooms' not in json_dict.keys():
16 | return None
17 | for room in json_dict['rooms']:
18 | room_d = {}
19 | if 'floor_polygon' in room.keys():
20 | vertices = room['floor_polygon']
21 | polygon = []
22 |             for vertex in vertices:
23 |                 if 'x' in vertex.keys() and 'z' in vertex.keys():
24 |                     polygon.append([vertex['x'],vertex['z']])
25 | room_d['floor_polygon'] = polygon
26 | if 'room_type' in room.keys():
27 | room_d['room_type'] = room['room_type']
28 | if 'id' in room.keys():
29 | room_d['id'] = room['id']
30 | rooms_info.append(room_d)
31 | return rooms_info
32 |
33 | def polygon2bbox(polygon):
34 | x_max, x_min, y_max, y_min = 0, np.inf, 0, np.inf
35 | for x,y in polygon:
36 | x_max = max(x_max,x)
37 | x_min = min(x_min,x)
38 | y_max = max(y_max,y)
39 | y_min = min(y_min,y)
40 | return (x_min, y_min, x_max, y_max)
41 |
42 | def bboxes2bubble(bboxes, th=9):
43 | '''
44 | bboxes: list of xyxy definitions for each room
45 | '''
46 | edges = []
47 | for u in range(len(bboxes)):
48 | for v in range(u+1,len(bboxes)):
49 | if not collide2d(bboxes[u][:4],bboxes[v][:4],th=th): continue
50 | # uy0, ux0, uy1, ux1 = bboxes[u][:4]
51 | # vy0, vx0, vy1, vx1 = bboxes[v][:4]
52 | # uc = (uy0+uy1)/2,(ux0+ux1)/2
53 | # vc = (vy0+vy1)/2,(vx0+vx1)/2
54 | # if ux0 < vx0 and ux1 > vx1 and uy0 < vy0 and uy1 > vy1:
55 | # relation = 5 #'surrounding'
56 | # elif ux0 >= vx0 and ux1 <= vx1 and uy0 >= vy0 and uy1 <= vy1:
57 | # relation = 4 #'inside'
58 | # else:
59 | # relation = point_box_relation(uc,bboxes[v,:4])
60 | # edges.append([u,v,relation])
61 | edges.append([u,v])
62 |
63 | edges = np.array(edges,dtype=int)
64 | return edges
65 |
66 | def collide2d(bbox1, bbox2, th=0):
67 | return not(
68 | (bbox1[0]-th > bbox2[2]) or
69 | (bbox1[2]+th < bbox2[0]) or
70 | (bbox1[1]-th > bbox2[3]) or
71 | (bbox1[3]+th < bbox2[1])
72 | )
73 |
74 |
75 | def get_edit_distance(g1,g2,g1_dict,g2_dict):
76 | '''
77 | g1: graph 1 -- defined by pairs of connected nodes
78 | g2: graph 2
79 | g1_dict: dictionary containing info on nodes of g1
80 | g1_dict['node2room'] = list of room names where idx is room idx
81 | g1_dict['node2id'] = list of room idx to 'id'
82 | '''
83 | pass
84 |
85 | def procthor2bubble(version=7):
86 | from datasets import load_from_disk
87 | from datasets import Dataset, DatasetDict
88 | ds_path = f'/network/scratch/l/luozhiha/datasets/procthor_data:v{version}'
89 | dataset = load_from_disk(ds_path)
90 | modified_data = {}
91 | for split in ['train','validation','test']:
92 | modified_split = []
93 | dset = dataset[split]
94 | for idx, data in enumerate(dset):
95 | print(f'{split}: {idx}')
96 | room_info = extract_room_info(json_dict = data)
97 | polygons = [room['floor_polygon'] for room in room_info]
98 | bboxes = [polygon2bbox(pg) for pg in polygons]
99 | edges = bboxes2bubble(bboxes,th=2)
100 | data['edges'] = edges.tolist()
101 | modified_split.append(data)
102 | modified_data[split] = Dataset.from_list(modified_split)
103 | modified_data = DatasetDict(modified_data)
104 | version = 8
105 | ds_path = f'/network/scratch/l/luozhiha/datasets/procthor_data:v{version}'
106 | modified_data.save_to_disk(ds_path)
107 | import pdb; pdb.set_trace()
108 |
109 | if __name__ == '__main__':
110 | procthor2bubble(version=7)
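# Illustrative example (not part of the original module): two rooms whose bounding boxes
# come within the collision threshold produce a single adjacency edge in the bubble graph.
room_a = [[0, 0], [4, 0], [4, 4], [0, 4]]  # 4x4 room at the origin
room_b = [[5, 0], [9, 0], [9, 4], [5, 4]]  # 4x4 room one unit to the right
bboxes = [polygon2bbox(room_a), polygon2bbox(room_b)]
print(bboxes2bubble(bboxes, th=2))  # [[0 1]] -- the gap of 1 is within th=2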
--------------------------------------------------------------------------------
/src/utils/fp_plot/__init__.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from . import procthorpy
3 | # import rplanpy
4 |
5 | # def plot_rplan(file: str, out_file: str = 'output_graph.png', plot_graph: bool = False) -> None:
6 | # data = rplanpy.data.RplanData(file)
7 | # ncols = 2 if plot_graph else 1
8 | # _fig, ax = plt.subplots(nrows=1, ncols=ncols, figsize=(ncols*5, 5))
9 |
10 | # if plot_graph:
11 | # rplanpy.plot.plot_floorplan(data, ax=ax[0], title="Rooms")
12 | # rplanpy.plot.plot_floorplan_graph(
13 | # data=data, with_colors=True, edge_label='door', ax=ax[1],
14 | # title="Bubble graph"
15 | # )
16 | # else:
17 | # rplanpy.plot.plot_floorplan(data, ax=ax, title="Rooms")
18 |
19 | # plt.tight_layout()
20 | # plt.savefig(out_file)
21 | # plt.show()
22 |
23 | def plot_procthor(data, out_file: str = 'output_procthor.png') -> None:
24 | data = data["rooms"]
25 | ncols = 1
26 | _fig, ax = plt.subplots(nrows=1, ncols=ncols, figsize=(ncols*5, 5))
27 |
28 | procthorpy.plot.plot_floorplan(data, ax=ax, title=None, label_rooms=False)
29 |
30 | plt.tight_layout()
31 | plt.savefig(out_file,bbox_inches='tight', transparent=True)
32 | plt.clf()
33 | plt.close()
34 | # plt.show()
--------------------------------------------------------------------------------
/src/utils/fp_plot/procthorpy/__init__.py:
--------------------------------------------------------------------------------
1 | from . import utils
2 | from . import plot
3 |
--------------------------------------------------------------------------------
/src/utils/fp_plot/procthorpy/plot.py:
--------------------------------------------------------------------------------
1 | from matplotlib import patches
2 | import matplotlib.pyplot as plt
3 | import networkx as nx
4 | import numpy as np
5 | from .utils import ROOM_COLOR, ROOM_TYPE
6 |
7 |
8 | def floorplan_to_color(data):
9 | room_colors = []
10 | for room in data:
11 | room_type_key = next(key for key, value in ROOM_TYPE.items() if value == room['room_type'])
12 | color = ROOM_COLOR[room_type_key]
13 | room_colors.append((room['floor_polygon'], color, room['room_type']))
14 | return room_colors
15 |
16 |
17 | def plot_floorplan(data, ax=None, title=None, wall_thickness=0.4, label_rooms=False):
18 | room_colors = floorplan_to_color(data)
19 |
20 | for polygon, color, room_type in room_colors:
21 | polygon_points = [(point['x'], point['z']) for point in polygon]
22 | color_normalized = [c / 255.0 for c in color]
23 |
24 | # Draw room
25 | polygon_shape = patches.Polygon(polygon_points, closed=True, edgecolor='black', facecolor=color_normalized, linewidth=2)
26 | ax.add_patch(polygon_shape)
27 |
28 |         # Draw walls
29 | for i in range(len(polygon_points)):
30 | start_point = polygon_points[i]
31 | end_point = polygon_points[(i + 1) % len(polygon_points)]
32 | ax.plot([start_point[0], end_point[0]], [start_point[1], end_point[1]], color=[c / 255.0 for c in ROOM_COLOR[14]], linewidth=wall_thickness * 10)
33 |
34 | # Room label
35 | if label_rooms:
36 | centroid = np.mean(polygon_points, axis=0)
37 | ax.text(centroid[0], centroid[1], room_type, ha='center', va='center', fontsize=6, weight='bold', color='black')
38 |
39 | ax.set_xlim(-1, max(p['x'] for room in data for p in room['floor_polygon']) + 1)
40 | ax.set_ylim(-1, max(p['z'] for room in data for p in room['floor_polygon']) + 1)
41 | ax.set_aspect('equal', adjustable='box')
42 | ax.axis('off')
43 |
44 | if title:
45 | ax.set_title(title)
46 |
47 | return ax
48 |
--------------------------------------------------------------------------------
/src/utils/fp_plot/procthorpy/utils.py:
--------------------------------------------------------------------------------
1 | ROOM_TYPE = {
2 | 0: "LivingRoom",
3 | 1: "Bedroom",
4 | 2: "Kitchen",
5 | 3: "Bathroom",
6 | 4: 'MasterRoom',
7 | 5: 'DiningRoom',
8 | 6: 'ChildRoom',
9 | 7: 'StudyRoom',
10 | 8: 'SecondRoom',
11 | 9: 'GuestRoom',
12 | 10: 'Balcony',
13 | 11: 'Entrance',
14 | 12: 'Storage',
15 | }
16 |
17 | ROOM_COLOR = {
18 | 0: [244, 242, 229],
19 | 1: [253, 244, 171],
20 | 2: [234, 216, 214],
21 | 3: [205, 233, 252],
22 | 4: [244, 242, 229],
23 | 5: [253, 244, 171],
24 | 6: [253, 244, 171],
25 | 7: [253, 244, 171],
26 | 8: [253, 244, 171],
27 | 9: [208, 216, 135],
28 | 10: [244, 242, 229],
29 | 11: [249, 222, 189],
30 | 12: [128, 128, 128],
31 | 13: [255, 255, 255],
32 | 14: [79, 79, 79],
33 | 15: [255, 225, 25],
34 | 16: [128, 128, 128],
35 | 17: [255, 225, 25],
36 | }
--------------------------------------------------------------------------------
/src/utils/json_check/__init__.py:
--------------------------------------------------------------------------------
1 | from .schema import schema, strict_schema
2 | from .verify import is_valid_json
3 |
4 | __all__ = ["schema", "strict_schema", "is_valid_json"]
--------------------------------------------------------------------------------
/src/utils/json_check/schema.py:
--------------------------------------------------------------------------------
1 | strict_schema = {
2 | "type": "object",
3 | "properties": {
4 | "room_count": {
5 | "type": "integer"
6 | },
7 | "total_area": {
8 | "type": "number"
9 | },
10 | "room_types": {
11 | "type": "array",
12 | "items": {
13 | "type": "string"
14 | }
15 | },
16 | "rooms": {
17 | "type": "array",
18 | "items": {
19 | "type": "object",
20 | "properties": {
21 | "id": {
22 | "type": "string"
23 | },
24 | "room_type": {
25 | "type": "string"
26 | },
27 | "area": {
28 | "type": "number"
29 | },
30 | "width": {
31 | "type": "number"
32 | },
33 | "height": {
34 | "type": "number"
35 | },
36 | "is_regular": {
37 | "type": "integer"
38 | },
39 | "floor_polygon": {
40 | "type": "array",
41 | "items": {
42 | "type": "object",
43 | "properties": {
44 | "x": {
45 | "type": "number"
46 | },
47 | "z": {
48 | "type": "number"
49 | }
50 | },
51 | "required": ["x", "z"]
52 | }
53 | }
54 | },
55 | "required": ["id", "room_type", "area", "width", "height", "is_regular", "floor_polygon"]
56 | }
57 | },
58 | },
59 | "required": ["room_count", "total_area", "room_types", "rooms"]
60 | }
61 |
62 | schema = {
63 | "type": "object",
64 | "properties": {
65 | "room_count": {
66 | "type": "integer"
67 | },
68 | "total_area": {
69 | "type": "number"
70 | },
71 | "room_types": {
72 | "type": "array",
73 | "items": {
74 | "type": "string"
75 | }
76 | },
77 | "rooms": {
78 | "type": "array",
79 | "items": {
80 | "type": "object",
81 | "properties": {
82 | "id": {
83 | "type": "string"
84 | },
85 | "room_type": {
86 | "type": "string"
87 | },
88 | "area": {
89 | "type": "number"
90 | },
91 | "width": {
92 | "type": "number"
93 | },
94 | "height": {
95 | "type": "number"
96 | },
97 | "is_regular": {
98 | "type": "integer"
99 | },
100 | "floor_polygon": {
101 | "type": "array",
102 | "items": {
103 | "type": "object",
104 | "properties": {
105 | "x": {
106 | "type": "number"
107 | },
108 | "y": {
109 | "type": "number"
110 | },
111 | "z": {
112 | "type": "number"
113 | }
114 | },
115 | "anyOf": [
116 | {"required": ["x", "z"]},
117 | {"required": ["x", "y"]}
118 | ]
119 | }
120 | }
121 | }
122 | }
123 | },
124 | "doors": {
125 | "type": "array",
126 | "items": {
127 | "type": "object",
128 | "properties": {
129 | "id": {
130 | "type": "string"
131 | },
132 | "position": {
133 | "type": "array",
134 | "items": {
135 | "type": "object",
136 | "properties": {
137 | "x": {
138 | "type": "number"
139 | },
140 | "y": {
141 | "type": "number"
142 | },
143 | "z": {
144 | "type": "number"
145 | }
146 | },
147 | "anyOf": [
148 | {"required": ["x", "z"]},
149 | {"required": ["x", "y"]}
150 | ]
151 | }
152 | }
153 | },
154 | "required": ["id", "position"]
155 | }
156 | },
157 | "windows": {
158 | "type": "array",
159 | "items": {
160 | "type": "object",
161 | "properties": {
162 | "id": {
163 | "type": "string"
164 | },
165 | "position": {
166 | "type": "array",
167 | "items": {
168 | "type": "object",
169 | "properties": {
170 | "x": {
171 | "type": "number"
172 | },
173 | "y": {
174 | "type": "number"
175 | },
176 | "z": {
177 | "type": "number"
178 | }
179 | },
180 | "anyOf": [
181 | {"required": ["x", "z"]},
182 | {"required": ["x", "y"]}
183 | ]
184 | }
185 | }
186 | },
187 | "required": ["id", "position"]
188 | }
189 | }
190 | }
191 | }
--------------------------------------------------------------------------------
/src/utils/json_check/verify.py:
--------------------------------------------------------------------------------
1 | from jsonschema import validate
2 | from jsonschema.exceptions import ValidationError
3 | from .schema import schema, strict_schema
4 |
5 | def is_valid_json(json_data, strict=False):
6 | _schema = strict_schema if strict else schema
7 | try:
8 | validate(json_data, _schema)
9 | return True
10 |     except ValidationError:
11 | return False
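# Illustrative usage sketch (not part of the original module): the relaxed schema only
# type-checks the fields it knows about, while strict=True also enforces the required keys.
minimal = {"rooms": [{"id": "room|0", "floor_polygon": [{"x": 0, "z": 0}]}]}
print(is_valid_json(minimal))               # True  (relaxed schema)
print(is_valid_json(minimal, strict=True))  # False (room_count, total_area, ... are missing)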
--------------------------------------------------------------------------------
/src/utils/polygon_object.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | def line_intersect(vertex1, vertex2, vertex3, vertex4):
4 | # check if two lines intersect
5 | def ccw(A, B, C):
6 | return (C['z']-A['z']) * (B['x']-A['x']) >= (B['z']-A['z']) * (C['x']-A['x'])
7 | A, B, C, D = vertex1, vertex2, vertex3, vertex4
8 | return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
9 |
10 | # def line_intersect_dumb(p1,p2,q1,q2):
11 | # xp_min = min(p1['x'],p2['x'])
12 | # xp_max = max(p1['x'],p2['x'])
13 | # xq_min = min(q1['x'],q2['x'])
14 | # xq_max = max(q1['x'],q2['x'])
15 | # zp_min = min(p1['z'],p2['z'])
16 | # zp_max = max(p1['z'],p2['z'])
17 | # zq_min = min(q1['z'],q2['z'])
18 | # zq_max = max(q1['z'],q2['z'])
19 |
20 | # x0_min = max(xp_min,xq_min)
21 | # x0_max = min(xp_max,xq_max)
22 |
23 | # if x0_max <= x0_min:
24 | # return False
25 |
26 | # z0_min = max(zp_min,zq_min)
27 | # z0_max = min(zp_max,zq_max)
28 |
29 | # if z0_max <= z0_min:
30 | # return False
31 |
32 | # rise1 = p2['z'] - p1['z']
33 | # rise2 = q2['z'] - q1['z']
34 | # run1 = p2['x'] - p1['z']
35 | # run2 = q2['x'] - q1['z']
36 |
37 | # if run1 == 0 and run2 ==0:
38 | # return False
39 |
40 | # m1, m2 = None, None
41 | # if run1 != 0:
42 | # m1 = rise1/run1
43 | # if run2 != 0:
44 | # m2 = rise2/run2
45 | # if m1==m2:
46 | # return False
47 |
48 | class Polygon:
49 | def __init__(self, vertices, scaling_factor=18/256):
50 | self.scaling_factor = scaling_factor
51 | self.set_vertices(vertices)
52 | self.edges = self.get_edges()
53 | self.unsorted_area = self.calculate_polygon_area(self.vertices)
54 | self.sorted_area = self.calculate_polygon_area(self.sorted_vertices)
55 | self.width = self.max_x - self.min_x
56 | self.height = self.max_y - self.min_y
57 |
58 | def set_vertices(self, vertices):
59 | for vertex in vertices:
60 | vertex['x'] *= self.scaling_factor
61 | vertex['z'] *= self.scaling_factor
62 | self.vertices = vertices
63 | self.sorted_vertices = self.get_sorted_vertices()
64 |
65 | def get_edges(self):
66 | """ Generate edges by creating pairs of points """
67 | return [(self.vertices[i], self.vertices[(i + 1) % len(self.vertices)]) for i in range(len(self.vertices))]
68 | # return [(self.sorted_vertices[i], self.sorted_vertices[(i + 1) % len(self.sorted_vertices)]) for i in range(len(self.sorted_vertices))]
69 |
70 | def get_sorted_vertices(self):
71 | def get_midpoint(vertices):
72 | sum_x, sum_z = 0, 0
73 | min_x, max_x, min_y, max_y = float('inf'), -float('inf'), float('inf'), -float('inf')
74 | for vertex in vertices:
75 | sum_x += vertex['x']
76 | sum_z += vertex['z']
77 | min_x, max_x = min(min_x, vertex['x']), max(max_x, vertex['x'])
78 | min_y, max_y = min(min_y, vertex['z']), max(max_y, vertex['z'])
79 | return (sum_x/len(vertices), sum_z/len(vertices)), (min_x, max_x, min_y, max_y)
80 |
81 | def get_slope_from_mid_point(vertices):
82 | (mid_x, mid_z), (min_x, max_x, min_y, max_y) = get_midpoint(vertices)
83 | ret = []
84 | for vertex in vertices:
85 | ret.append((vertex, math.atan2(vertex['x'] - mid_x, vertex['z'] - mid_z)))
86 | return ret, (min_x, max_x, min_y, max_y)
87 |
88 | vertices_with_slopes, (self.min_x, self.max_x, self.min_y, self.max_y) = get_slope_from_mid_point(self.vertices)
89 | vertices_with_slopes = sorted(vertices_with_slopes, key=lambda x: x[1])
90 | return [vertex[0] for vertex in vertices_with_slopes]
91 |
92 | def calculate_polygon_area(self, vertices, decimals=1): # shoelace formula
93 | n = len(vertices)
94 | area = 0
95 | for i in range(n):
96 | j = (i + 1) % n
97 | area += vertices[i]['x'] * vertices[j]['z']
98 | area -= vertices[j]['x'] * vertices[i]['z']
99 | area = abs(area) / 2.0
100 | return round(area, decimals)
101 |
102 | def surround(self, other):
103 | # TODO
104 | pass
105 |
106 | def overlap(self, other):
107 |
108 | for edge1 in self.edges:
109 | for edge2 in other.edges:
110 | if line_intersect(edge1[0], edge1[1], edge2[0], edge2[1]):
111 | return True
112 | return False
113 |
114 |
115 |
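# Illustrative worked example (not part of the original module): a 64x64 square.
# With the default scaling_factor of 18/256 each side becomes 64 * 18/256 = 4.5,
# so the shoelace area is 4.5 * 4.5 = 20.25, rounded to one decimal.
square = Polygon([{"x": 0, "z": 0}, {"x": 64, "z": 0}, {"x": 64, "z": 64}, {"x": 0, "z": 64}])
print(square.width, square.height, square.sorted_area)  # 4.5 4.5 20.2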
--------------------------------------------------------------------------------
/src/utils/util.py:
--------------------------------------------------------------------------------
1 | def natural_sort_key(s):
2 | import re
3 |     return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', s)]
4 |
5 | def list_folders(directory, use_natural_sort=True):
6 | import os
7 | folders = [folder for folder in os.listdir(directory) if os.path.isdir(os.path.join(directory, folder))]
8 | if use_natural_sort:
9 | folders.sort(key=natural_sort_key)
10 | else:
11 | folders.sort(key=str.lower)
12 | return folders
13 |
14 | def list_json_files(directory):
15 | import os
16 | json_files = [file for file in os.listdir(directory) if file.endswith('.json') and os.path.isfile(os.path.join(directory, file))]
17 | json_files.sort(key=natural_sort_key)
18 | return json_files
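# Illustrative example (not part of the original module): natural_sort_key orders numbered
# file names by numeric value rather than lexicographically.
names = ["sample_10.json", "sample_2.json", "sample_1.json"]
print(sorted(names, key=natural_sort_key))
# ['sample_1.json', 'sample_2.json', 'sample_10.json']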
--------------------------------------------------------------------------------