├── .gitattributes ├── README.md ├── config.json ├── generation_config.json ├── model.safetensors ├── special_tokens_map.json ├── tokenizer.json └── tokenizer_config.json /.gitattributes: -------------------------------------------------------------------------------- 1 | *.safetensors filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pythia Quantized Model for Question/Answer 2 | 3 | This repository hosts a quantized version of the Pythia model, fine-tuned for question/answer tasks. The model has been optimized for efficient deployment while maintaining high accuracy, making it suitable for resource-constrained environments. 4 | 5 | ## Model Details 6 | 7 | - **Model Architecture:** Pythia-410m 8 | - **Task:** Chatbot 9 | - **Dataset:** sewon/ambig_qa 10 | - **Quantization:** Float16 11 | - **Fine-tuning Framework:** Hugging Face Transformers 12 | 13 | ## Usage 14 | 15 | ### Installation 16 | 17 | ```sh 18 | pip install transformers torch 19 | ``` 20 | 21 | ### Loading the Model 22 | 23 | ```python 24 | from transformers import AutoTokenizer, AutoModelForCausalLM 25 | import torch 26 | tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-410m") 27 | model = AutoModelForCausalLM.from_pretrained("AventIQ-AI/pythia-410m-chatbot") 28 | 29 | tokenizer.pad_token = tokenizer.eos_token 30 | 31 | def chat_with_model(model, tokenizer, question, max_length=256): 32 | """Generate response to a question""" 33 | input_text = question 34 | 35 | inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512) 36 | 37 | with torch.no_grad(): 38 | outputs = model.generate( 39 | inputs["input_ids"], 40 | attention_mask=inputs["attention_mask"], 41 | max_length=max_length, 42 | num_return_sequences=1, 43 | temperature=1.0, 44 | do_sample=True, 45 | pad_token_id=tokenizer.pad_token_id 46 | 
) 47 | 48 | return tokenizer.decode(outputs[0], skip_special_tokens=True) 49 | 50 | # Example usage 51 | test_question = "What is the capital of France?" 52 | response = chat_with_model(model, tokenizer, test_question) 53 | print("Answer", response) 54 | ``` 55 | 56 | ## Performance Metrics 57 | 58 | - **Accuracy:** 0.56 59 | - **F1 Score:** 0.56 60 | - **Precision:** 0.68 61 | - **Recall:** 0.56 62 | 63 | ## Fine-Tuning Details 64 | 65 | ### Dataset 66 | 67 | The Hugging Face `ambig_qa` dataset was used, containing both question and answer examples. 68 | 69 | ### Training 70 | 71 | - Number of epochs: 3 72 | - Batch size: 4 73 | - Evaluation strategy: epoch 74 | - Learning rate: 2e-5 75 | 76 | ### Quantization 77 | 78 | Post-training quantization was applied using PyTorch's built-in quantization framework to reduce the model size and improve inference efficiency. 79 | 80 | ## Repository Structure 81 | 82 | ``` 83 | . 84 | ├── model/ # Contains the quantized model files 85 | ├── tokenizer/ # Tokenizer configuration and vocabulary files 86 | ├── model.safetensors # Fine Tuned Model 87 | ├── README.md # Model documentation 88 | ``` 89 | 90 | ## Limitations 91 | 92 | - The model may not generalize well to domains outside the fine-tuning dataset. 93 | - Quantization may result in minor accuracy degradation compared to full-precision models. 94 | 95 | ## Contributing 96 | 97 | Contributions are welcome! Feel free to open an issue or submit a pull request if you have suggestions or improvements. 
98 | 99 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "./pythia-410m-finetuned", 3 | "architectures": [ 4 | "GPTNeoXForCausalLM" 5 | ], 6 | "attention_bias": true, 7 | "attention_dropout": 0.0, 8 | "bos_token_id": 0, 9 | "classifier_dropout": 0.1, 10 | "eos_token_id": 0, 11 | "hidden_act": "gelu", 12 | "hidden_dropout": 0.0, 13 | "hidden_size": 1024, 14 | "initializer_range": 0.02, 15 | "intermediate_size": 4096, 16 | "layer_norm_eps": 1e-05, 17 | "max_position_embeddings": 2048, 18 | "model_type": "gpt_neox", 19 | "num_attention_heads": 16, 20 | "num_hidden_layers": 24, 21 | "pad_token_id": 0, 22 | "partial_rotary_factor": 0.25, 23 | "rope_scaling": null, 24 | "rope_theta": 10000, 25 | "rotary_emb_base": 10000, 26 | "rotary_pct": 0.25, 27 | "tie_word_embeddings": false, 28 | "torch_dtype": "float16", 29 | "transformers_version": "4.48.1", 30 | "use_cache": true, 31 | "use_parallel_residual": true, 32 | "vocab_size": 50304 33 | } 34 | -------------------------------------------------------------------------------- /generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "bos_token_id": 0, 4 | "eos_token_id": 0, 5 | "transformers_version": "4.48.1" 6 | } 7 | -------------------------------------------------------------------------------- /model.safetensors: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1101093d0c08b0185ec50527900ce3771fa43c17badc536c486631330eef64c6 3 | size 810701896 4 | -------------------------------------------------------------------------------- /special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": 
"<|endoftext|>", 4 | "lstrip": false, 5 | "normalized": false, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": false, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "<|endoftext|>", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": false, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_bos_token": false, 3 | "add_eos_token": false, 4 | "add_prefix_space": false, 5 | "added_tokens_decoder": { 6 | "0": { 7 | "content": "<|endoftext|>", 8 | "lstrip": false, 9 | "normalized": false, 10 | "rstrip": false, 11 | "single_word": false, 12 | "special": true 13 | }, 14 | "1": { 15 | "content": "<|padding|>", 16 | "lstrip": false, 17 | "normalized": false, 18 | "rstrip": false, 19 | "single_word": false, 20 | "special": true 21 | }, 22 | "50254": { 23 | "content": " ", 24 | "lstrip": false, 25 | "normalized": true, 26 | "rstrip": false, 27 | "single_word": false, 28 | "special": false 29 | }, 30 | "50255": { 31 | "content": " ", 32 | "lstrip": false, 33 | "normalized": true, 34 | "rstrip": false, 35 | "single_word": false, 36 | "special": false 37 | }, 38 | "50256": { 39 | "content": " ", 40 | "lstrip": false, 41 | "normalized": true, 42 | "rstrip": false, 43 | "single_word": false, 44 | "special": false 45 | }, 46 | "50257": { 47 | "content": " ", 48 | "lstrip": false, 49 | "normalized": true, 50 | "rstrip": false, 51 | "single_word": false, 52 | "special": false 53 | }, 54 | "50258": { 55 | "content": " ", 56 | "lstrip": false, 57 | "normalized": true, 58 | "rstrip": false, 59 | "single_word": false, 60 | "special": false 61 | }, 62 | "50259": { 63 | "content": " ", 64 | "lstrip": false, 65 | 
"normalized": true, 66 | "rstrip": false, 67 | "single_word": false, 68 | "special": false 69 | }, 70 | "50260": { 71 | "content": " ", 72 | "lstrip": false, 73 | "normalized": true, 74 | "rstrip": false, 75 | "single_word": false, 76 | "special": false 77 | }, 78 | "50261": { 79 | "content": " ", 80 | "lstrip": false, 81 | "normalized": true, 82 | "rstrip": false, 83 | "single_word": false, 84 | "special": false 85 | }, 86 | "50262": { 87 | "content": " ", 88 | "lstrip": false, 89 | "normalized": true, 90 | "rstrip": false, 91 | "single_word": false, 92 | "special": false 93 | }, 94 | "50263": { 95 | "content": " ", 96 | "lstrip": false, 97 | "normalized": true, 98 | "rstrip": false, 99 | "single_word": false, 100 | "special": false 101 | }, 102 | "50264": { 103 | "content": " ", 104 | "lstrip": false, 105 | "normalized": true, 106 | "rstrip": false, 107 | "single_word": false, 108 | "special": false 109 | }, 110 | "50265": { 111 | "content": " ", 112 | "lstrip": false, 113 | "normalized": true, 114 | "rstrip": false, 115 | "single_word": false, 116 | "special": false 117 | }, 118 | "50266": { 119 | "content": " ", 120 | "lstrip": false, 121 | "normalized": true, 122 | "rstrip": false, 123 | "single_word": false, 124 | "special": false 125 | }, 126 | "50267": { 127 | "content": " ", 128 | "lstrip": false, 129 | "normalized": true, 130 | "rstrip": false, 131 | "single_word": false, 132 | "special": false 133 | }, 134 | "50268": { 135 | "content": " ", 136 | "lstrip": false, 137 | "normalized": true, 138 | "rstrip": false, 139 | "single_word": false, 140 | "special": false 141 | }, 142 | "50269": { 143 | "content": " ", 144 | "lstrip": false, 145 | "normalized": true, 146 | "rstrip": false, 147 | "single_word": false, 148 | "special": false 149 | }, 150 | "50270": { 151 | "content": " ", 152 | "lstrip": false, 153 | "normalized": true, 154 | "rstrip": false, 155 | "single_word": false, 156 | "special": false 157 | }, 158 | "50271": { 159 | "content": " ", 160 | 
"lstrip": false, 161 | "normalized": true, 162 | "rstrip": false, 163 | "single_word": false, 164 | "special": false 165 | }, 166 | "50272": { 167 | "content": " ", 168 | "lstrip": false, 169 | "normalized": true, 170 | "rstrip": false, 171 | "single_word": false, 172 | "special": false 173 | }, 174 | "50273": { 175 | "content": " ", 176 | "lstrip": false, 177 | "normalized": true, 178 | "rstrip": false, 179 | "single_word": false, 180 | "special": false 181 | }, 182 | "50274": { 183 | "content": " ", 184 | "lstrip": false, 185 | "normalized": true, 186 | "rstrip": false, 187 | "single_word": false, 188 | "special": false 189 | }, 190 | "50275": { 191 | "content": " ", 192 | "lstrip": false, 193 | "normalized": true, 194 | "rstrip": false, 195 | "single_word": false, 196 | "special": false 197 | }, 198 | "50276": { 199 | "content": " ", 200 | "lstrip": false, 201 | "normalized": true, 202 | "rstrip": false, 203 | "single_word": false, 204 | "special": false 205 | } 206 | }, 207 | "bos_token": "<|endoftext|>", 208 | "clean_up_tokenization_spaces": false, 209 | "eos_token": "<|endoftext|>", 210 | "extra_special_tokens": {}, 211 | "model_max_length": 1000000000000000019884624838656, 212 | "pad_token": "<|endoftext|>", 213 | "tokenizer_class": "GPTNeoXTokenizer", 214 | "unk_token": "<|endoftext|>" 215 | } 216 | --------------------------------------------------------------------------------