├── .gitattributes
├── README.md
├── config.json
├── generation_config.json
├── model.safetensors
├── special_tokens_map.json
├── tokenizer.json
└── tokenizer_config.json


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.safetensors filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Pythia Quantized Model for Question/Answer
 2 | 
 3 | This repository hosts a quantized version of the Pythia model, fine-tuned for question-answering tasks. The model has been optimized for efficient deployment while maintaining high accuracy, making it suitable for resource-constrained environments.
 4 | 
 5 | ## Model Details
 6 | 
 7 | - **Model Architecture:** Pythia-410m  
 8 | - **Task:** Chatbot  
 9 | - **Dataset:** sewon/ambig_qa  
10 | - **Quantization:** Float16  
11 | - **Fine-tuning Framework:** Hugging Face Transformers  
12 | 
13 | ## Usage
14 | 
15 | ### Installation
16 | 
17 | ```sh
18 | pip install transformers torch
19 | ```
20 | 
21 | ### Loading the Model
22 | 
23 | ```python
24 | from transformers import AutoTokenizer, AutoModelForCausalLM
25 | import torch
26 | tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-410m")
27 | model = AutoModelForCausalLM.from_pretrained("AventIQ-AI/pythia-410m-chatbot")
28 | 
29 | tokenizer.pad_token = tokenizer.eos_token
30 | 
31 | def chat_with_model(model, tokenizer, question, max_length=256):
32 |     """Generate response to a question"""
33 |     input_text = question
34 |     
35 |     inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
36 |     
37 |     with torch.no_grad():
38 |         outputs = model.generate(
39 |             inputs["input_ids"],
40 |             attention_mask=inputs["attention_mask"],  
41 |             max_length=max_length,
42 |             num_return_sequences=1,
43 |             temperature=1.0,
44 |             do_sample=True,  
45 |             pad_token_id=tokenizer.pad_token_id
46 |         )
47 | 
48 |     return tokenizer.decode(outputs[0], skip_special_tokens=True)
49 | 
50 | # Example usage
51 | test_question = "What is the capital of France?"
52 | response = chat_with_model(model, tokenizer, test_question)
53 | print("Answer", response)
54 | ```
55 | 
56 | ## Performance Metrics
57 | 
58 | - **Accuracy:** 0.56  
59 | - **F1 Score:** 0.56  
60 | - **Precision:** 0.68  
61 | - **Recall:** 0.56  
62 | 
63 | ## Fine-Tuning Details
64 | 
65 | ### Dataset
66 | 
67 | The `sewon/ambig_qa` dataset from the Hugging Face Hub was used; it contains question–answer pairs.
68 | 
69 | ### Training
70 | 
71 | - Number of epochs: 3  
72 | - Batch size: 4  
73 | - Evaluation strategy: epoch  
74 | - Learning rate: 2e-5  
75 | 
76 | ### Quantization
77 | 
78 | Post-training quantization was applied using PyTorch's built-in quantization framework to reduce the model size and improve inference efficiency.
79 | 
80 | ## Repository Structure
81 | 
82 | ```
83 | .
84 | ├── model/               # Contains the quantized model files
85 | ├── tokenizer/           # Tokenizer configuration and vocabulary files
86 | ├── model.safetensors    # Fine-tuned model weights
87 | └── README.md            # Model documentation
88 | ```
89 | 
90 | ## Limitations
91 | 
92 | - The model may not generalize well to domains outside the fine-tuning dataset.  
93 | - Quantization may result in minor accuracy degradation compared to full-precision models.  
94 | 
95 | ## Contributing
96 | 
97 | Contributions are welcome! Feel free to open an issue or submit a pull request if you have suggestions or improvements.
98 | 
99 | 


--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "_name_or_path": "./pythia-410m-finetuned",
 3 |   "architectures": [
 4 |     "GPTNeoXForCausalLM"
 5 |   ],
 6 |   "attention_bias": true,
 7 |   "attention_dropout": 0.0,
 8 |   "bos_token_id": 0,
 9 |   "classifier_dropout": 0.1,
10 |   "eos_token_id": 0,
11 |   "hidden_act": "gelu",
12 |   "hidden_dropout": 0.0,
13 |   "hidden_size": 1024,
14 |   "initializer_range": 0.02,
15 |   "intermediate_size": 4096,
16 |   "layer_norm_eps": 1e-05,
17 |   "max_position_embeddings": 2048,
18 |   "model_type": "gpt_neox",
19 |   "num_attention_heads": 16,
20 |   "num_hidden_layers": 24,
21 |   "pad_token_id": 0,
22 |   "partial_rotary_factor": 0.25,
23 |   "rope_scaling": null,
24 |   "rope_theta": 10000,
25 |   "rotary_emb_base": 10000,
26 |   "rotary_pct": 0.25,
27 |   "tie_word_embeddings": false,
28 |   "torch_dtype": "float16",
29 |   "transformers_version": "4.48.1",
30 |   "use_cache": true,
31 |   "use_parallel_residual": true,
32 |   "vocab_size": 50304
33 | }
34 | 


--------------------------------------------------------------------------------
/generation_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "_from_model_config": true,
3 |   "bos_token_id": 0,
4 |   "eos_token_id": 0,
5 |   "transformers_version": "4.48.1"
6 | }
7 | 


--------------------------------------------------------------------------------
/model.safetensors:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:1101093d0c08b0185ec50527900ce3771fa43c17badc536c486631330eef64c6
3 | size 810701896
4 | 


--------------------------------------------------------------------------------
/special_tokens_map.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "bos_token": {
 3 |     "content": "<|endoftext|>",
 4 |     "lstrip": false,
 5 |     "normalized": false,
 6 |     "rstrip": false,
 7 |     "single_word": false
 8 |   },
 9 |   "eos_token": {
10 |     "content": "<|endoftext|>",
11 |     "lstrip": false,
12 |     "normalized": false,
13 |     "rstrip": false,
14 |     "single_word": false
15 |   },
16 |   "pad_token": "<|endoftext|>",
17 |   "unk_token": {
18 |     "content": "<|endoftext|>",
19 |     "lstrip": false,
20 |     "normalized": false,
21 |     "rstrip": false,
22 |     "single_word": false
23 |   }
24 | }
25 | 


--------------------------------------------------------------------------------
/tokenizer_config.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "add_bos_token": false,
  3 |   "add_eos_token": false,
  4 |   "add_prefix_space": false,
  5 |   "added_tokens_decoder": {
  6 |     "0": {
  7 |       "content": "<|endoftext|>",
  8 |       "lstrip": false,
  9 |       "normalized": false,
 10 |       "rstrip": false,
 11 |       "single_word": false,
 12 |       "special": true
 13 |     },
 14 |     "1": {
 15 |       "content": "<|padding|>",
 16 |       "lstrip": false,
 17 |       "normalized": false,
 18 |       "rstrip": false,
 19 |       "single_word": false,
 20 |       "special": true
 21 |     },
 22 |     "50254": {
 23 |       "content": "                        ",
 24 |       "lstrip": false,
 25 |       "normalized": true,
 26 |       "rstrip": false,
 27 |       "single_word": false,
 28 |       "special": false
 29 |     },
 30 |     "50255": {
 31 |       "content": "                       ",
 32 |       "lstrip": false,
 33 |       "normalized": true,
 34 |       "rstrip": false,
 35 |       "single_word": false,
 36 |       "special": false
 37 |     },
 38 |     "50256": {
 39 |       "content": "                      ",
 40 |       "lstrip": false,
 41 |       "normalized": true,
 42 |       "rstrip": false,
 43 |       "single_word": false,
 44 |       "special": false
 45 |     },
 46 |     "50257": {
 47 |       "content": "                     ",
 48 |       "lstrip": false,
 49 |       "normalized": true,
 50 |       "rstrip": false,
 51 |       "single_word": false,
 52 |       "special": false
 53 |     },
 54 |     "50258": {
 55 |       "content": "                    ",
 56 |       "lstrip": false,
 57 |       "normalized": true,
 58 |       "rstrip": false,
 59 |       "single_word": false,
 60 |       "special": false
 61 |     },
 62 |     "50259": {
 63 |       "content": "                   ",
 64 |       "lstrip": false,
 65 |       "normalized": true,
 66 |       "rstrip": false,
 67 |       "single_word": false,
 68 |       "special": false
 69 |     },
 70 |     "50260": {
 71 |       "content": "                  ",
 72 |       "lstrip": false,
 73 |       "normalized": true,
 74 |       "rstrip": false,
 75 |       "single_word": false,
 76 |       "special": false
 77 |     },
 78 |     "50261": {
 79 |       "content": "                 ",
 80 |       "lstrip": false,
 81 |       "normalized": true,
 82 |       "rstrip": false,
 83 |       "single_word": false,
 84 |       "special": false
 85 |     },
 86 |     "50262": {
 87 |       "content": "                ",
 88 |       "lstrip": false,
 89 |       "normalized": true,
 90 |       "rstrip": false,
 91 |       "single_word": false,
 92 |       "special": false
 93 |     },
 94 |     "50263": {
 95 |       "content": "               ",
 96 |       "lstrip": false,
 97 |       "normalized": true,
 98 |       "rstrip": false,
 99 |       "single_word": false,
100 |       "special": false
101 |     },
102 |     "50264": {
103 |       "content": "              ",
104 |       "lstrip": false,
105 |       "normalized": true,
106 |       "rstrip": false,
107 |       "single_word": false,
108 |       "special": false
109 |     },
110 |     "50265": {
111 |       "content": "             ",
112 |       "lstrip": false,
113 |       "normalized": true,
114 |       "rstrip": false,
115 |       "single_word": false,
116 |       "special": false
117 |     },
118 |     "50266": {
119 |       "content": "            ",
120 |       "lstrip": false,
121 |       "normalized": true,
122 |       "rstrip": false,
123 |       "single_word": false,
124 |       "special": false
125 |     },
126 |     "50267": {
127 |       "content": "           ",
128 |       "lstrip": false,
129 |       "normalized": true,
130 |       "rstrip": false,
131 |       "single_word": false,
132 |       "special": false
133 |     },
134 |     "50268": {
135 |       "content": "          ",
136 |       "lstrip": false,
137 |       "normalized": true,
138 |       "rstrip": false,
139 |       "single_word": false,
140 |       "special": false
141 |     },
142 |     "50269": {
143 |       "content": "         ",
144 |       "lstrip": false,
145 |       "normalized": true,
146 |       "rstrip": false,
147 |       "single_word": false,
148 |       "special": false
149 |     },
150 |     "50270": {
151 |       "content": "        ",
152 |       "lstrip": false,
153 |       "normalized": true,
154 |       "rstrip": false,
155 |       "single_word": false,
156 |       "special": false
157 |     },
158 |     "50271": {
159 |       "content": "       ",
160 |       "lstrip": false,
161 |       "normalized": true,
162 |       "rstrip": false,
163 |       "single_word": false,
164 |       "special": false
165 |     },
166 |     "50272": {
167 |       "content": "      ",
168 |       "lstrip": false,
169 |       "normalized": true,
170 |       "rstrip": false,
171 |       "single_word": false,
172 |       "special": false
173 |     },
174 |     "50273": {
175 |       "content": "     ",
176 |       "lstrip": false,
177 |       "normalized": true,
178 |       "rstrip": false,
179 |       "single_word": false,
180 |       "special": false
181 |     },
182 |     "50274": {
183 |       "content": "    ",
184 |       "lstrip": false,
185 |       "normalized": true,
186 |       "rstrip": false,
187 |       "single_word": false,
188 |       "special": false
189 |     },
190 |     "50275": {
191 |       "content": "   ",
192 |       "lstrip": false,
193 |       "normalized": true,
194 |       "rstrip": false,
195 |       "single_word": false,
196 |       "special": false
197 |     },
198 |     "50276": {
199 |       "content": "  ",
200 |       "lstrip": false,
201 |       "normalized": true,
202 |       "rstrip": false,
203 |       "single_word": false,
204 |       "special": false
205 |     }
206 |   },
207 |   "bos_token": "<|endoftext|>",
208 |   "clean_up_tokenization_spaces": false,
209 |   "eos_token": "<|endoftext|>",
210 |   "extra_special_tokens": {},
211 |   "model_max_length": 1000000000000000019884624838656,
212 |   "pad_token": "<|endoftext|>",
213 |   "tokenizer_class": "GPTNeoXTokenizer",
214 |   "unk_token": "<|endoftext|>"
215 | }
216 | 


--------------------------------------------------------------------------------