├── step0_download_base_model.py
├── step3_merg.py
├── step4_test.py
├── step1_0_pdf_to_text.py
├── data.jsonl
├── step2_fine_tuning.py
├── step5_rag_it.py
├── Readme.md
├── step6_agentic.py
└── step1_1_generate_jsonl.py

/step0_download_base_model.py:
--------------------------------------------------------------------------------
1 | from transformers import AutoModelForCausalLM, AutoTokenizer
2 | 
3 | model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
4 | model = AutoModelForCausalLM.from_pretrained(model_id)
5 | tokenizer = AutoTokenizer.from_pretrained(model_id)
6 | 
7 | model.save_pretrained("tinyllama-base")
8 | tokenizer.save_pretrained("tinyllama-base")
9 | print("✅ Base model downloaded to ./tinyllama-base")
10 | 
--------------------------------------------------------------------------------
/step3_merg.py:
--------------------------------------------------------------------------------
1 | from transformers import AutoModelForCausalLM, AutoTokenizer
2 | from peft import PeftModel
3 | 
4 | # Load base model and LoRA adapter
5 | base_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
6 | model = PeftModel.from_pretrained(base_model, "tinyllama-finetuned")
7 | 
8 | # Merge and unload
9 | model = model.merge_and_unload()
10 | 
11 | # Save merged model
12 | model.save_pretrained("tinyllama-merged")
13 | AutoTokenizer.from_pretrained("tinyllama-finetuned").save_pretrained("tinyllama-merged")
14 | print("✅ Merged model saved to ./tinyllama-merged")
15 | 
--------------------------------------------------------------------------------
/step4_test.py:
--------------------------------------------------------------------------------
1 | from transformers import AutoTokenizer, AutoModelForCausalLM
2 | import torch
3 | 
4 | # Load merged model and tokenizer
5 | model = AutoModelForCausalLM.from_pretrained("tinyllama-merged", torch_dtype=torch.float16)
6 | tokenizer = AutoTokenizer.from_pretrained("tinyllama-merged")
7 | 
8 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9 | model = model.to(device)
10 | 
11 | prompt = "### Instruction:\nWho is Hassan Habib?\n\n### Input:\n\n### Response:\n"
12 | inputs = tokenizer(prompt, return_tensors="pt").to(device)
13 | 
14 | model.eval()
15 | with torch.no_grad():
16 |     outputs = model.generate(**inputs, max_new_tokens=50)
17 |     print(tokenizer.decode(outputs[0], skip_special_tokens=True))
--------------------------------------------------------------------------------
/step1_0_pdf_to_text.py:
--------------------------------------------------------------------------------
1 | import fitz # PyMuPDF
2 | import os
3 | import re
4 | 
5 | 
6 | def clean_text(text):
7 |     # Replace hyphenated words at line breaks (e.g., knowl-\nedge → knowledge)
8 |     text = re.sub(r'-\n', '', text)
9 | 
10 |     # Join lines inside a paragraph (i.e., not between paragraphs)
11 |     text = re.sub(r'(?<!\n)\n(?!\n)', ' ', text)
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
26 | > 🚫 Do **NOT** install `bitsandbytes` if you're on Windows or using an AMD GPU
27 | 
28 | ---
29 | 
30 | ## 📁 Folder Structure
31 | 
32 | ```
33 | standard-llama-finetune/
34 | ├── data.jsonl ← training dataset (editable)
35 | ├── step0_download_base_model.py ← download base model from Hugging Face
36 | ├── step1_0_pdf_to_text.py ← convert PDF to text
37 | ├── step1_1_generate_jsonl.py ← generate JSONL file for fine-tuning data
38 | ├── step2_fine_tuning.py ← fine-tune TinyLlama with LoRA
39 | ├── step3_merg.py ← merge LoRA adapter into base model
40 | ├── step4_test.py ← sanity-check the merged model
41 | ```
42 | 
43 | ---
44 | 
45 | ## 🔽 0. Download the Base Model
46 | 
47 | ```bash
48 | python step0_download_base_model.py
49 | ```
50 | 
51 | This will save the model to `./tinyllama-base/`.
52 | 
53 | ---
54 | 
55 | ## 🔽 1.0. Convert PDF to Raw Text
56 | 
57 | ```bash
58 | python step1_0_pdf_to_text.py
59 | ```
60 | 
61 | This will save the extracted `.txt` file at the repo root.
62 | 
63 | ---
64 | 
65 | ## 🔽 1.1. Generate JSONL Files
66 | 
67 | ```bash
68 | python step1_1_generate_jsonl.py
69 | ```
70 | 
71 | This will save the generated `.jsonl` file at the repo root.
72 | 
73 | ---
74 | 
75 | ## 🧠 2. Fine-Tune with LoRA
76 | 
77 | ```bash
78 | python step2_fine_tuning.py
79 | ```
80 | 
81 | - Trains on `data.jsonl`
82 | - Runs for 30 epochs (you can adjust inside the script)
83 | - Saves LoRA adapter to `tinyllama-finetuned/`
84 | 
85 | ---
86 | 
87 | ## 🔗 3. Merge LoRA into Base Model
88 | 
89 | ```bash
90 | python step3_merg.py
91 | ```
92 | 
93 | - Merges the LoRA weights into the base model
94 | - Saves to `tinyllama-merged/` — ready for conversion or inference
95 | 
96 | ---
97 | 
98 | ## 🧪 4. Run Sanity Check (Optional)
99 | 
100 | ```bash
101 | python step4_test.py
102 | ```
103 | 
104 | Expected output:
105 | 
106 | ```
107 | Hassan Habib is a software engineering leader and the author of The Standard.
108 | ```
109 | 
110 | ---
111 | 
112 | ## 🦙 5. Convert to `.gguf` for llama.cpp (requires CMake and a cloned, built llama.cpp)
113 | 
114 | ```bash
115 | cd llama.cpp/
116 | python3 convert_hf_to_gguf.py ../tinyllama-merged --outfile standard-mini.gguf --outtype f16
117 | ```
118 | 
119 | Then run with:
120 | 
121 | ```bash
122 | ./build/bin/llama-cli --model standard-mini.gguf --prompt "Describe Orchestration services"
123 | 
124 | ```
125 | 
126 | For fine-tuned answers, use the same prompt template the model was trained on:
127 | 
128 | ```
129 | ### Instruction:
130 | Who is Hassan Habib?
131 | 
132 | ### Input:
133 | 
134 | ### Response:
135 | ```
136 | 
137 | ---
138 | 
139 | ## 📽️ Video Step-by-Step
140 | ### How to Run AI Offline w/ .NET
141 | https://www.youtube.com/watch?v=lc6lVCe0XHI&t=3s
142 | 
143 | ### How to Fine-Tune your AI Model
144 | https://www.youtube.com/watch?v=FQr7VrK5RRQ&t=1087s
145 | 
146 | ### How to Feed your Llama Model (TXT to JSONL)
147 | https://www.youtube.com/watch?v=YB9cVyjV9Bo
148 | 
149 | ### Make Your Offline AI Model Talk to Local SQL — Fully Private RAG with LLaMA + FAISS
150 | https://www.youtube.com/watch?v=3jFpLNglWBc&t=293s
151 | 
152 | ## 👨‍🏫 Author
153 | Built and tested by [Hassan Habib](https://github.com/hassanhabib), fine-tuned with ❤️ and terminal grit.
154 | 
155 | ---
156 | 
157 | Want to turn this into a video or GitHub tutorial? It’s built to teach.
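
## 🗄️ 6. Talk to Local SQL (Optional)

The repo also includes `step5_rag_it.py` and `step6_agentic.py`, which are walked through in the videos above. As a rough sketch of what the agentic demo expects (these values are hard-coded in `step6_agentic.py`, so adjust them for your machine): ODBC Driver 17 for SQL Server, a local `SchoolDb` database (the connection string points at a server named `BIGB`), and a Mistral 7B Instruct `.gguf` under `models/`.

```bash
pip install pyodbc llama-cpp-python
python step6_agentic.py
```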
158 | 
--------------------------------------------------------------------------------
/step6_agentic.py:
--------------------------------------------------------------------------------
1 | import pyodbc
2 | from llama_cpp import Llama
3 | import re
4 | 
5 | # ------------------------
6 | # 🔍 Get DB schema
7 | # ------------------------
8 | def get_db_schema(cursor):
9 |     schema = ""
10 | 
11 |     cursor.execute("""
12 |         SELECT TABLE_NAME
13 |         FROM INFORMATION_SCHEMA.TABLES
14 |         WHERE TABLE_TYPE = 'BASE TABLE' AND TABLE_CATALOG = DB_NAME()
15 |     """)
16 |     tables = [row[0] for row in cursor.fetchall()]
17 | 
18 |     for table in tables:
19 |         cursor.execute(f"""
20 |             SELECT COLUMN_NAME, DATA_TYPE
21 |             FROM INFORMATION_SCHEMA.COLUMNS
22 |             WHERE TABLE_NAME = '{table}'
23 |         """)
24 |         columns = cursor.fetchall()
25 |         schema += f"Table: {table}\n"
26 |         for column_name, data_type in columns:
27 |             schema += f"- {column_name} ({data_type})\n"
28 |         schema += "\n"
29 |     return schema.strip()
30 | 
31 | # ------------------------
32 | # 🧠 Build LLM prompt
33 | # ------------------------
34 | def build_prompt(user_question, schema, error_message=None, previous_sql=None):
35 |     if error_message:
36 |         return f"""
37 | You previously generated this SQL which failed:
38 | 
39 | {previous_sql}
40 | 
41 | The error was:
42 | {error_message}
43 | 
44 | Try again. ONLY use the tables and columns listed in this schema.
45 | 
46 | Schema:
47 | {schema}
48 | 
49 | User question:
50 | "{user_question}"
51 | 
52 | Respond ONLY with a valid Microsoft SQL Server SELECT query and end it with a semicolon.
53 | """
54 |     else:
55 |         return f"""
56 | You are a SQL expert. You will receive a database schema and a user question.
57 | 
58 | You MUST:
59 | - Use ONLY table and column names exactly as provided in the schema
60 | - NEVER invent or singularize table names like 'Student' if only 'Students' exists
61 | - Output ONLY a valid Microsoft SQL Server SELECT statement ending with a semicolon
62 | 
63 | Schema:
64 | {schema}
65 | 
66 | Question: "{user_question}"
67 | 
68 | Output:
69 | """
70 | 
71 | # ------------------------
72 | # 🧼 Extract SELECT statement
73 | # ------------------------
74 | def extract_valid_sql(text):
75 |     match = re.search(r"(SELECT\s.+?;)", text, re.IGNORECASE | re.DOTALL)
76 |     if not match:
77 |         raise ValueError("No valid SELECT statement found.")
78 |     sql = match.group(1).strip()
79 |     if "LIMIT" in sql.upper():
80 |         raise ValueError("Invalid keyword 'LIMIT' for T-SQL.")
81 |     return sql
82 | 
83 | # ------------------------
84 | # 🚀 MAIN SCRIPT
85 | # ------------------------
86 | 
87 | # Step 0: Connect to MSSQL
88 | conn = pyodbc.connect("DRIVER={ODBC Driver 17 for SQL Server};SERVER=BIGB;DATABASE=SchoolDb;Trusted_Connection=yes;")
89 | cursor = conn.cursor()
90 | 
91 | # Step 1: Load local LLM
92 | llm = Llama(model_path="models/mistral-7b-instruct-v0.2.Q4_K_M.gguf")
93 | 
94 | # Step 2: User question
95 | user_question = "Find the names of all students who have a score higher than 150."
96 | 
97 | # Step 3: Introspect schema
98 | schema = get_db_schema(cursor)
99 | 
100 | # Step 4: Attempt loop
101 | MAX_RETRIES = 2
102 | attempt = 0
103 | error_message = None
104 | sql_query = None
105 | data = None
106 | 
107 | while attempt < MAX_RETRIES:
108 |     prompt = build_prompt(user_question, schema, error_message, sql_query)
109 |     response = llm(prompt=prompt, max_tokens=200)
110 |     raw_response = response['choices'][0]['text'].strip()
111 |     print(f"🔍 Attempt {attempt + 1} - LLM Response:\n{raw_response}")
112 | 
113 |     try:
114 |         sql_query = extract_valid_sql(raw_response)
115 |         print("✅ Extracted SQL:\n", sql_query)
116 | 
117 |         cursor.execute(sql_query)
118 |         results = cursor.fetchall()
119 |         columns = [column[0] for column in cursor.description]
120 |         data = [dict(zip(columns, row)) for row in results]
121 | 
122 |         if not data:
123 |             raise ValueError("Query ran but returned no results.")
124 | 
125 |         break # ✅ Success
126 | 
127 |     except Exception as e:
128 |         error_message = str(e)
129 |         print("🚨 Error:", error_message)
130 |         attempt += 1
131 | 
132 | # Step 5: Summarize or report failure
133 | if data:
134 |     summary_prompt = f"""
135 | Here are the results of the query:\n{data}
136 | Summarize this nicely for the user.
137 | """
138 |     summary = llm(prompt=summary_prompt, max_tokens=200)['choices'][0]['text'].strip()
139 |     print("📄 Summary:\n", summary)
140 | else:
141 |     print("❌ Failed after retries. Please rephrase your question.")
142 | 
--------------------------------------------------------------------------------
/step1_1_generate_jsonl.py:
--------------------------------------------------------------------------------
1 | # generate_questions_offline.py (CLI version)
2 | import os
3 | import json
4 | import re
5 | import nltk
6 | import subprocess
7 | import tempfile
8 | 
9 | # === SETUP ===
10 | nltk.download('punkt', quiet=True)
11 | from nltk.tokenize import sent_tokenize
12 | 
13 | # === CONFIGURATION ===
14 | TXT_FILE = "TheStandard.txt"
15 | JSONL_FILE = "TheStandard.jsonl"
16 | RAW_LOG_FILE = "raw_output.jsonl"
17 | MODEL_CLI = "llama.cpp/build/bin/llama-cli"
18 | MODEL_PATH = "llama.cpp/mistral/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
19 | 
20 | CHUNK_MIN_WORDS = 30
21 | CHUNK_MAX_WORDS = 100
22 | MAX_PARAGRAPHS = 1000
23 | MAX_QUESTIONS_PER_PARAGRAPH = 6
24 | 
25 | # === CLEAN TEXT ===
26 | def clean_text(text):
27 |     text = re.sub(r'-\n', '', text)
28 |     text = re.sub(r'(?= 3 and not q.lower().startswith(("of", "and", "the")):
94 |             q = re.sub(r'^\d+(\.\d+)*[\).]?\s*', '', q)
95 |             questions.append(q)
96 | 
97 |     return list(set(questions))
98 | 
99 | # === MAIN ===
100 | def main():
101 |     if not os.path.exists(TXT_FILE):
102 |         print(f"❌ File not found: {TXT_FILE}")
103 |         return
104 | 
105 |     with open(TXT_FILE, 'r', encoding='utf-8') as f:
106 |         raw_text = f.read()
107 | 
108 |     paragraphs = split_into_paragraphs(clean_text(raw_text))
109 |     print(f"📖 Found {len(paragraphs)} paragraphs to process.")
110 | 
111 |     written_count = 0
112 |     skipped_count = 0
113 | 
114 |     with open(JSONL_FILE, 'w', encoding='utf-8') as out:
115 |         for i, paragraph in enumerate(paragraphs[:MAX_PARAGRAPHS]):
116 |             try:
117 |                 questions = generate_questions(paragraph)
118 | 
119 |                 if not questions:
120 |                     print(f"[{i+1}] ⚠️ No questions generated.")
121 |                     skipped_count += 1
122 |                     continue
123 | 
124 |                 for j, question in enumerate(questions):
125 |                     entry = {
126 |                         "instruction": question,
127 |                         "input": "",
128 |                         "output": paragraph.strip()
129 |                     }
130 |                     out.write(json.dumps(entry, ensure_ascii=False) + '\n')
131 |                     written_count += 1
132 |                     print(f"[{i+1}.{j+1}] ✅ {question}")
133 | 
134 |             except Exception as e:
135 |                 print(f"[{i+1}] ❌ Error: {e}")
136 |                 skipped_count += 1
137 |                 continue
138 | 
139 |     print(f"\n📦 Done: {written_count} questions saved to '{JSONL_FILE}'")
140 |     print(f"⚠️ Skipped: {skipped_count} paragraphs")
141 | 
142 | # === ENTRY ===
143 | if __name__ == "__main__":
144 |     main()
145 | 
--------------------------------------------------------------------------------
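
A quick way to sanity-check the generated dataset before fine-tuning (this helper is not part of the repo; a minimal sketch that assumes the `TheStandard.jsonl` output file and the `instruction`/`input`/`output` keys written above):

```python
# check_jsonl.py (hypothetical helper): verify the generated JSONL before fine-tuning
import json

path = "TheStandard.jsonl"  # JSONL_FILE produced by step1_1_generate_jsonl.py
valid = 0

with open(path, "r", encoding="utf-8") as handle:
    for line_number, raw_line in enumerate(handle, start=1):
        raw_line = raw_line.strip()
        if not raw_line:
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError as error:
            print(f"Line {line_number}: invalid JSON ({error})")
            continue
        # Each record should carry the same keys the generator writes above.
        missing = [key for key in ("instruction", "input", "output") if key not in record]
        if missing:
            print(f"Line {line_number}: missing keys {missing}")
            continue
        valid += 1

print(f"{valid} well-formed records ready for fine-tuning")
```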