├── .gitignore ├── Code ├── 01 - Fundamental │ ├── 01 - Word2Vec (Skipgram) from Scratch.ipynb │ ├── 02 - Word2Ve (Negative Sampling) from Scratch.ipynb │ ├── 03 - GloVe from Scratch.ipynb │ ├── 04 - GloVe (Gensim).ipynb │ ├── 05 - Window Classifier for NER.ipynb │ ├── 06 - Dependency Parsing.ipynb │ ├── code-along │ │ ├── 01 - Word2Vec (Skipgram).ipynb │ │ ├── 02 - Word2Vec (Neg Sampling).ipynb │ │ ├── 03 - GloVe from Scratch.ipynb │ │ ├── 04 - GloVe (Gensim).ipynb │ │ ├── 05 - Window Classifier for NER.ipynb │ │ └── 06 - Dependency Parsing.ipynb │ └── figures │ │ ├── glove.png │ │ ├── glove_weighting_func.png │ │ ├── ner_model.png │ │ └── ner_win.png ├── 02 - DL │ ├── 01 - Classification │ │ ├── 01 - TorchText.ipynb │ │ ├── 02 - TorchText + Padded.ipynb │ │ ├── 03 - TorchText + Padded + biLSTM.ipynb │ │ ├── 04 - TorchText + Padded + CNN.ipynb │ │ ├── code-along │ │ │ ├── 01 - biLSTM.ipynb │ │ │ └── 02 - CNN.ipynb │ │ └── figures │ │ │ ├── conv1d.png │ │ │ ├── glove.png │ │ │ ├── sentiment1.png │ │ │ ├── sentiment12.png │ │ │ ├── sentiment12_2.png │ │ │ ├── sentiment13.png │ │ │ ├── sentiment14.png │ │ │ ├── sentiment15.png │ │ │ ├── sentiment2.png │ │ │ ├── sentiment3.png │ │ │ ├── sentiment4.png │ │ │ ├── sentiment9.png │ │ │ └── word2vec.png │ ├── 02 - Seq2Seq │ │ ├── 01 - TorchText + LSTM + Teacher Forcing.ipynb │ │ ├── 02 - TorchText + GRU + Reusing Context Vectors + Teacher Forcing.ipynb │ │ ├── 03 - TorchText + biGRU + Attention + Teacher Forcing.ipynb │ │ ├── 04 - TorchText + biGRU + Attention + Masking + Padded + Teacher Forcing.ipynb │ │ ├── 05 - TorchText + CNN + Teacher Forcing.ipynb │ │ ├── 06 - TorchText + Transformer + Teacher Forcing.ipynb │ │ ├── 07 - TorchText + biGRU + Attention + Masking + Padded + Beam Search.ipynb │ │ ├── code-along │ │ │ ├── MT + Attention.ipynb │ │ │ └── MT + Transformer.ipynb │ │ └── figures │ │ │ ├── attention1.jpg │ │ │ ├── attention2.jpg │ │ │ ├── attention3.jpg │ │ │ ├── convseq2seq0.png │ │ │ ├── convseq2seq1.png │ │ │ ├── convseq2seq2.png │ │ │ ├── convseq2seq3.png │ │ │ ├── convseq2seq4.png │ │ │ ├── convseq2seq5.png │ │ │ ├── scheduler.png │ │ │ ├── seq2seq1.png │ │ │ ├── seq2seq10.png │ │ │ ├── seq2seq2.png │ │ │ ├── seq2seq3.png │ │ │ ├── seq2seq4.png │ │ │ ├── seq2seq6.png │ │ │ ├── seq2seq7.png │ │ │ ├── seq2seq8.png │ │ │ ├── seq2seq9.png │ │ │ ├── transformer-attention.png │ │ │ ├── transformer-decoder.png │ │ │ ├── transformer-encoder.png │ │ │ └── transformer1.png │ ├── 03 - Language Model │ │ ├── 01 - LSTM LM.ipynb │ │ ├── 02 - TRA LM + Batched Beam Search.ipynb │ │ └── code-along │ │ │ ├── LSTM LM.ipynb │ │ │ └── figures │ │ │ └── LM.png │ ├── 04 - Masked Language Model │ │ ├── BERT-update.ipynb │ │ ├── BERT.ipynb │ │ ├── code-along │ │ │ └── BERT.ipynb │ │ └── figures │ │ │ └── BERT_embed.png │ ├── Appendix - LSTM + Attention from Scratch.ipynb │ └── case-studies │ │ ├── Pruning │ │ └── Pruning.ipynb │ │ ├── QA │ │ ├── 1. DrQA.ipynb │ │ ├── 2. BiDAF.ipynb │ │ ├── 3. 
QANet.ipynb │ │ └── images │ │ │ ├── Stacked BiLSTM.jpg │ │ │ ├── attnkj.PNG │ │ │ ├── attnstan.PNG │ │ │ ├── bidaf.png │ │ │ ├── bilstm.png │ │ │ ├── c2q.PNG │ │ │ ├── charemb1.PNG │ │ │ ├── charemb2.PNG │ │ │ ├── charemb3.PNG │ │ │ ├── charemb4.PNG │ │ │ ├── conv.PNG │ │ │ ├── conv2d.PNG │ │ │ ├── depthconv.PNG │ │ │ ├── drqa1.PNG │ │ │ ├── drqa2.PNG │ │ │ ├── drqab.PNG │ │ │ ├── drqaflow.PNG │ │ │ ├── encoderblock.PNG │ │ │ ├── maxpool.PNG │ │ │ ├── p1.PNG │ │ │ ├── p2.PNG │ │ │ ├── pointconv.PNG │ │ │ ├── posemb.PNG │ │ │ ├── q2c.PNG │ │ │ ├── qanet.PNG │ │ │ ├── selfattn1.PNG │ │ │ ├── selfattn2.PNG │ │ │ ├── selfattn3.PNG │ │ │ ├── selfattn4.PNG │ │ │ ├── selfattn5.PNG │ │ │ ├── selfattn6.PNG │ │ │ ├── seq2seq.PNG │ │ │ ├── simimat.PNG │ │ │ ├── softmax.PNG │ │ │ └── squadjson.PNG │ │ ├── Summarization │ │ ├── GetToThePoint.ipynb │ │ └── images │ │ │ ├── attention.png │ │ │ ├── gettothepoint.png │ │ │ └── pointer.png │ │ └── code-along │ │ └── LSTM LM.ipynb ├── 03 - SpaCy │ ├── 01 - SpaCy.ipynb │ ├── 02 - SpaCy + Pipeline.ipynb │ ├── 03 - Spacy + Custom + Regex.ipynb │ ├── 04 - SpaCy + Training Neural Network.ipynb │ ├── 05 - Appendix (Extensions, Scaling).ipynb │ ├── case_studies │ │ ├── Resume NER (EntityRuler).ipynb │ │ ├── Reviews Classification.ipynb │ │ └── Technology NER (Neural Network).ipynb │ ├── code-along │ │ ├── 01 - SpaCy.ipynb │ │ ├── 02 - Sentiment.ipynb │ │ └── 03 - Resume Parsing.ipynb │ └── figures │ │ ├── container.svg │ │ ├── data-struct.png │ │ ├── dep_example.png │ │ ├── span2.png │ │ ├── table.png │ │ └── training1.png ├── 04 - Huggingface │ ├── 1 - Basic │ │ ├── 1 - Pipeline.ipynb │ │ ├── 2 - Tokenization.ipynb │ │ ├── 3 - Finetuning.ipynb │ │ └── 4 - Finetuning using PyTorch.ipynb │ ├── 2 - Datasets │ │ └── 1 - Datasets, Preprocessing, Streaming.ipynb │ ├── 3 - Tokenizer │ │ ├── 1 - Training a New Tokenizer from Existing.ipynb │ │ ├── 2 - Training a New Tokenizer from Scratch.ipynb │ │ ├── 3 - Mapping Tokens to Words.ipynb │ │ └── Appendix - Subword Tokenization Algorithms - Theory.ipynb │ ├── 4 - Instruction │ │ ├── 01 - Alpaca + SFT.ipynb │ │ └── 02 - Chat templates + SFT.ipynb │ ├── Appendix - Contrastive learning │ │ ├── .gitignore │ │ ├── figures │ │ │ └── simcse_architecture.png │ │ └── unsupervised-simcse.ipynb │ ├── Appendix - Efficient Network Training │ │ ├── 01-distilation │ │ │ └── distilBERT.ipynb │ │ ├── 02-quantization │ │ │ ├── 4_bit_LLM_Quantization_with_GPTQ.ipynb │ │ │ ├── Weight_Quantization.ipynb │ │ │ └── figures │ │ │ │ ├── llm-int8.webp │ │ │ │ ├── precision.webp │ │ │ │ ├── quantize-int8.webp │ │ │ │ └── scale-precision.webp │ │ ├── 03-peft │ │ │ ├── PEFT_from_hf.ipynb │ │ │ ├── PEFT_from_scratch.ipynb │ │ │ └── Soft Prompt.ipynb │ │ ├── 04-MoE.ipynb │ │ ├── 05-kv-cache.ipynb │ │ └── figures │ │ │ ├── BERT_embed.png │ │ │ ├── adapter.webp │ │ │ ├── adapter_hub.png │ │ │ ├── bitfit.pbm │ │ │ ├── llm-int8.webp │ │ │ ├── lora-2.png │ │ │ ├── lora.png │ │ │ ├── precision.webp │ │ │ ├── prefix_adapterhub.png │ │ │ ├── prompt.webp │ │ │ ├── quantize-int8.webp │ │ │ ├── scale-precision.webp │ │ │ └── soft_embedding.png │ ├── Appendix - NLP tasks │ │ ├── Code generation.ipynb │ │ ├── Multi-Task-Classification.ipynb │ │ ├── NER.ipynb │ │ ├── QA.ipynb │ │ └── Summarization.ipynb │ ├── Appendix - Sentence Embedding │ │ ├── S-BERT.ipynb │ │ └── figures │ │ │ ├── sbert-ablation.png │ │ │ └── sbert-architecture.png │ ├── Appendix - decoding │ │ ├── Decoding (top-p, top-k, beam).ipynb │ │ └── Decoding (with visualization).ipynb │ ├── Appendix - 
visualization │ │ └── bertviz.ipynb │ └── code-along │ │ ├── HuggingFace.ipynb │ │ ├── NER.ipynb │ │ └── QA.ipynb ├── 05 - Reasoning │ ├── 01-prompting.ipynb │ ├── 02-model-distillation.md │ ├── 03-deepseek.ipynb │ ├── README.md │ └── figures │ │ ├── RAG-process.png │ │ ├── allenembedding.png │ │ ├── ape.jpeg │ │ ├── cot-prompting.png │ │ ├── deepseek-meme.jpeg │ │ ├── general-knowledge.png │ │ ├── memory.png │ │ ├── mla.webp │ │ ├── react.png │ │ ├── retrieval.jpeg │ │ ├── self-consistency.jpeg │ │ ├── tot.jpeg │ │ ├── vectorstores.jpeg │ │ └── zero-shot-cot.png ├── 06 - RAG │ ├── .gitignore │ ├── 1 - Retrieve-Read │ │ ├── 01-information-retrieval.ipynb │ │ ├── 02-rag-from-scratch.ipynb │ │ └── datasets │ │ │ └── cat-facts.txt │ ├── 2 - Langchain │ │ ├── 01-prompt.ipynb │ │ ├── 02-retrieval.ipynb │ │ ├── 03-chain-memory.ipynb │ │ ├── 04-rag-chatbot.ipynb │ │ └── 05-rag-agent-tools.ipynb │ ├── 3 - Query-Translation │ │ ├── 01-multi-query.ipynb │ │ ├── 02-rag-fusion.ipynb │ │ ├── 03-query-decomposition.ipynb │ │ ├── 04-step-back.ipynb │ │ ├── 05-HyDE.ipynb │ │ ├── 06-rewrite-retrieve-read.ipynb │ │ └── 07-stepback-qa.ipynb │ ├── 4 - RAG-techniques │ │ ├── 1 - CRAG.ipynb │ │ ├── 2 - SelfRAG.ipynb │ │ └── 3 - AdaptiveRAG.ipynb │ ├── README.md │ ├── code-along │ │ ├── 01-rag-langchain.ipynb │ │ └── 02-agent-tools.ipynb │ └── figures │ │ ├── RAG-process.png │ │ ├── RAG_workflow.png │ │ ├── allenembedding.png │ │ ├── cot-prompting.png │ │ ├── ir.jpg │ │ ├── memory.png │ │ ├── rag-lewis.png │ │ ├── ragas-score.webp │ │ ├── retrieval.jpeg │ │ ├── vectorstores.jpeg │ │ └── zero-shot-cot.png ├── 07 - Human Preferences │ ├── README.md │ ├── figures │ │ ├── Dromedary-2.png │ │ ├── RLHF_w_custom_dataset.png │ │ ├── instructGPT.png │ │ ├── label_studio.png │ │ ├── salmon.png │ │ ├── u-net-architecture-2.png │ │ ├── u-net-architecture.png │ │ └── workflow-rrhf.png │ ├── huggingface │ │ ├── 00-RLHF_with_Custom_Datasets.ipynb │ │ ├── 00-TRL.ipynb │ │ ├── 01-InstructGPT.ipynb │ │ ├── 02-SFT.ipynb │ │ ├── 03-PPO.ipynb │ │ ├── 04-DPO.ipynb │ │ └── dpo-qlora-4bit.py │ └── scratch │ │ ├── dpo-train.py │ │ └── rrhf-train.ipynb ├── 08 - Speech │ ├── .gitignore │ ├── ASR │ │ ├── 01 - Voice + CNN1D.ipynb │ │ ├── 02 - Voice + CNN2D.ipynb │ │ ├── 03 - Speech + LSTM + CTC.ipynb │ │ └── 04 - Speech + Transformer + CTC.ipynb │ ├── README.md │ ├── TTS │ │ ├── 01 - WaveNet.ipynb │ │ ├── 02 - Tacotron.ipynb │ │ └── 03 - Whisper.ipynb │ └── figures │ │ ├── FT.webp │ │ ├── MelScale.gif │ │ └── Spectrogram.webp ├── 09 - Multimodal │ ├── 01-ViT.ipynb │ ├── 02-BEIT.ipynb │ ├── 03-CLIP.ipynb │ ├── README.md │ ├── appendix │ │ ├── 04-SimVLM-tentative.ipynb │ │ ├── 05-Flamingo.ipynb │ │ ├── 06-BLIP-2.ipynb │ │ └── 07-CoCa-tentative.ipynb │ └── figures │ │ ├── ViTArchitecture.png │ │ ├── beit_architecture.png │ │ ├── blip2_architecture_1.png │ │ ├── blip2_architecture_2.png │ │ ├── brain-mri-lgg.png │ │ ├── coca.png │ │ ├── flamingo.png │ │ ├── gated-xattn-dense-layers.png │ │ ├── patch.png │ │ ├── perceiver-resample.png │ │ ├── simvlm.png │ │ ├── teaser.png │ │ └── vit.gif ├── 10 - Benchmark │ └── README.md ├── 11 - Agentic AI │ ├── 03 - Memory │ │ └── draft.ipynb │ ├── README.md │ ├── figures │ │ ├── key_idea_memgpt.png │ │ └── type_memory.png │ └── requirements.txt └── Assignment-Quiz │ ├── .gitignore │ └── assignment │ ├── 2024 │ ├── A1_Engine_Search.pdf │ ├── A2_Language_Model.pdf │ ├── A3_Machine_Translation.pdf │ ├── A4_Resume_Parser.pdf │ ├── A5_Sentence_Embedding.pdf │ ├── A6_Student_Layers_Initialization.pdf │ ├── 
A7_AIT_GPT_Chatbot.pdf │ └── A8_Alpaca_Instruction_Eval_hf.pdf │ └── 2025 │ ├── A1_That_s_What_I_LIKE.pdf │ ├── A2_Language_Model.pdf │ ├── A3_Make_Your_Own_Machine_Translation_Language.pdf │ ├── A4_Do_you_AGREE.pdf │ ├── A5_DPO.pdf │ ├── A6_TALK_W_DOCS.pdf │ └── A7_DISTIL_VS_LORA.pdf ├── README.md ├── figures ├── allenembedding.svg ├── allenseq2vec.svg ├── allentokenid.svg ├── contextlength.png ├── dependency.png ├── fasttokenqa.png ├── heads.png ├── historybert.png ├── karpathy.jpg ├── ner.png ├── pipeline.png ├── pretoken.png ├── rnn_weight.png ├── semanticsearch.png ├── sentiment1.png ├── sentiment12.png ├── sentiment12_2.png ├── sentiment13.png ├── sentiment14.png ├── sentiment15.png ├── sentiment2.png ├── sentiment3.png ├── sentiment4.png ├── sentiment9.png ├── tokenizer.png ├── unigram1.png └── unigram2.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .DS_Store 107 | ._.DS_Store 108 | **/.DS_Store 109 | **/._.DS_Store 110 | 111 | # spacy 112 | configs/ 113 | docs/ 114 | data/ 115 | output/ 116 | models/ 117 | 118 | #huggingface 119 | model_weights/ 120 | tokenizer_weights/ 121 | 122 | #allennlp 123 | temp/ 124 | 125 | _code-along/ 126 | 127 | chakyenv/ 128 | 129 | .vector_cache/ 130 | Code/06 - Multimodal/save_model/U-Net.pth -------------------------------------------------------------------------------- /Code/01 - Fundamental/figures/glove.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/01 - Fundamental/figures/glove.png -------------------------------------------------------------------------------- /Code/01 - Fundamental/figures/glove_weighting_func.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/01 - Fundamental/figures/glove_weighting_func.png -------------------------------------------------------------------------------- /Code/01 - Fundamental/figures/ner_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/01 - Fundamental/figures/ner_model.png -------------------------------------------------------------------------------- /Code/01 - Fundamental/figures/ner_win.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/01 - Fundamental/figures/ner_win.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/conv1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/conv1d.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/glove.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/glove.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment1.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment12.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment12_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment12_2.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment13.png 
-------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment14.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment15.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment2.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment3.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment4.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/sentiment9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/sentiment9.png -------------------------------------------------------------------------------- /Code/02 - DL/01 - Classification/figures/word2vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/01 - Classification/figures/word2vec.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/attention1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/attention1.jpg -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/attention2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/attention2.jpg -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/attention3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/attention3.jpg -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/convseq2seq0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/convseq2seq0.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/convseq2seq1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/convseq2seq1.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/convseq2seq2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/convseq2seq2.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/convseq2seq3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/convseq2seq3.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/convseq2seq4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/convseq2seq4.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/convseq2seq5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/convseq2seq5.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/scheduler.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/seq2seq1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/seq2seq1.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/seq2seq10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/seq2seq10.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/seq2seq2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/seq2seq2.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/seq2seq3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/seq2seq3.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/seq2seq4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/seq2seq4.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/seq2seq6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/seq2seq6.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/seq2seq7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/seq2seq7.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/seq2seq8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/seq2seq8.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/seq2seq9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/seq2seq9.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - 
Seq2Seq/figures/transformer-attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/transformer-attention.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/transformer-decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/transformer-decoder.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/transformer-encoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/transformer-encoder.png -------------------------------------------------------------------------------- /Code/02 - DL/02 - Seq2Seq/figures/transformer1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/02 - Seq2Seq/figures/transformer1.png -------------------------------------------------------------------------------- /Code/02 - DL/03 - Language Model/code-along/figures/LM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/03 - Language Model/code-along/figures/LM.png -------------------------------------------------------------------------------- /Code/02 - DL/04 - Masked Language Model/figures/BERT_embed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/04 - Masked Language Model/figures/BERT_embed.png -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/Stacked BiLSTM.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/Stacked BiLSTM.jpg -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/attnkj.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/attnkj.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/attnstan.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/attnstan.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/bidaf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/bidaf.png -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/bilstm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/bilstm.png -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/c2q.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/c2q.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/charemb1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/charemb1.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/charemb2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/charemb2.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/charemb3.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/charemb3.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/charemb4.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/charemb4.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/conv.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/conv.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/conv2d.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/conv2d.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/depthconv.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/depthconv.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/drqa1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/drqa1.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/drqa2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/drqa2.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/drqab.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/drqab.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/drqaflow.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/drqaflow.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/encoderblock.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/encoderblock.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/maxpool.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/maxpool.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/p1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/p1.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/p2.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/p2.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/pointconv.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/pointconv.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/posemb.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/posemb.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/q2c.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/q2c.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/qanet.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/qanet.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/selfattn1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/selfattn1.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/selfattn2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/selfattn2.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/selfattn3.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/selfattn3.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/selfattn4.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/selfattn4.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/selfattn5.PNG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/selfattn5.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/selfattn6.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/selfattn6.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/seq2seq.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/seq2seq.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/simimat.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/simimat.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/softmax.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/softmax.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/QA/images/squadjson.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/QA/images/squadjson.PNG -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/Summarization/images/attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/Summarization/images/attention.png -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/Summarization/images/gettothepoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/Summarization/images/gettothepoint.png -------------------------------------------------------------------------------- /Code/02 - DL/case-studies/Summarization/images/pointer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/02 - DL/case-studies/Summarization/images/pointer.png 
-------------------------------------------------------------------------------- /Code/03 - SpaCy/figures/data-struct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/03 - SpaCy/figures/data-struct.png -------------------------------------------------------------------------------- /Code/03 - SpaCy/figures/dep_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/03 - SpaCy/figures/dep_example.png -------------------------------------------------------------------------------- /Code/03 - SpaCy/figures/span2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/03 - SpaCy/figures/span2.png -------------------------------------------------------------------------------- /Code/03 - SpaCy/figures/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/03 - SpaCy/figures/table.png -------------------------------------------------------------------------------- /Code/03 - SpaCy/figures/training1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/03 - SpaCy/figures/training1.png -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Contrastive learning/.gitignore: -------------------------------------------------------------------------------- 1 | simcse-draft.ipynb 2 | *.txt -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Contrastive learning/figures/simcse_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Contrastive learning/figures/simcse_architecture.png -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/02-quantization/4_bit_LLM_Quantization_with_GPTQ.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "yezrHxYvg_wR" 8 | }, 9 | "source": [ 10 | "# 4-bit LLM Quantization with GPTQ" 11 | ] 12 | }, 13 | { 14 | "attachments": {}, 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Optimal Brain Quantization\n", 19 | "### layer-wise compression problem\n", 20 | "$$\n", 21 | "\\text{argmin}_{\\mathbf{\\widehat{W}_\\ell}} \\quad ||\\mathbf{W_\\ell} \\mathbf{X_\\ell} - \\mathbf{\\widehat{W}_\\ell} \\mathbf{X_\\ell}||_2^2 \\quad \\text{s.t.} \\quad \\mathcal{C}(\\mathbf{\\widehat{W}_\\ell}) > C.\n", 22 | "$$\n", 23 | "\n", 24 | "### 
pruning technique\n", 25 | "$$\n", 26 | "\mathbf{w}_P = \text{argmin}_{\mathbf{w}_P} \, \mathbf{w}_P^\top ((\mathbf{H}^{-1})_P)^{-1} \mathbf{w}_P, \quad \boldsymbol{\delta_P} = - \mathbf{H}^{-1}_{:, P} ((\mathbf{H}^{-1})_P)^{-1} \mathbf{w}_P,\n", 27 | "$$\n", 28 | "\n", 29 | "## The GPTQ Algorithm\n", 30 | "### Step 1: Arbitrary Order Insight\n", 31 | "- quantizing weights in a fixed, arbitrary order works almost as well as greedily picking the weight that adds the least additional error\n", 32 | "### Step 2: Lazy Batch-Updates\n", 33 | "- GPTQ applies the algorithm to a batch of columns at a time, updating the remaining weights only after the batch is processed\n", 34 | "### Step 3: Cholesky Reformulation\n", 35 | "- use a Cholesky decomposition of the inverse Hessian to avoid accumulating numerical errors." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "metadata": { 42 | "id": "BhufqqQAaz6e" 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "# !BUILD_CUDA_EXT=0 pip install -q auto-gptq transformers" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "id": "dg8NyBL0ZNyw" 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "import random\n", 58 | "\n", 59 | "from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n", 60 | "from datasets import load_dataset\n", 61 | "import torch\n", 62 | "from transformers import AutoTokenizer\n", 63 | "\n", 64 | "\n", 65 | "# Define base model and output directory\n", 66 | "model_id = \"gpt2\"\n", 67 | "out_dir = model_id + \"-GPTQ\"" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "id": "C9352jN0ZP6I" 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "# Load quantization config, model, and tokenizer\n", 79 | "quantize_config = BaseQuantizeConfig(\n", 80 | "    bits=4,\n", 81 | "    group_size=128,\n", 82 | "    damp_percent=0.01,\n", 83 | "    desc_act=False,\n", 84 | ")\n", 85 | "model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)\n", 86 | "tokenizer = AutoTokenizer.from_pretrained(model_id)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "colab": { 94 | "base_uri": "https://localhost:8080/" 95 | }, 96 | "id": "6wuBLe6aZSe-", 97 | "outputId": "e4ebd71a-2854-4347-cebe-08cf040d1eb6" 98 | }, 99 | "outputs": [ 100 | { 101 | "name": "stderr", 102 | "output_type": "stream", 103 | "text": [ 104 | "WARNING:datasets.builder:Found cached dataset json (/root/.cache/huggingface/datasets/allenai___json/allenai--c4-6e494e9c0ee1404e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)\n", 105 | "Token indices sequence length is longer than the specified maximum sequence length for this model (2441065 > 1024). 
Running this sequence through the model will result in indexing errors\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "# Load data and tokenize examples\n", 111 | "n_samples = 1024\n", 112 | "data = load_dataset(\"allenai/c4\", data_files=\"en/c4-train.00001-of-01024.json.gz\", split=f\"train[:{n_samples*5}]\")\n", 113 | "tokenized_data = tokenizer(\"\\n\\n\".join(data['text']), return_tensors='pt')\n", 114 | "\n", 115 | "# Format tokenized examples\n", 116 | "examples_ids = []\n", 117 | "for _ in range(n_samples):\n", 118 | " i = random.randint(0, tokenized_data.input_ids.shape[1] - tokenizer.model_max_length - 1)\n", 119 | " j = i + tokenizer.model_max_length\n", 120 | " input_ids = tokenized_data.input_ids[:, i:j]\n", 121 | " attention_mask = torch.ones_like(input_ids)\n", 122 | " examples_ids.append({'input_ids': input_ids, 'attention_mask': attention_mask})" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "colab": { 130 | "base_uri": "https://localhost:8080/" 131 | }, 132 | "id": "ETsG2iYrXaUg", 133 | "outputId": "e48b825e-0ebc-4a73-dbfd-b5571cafd24e" 134 | }, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "CPU times: user 4min 35s, sys: 3.49 s, total: 4min 39s\n", 141 | "Wall time: 5min 8s\n" 142 | ] 143 | }, 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "('gpt2-GPTQ/tokenizer_config.json',\n", 148 | " 'gpt2-GPTQ/special_tokens_map.json',\n", 149 | " 'gpt2-GPTQ/vocab.json',\n", 150 | " 'gpt2-GPTQ/merges.txt',\n", 151 | " 'gpt2-GPTQ/added_tokens.json',\n", 152 | " 'gpt2-GPTQ/tokenizer.json')" 153 | ] 154 | }, 155 | "execution_count": 5, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "%%time\n", 162 | "\n", 163 | "# Quantize with GPTQ\n", 164 | "model.quantize(\n", 165 | " examples_ids,\n", 166 | " batch_size=1,\n", 167 | " use_triton=True,\n", 168 | ")\n", 169 | "\n", 170 | "# Save model and tokenizer\n", 171 | "model.save_quantized(out_dir, use_safetensors=True)\n", 172 | "tokenizer.save_pretrained(out_dir)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "colab": { 180 | "base_uri": "https://localhost:8080/" 181 | }, 182 | "id": "nktu1FsdZ9sd", 183 | "outputId": "9943c829-1b58-474a-f245-6aefa09d81dc" 184 | }, 185 | "outputs": [ 186 | { 187 | "name": "stderr", 188 | "output_type": "stream", 189 | "text": [ 190 | "WARNING:accelerate.utils.modeling:The safetensors archive passed at gpt2-GPTQ/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. 
Defaulting to 'pt' metadata.\n", 191 | "WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused attention module yet, will skip inject fused attention.\n", 192 | "WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused mlp module yet, will skip inject fused mlp.\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n", 198 | "\n", 199 | "# Reload model and tokenizer\n", 200 | "model = AutoGPTQForCausalLM.from_quantized(\n", 201 | " out_dir,\n", 202 | " device=device,\n", 203 | " use_triton=True,\n", 204 | " use_safetensors=True,\n", 205 | ")\n", 206 | "tokenizer = AutoTokenizer.from_pretrained(out_dir)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "colab": { 214 | "base_uri": "https://localhost:8080/" 215 | }, 216 | "id": "cRhIGrXdiFdt", 217 | "outputId": "6dca2078-6f01-44da-9895-3a03bdfb4b5b" 218 | }, 219 | "outputs": [ 220 | { 221 | "name": "stderr", 222 | "output_type": "stream", 223 | "text": [ 224 | "The model 'GPT2GPTQForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForCausalLM', 'RoCBertForCausalLM', 'RoFormerForCausalLM', 'RwkvForCausalLM', 'Speech2Text2ForCausalLM', 'TransfoXLLMHeadModel', 'TrOCRForCausalLM', 'XGLMForCausalLM', 'XLMWithLMHeadModel', 'XLMProphetNetForCausalLM', 'XLMRobertaForCausalLM', 'XLMRobertaXLForCausalLM', 'XLNetLMHeadModel', 'XmodForCausalLM'].\n", 225 | "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" 226 | ] 227 | }, 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "I have a dream,\" she told CNN last week. \"I have this dream of helping my mother find her own. 
But, to tell that for the first time, now that I'm seeing my mother now, just knowing how wonderful it is that\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "from transformers import pipeline\n", 238 | "\n", 239 | "generator = pipeline('text-generation', model=model, tokenizer=tokenizer)\n", 240 | "result = generator(\"I have a dream\", do_sample=True, max_length=50)[0]['generated_text']\n", 241 | "print(result)" 242 | ] 243 | }, 244 | { 245 | "attachments": {}, 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "## Reference\n", 250 | "- [GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers](https://arxiv.org/abs/2210.17323)\n", 251 | "- [GGML](https://github.com/ggerganov/ggml)\n", 252 | "- [Making LLMs even more accessible with bitsandbytes, 4-bit quantization and QLoRA](https://huggingface.co/blog/4bit-transformers-bitsandbytes)\n", 253 | "- [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ)" 254 | ] 255 | }, 256 | { 257 | "attachments": {}, 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [] 261 | } 262 | ], 263 | "metadata": { 264 | "accelerator": "GPU", 265 | "colab": { 266 | "authorship_tag": "ABX9TyOS2QEuJ1BDI/3IFsLsFIZo", 267 | "gpuType": "T4", 268 | "include_colab_link": true, 269 | "provenance": [] 270 | }, 271 | "kernelspec": { 272 | "display_name": "Python 3", 273 | "name": "python3" 274 | }, 275 | "language_info": { 276 | "codemirror_mode": { 277 | "name": "ipython", 278 | "version": 3 279 | }, 280 | "file_extension": ".py", 281 | "mimetype": "text/x-python", 282 | "name": "python", 283 | "nbconvert_exporter": "python", 284 | "pygments_lexer": "ipython3", 285 | "version": "3.11.6" 286 | } 287 | }, 288 | "nbformat": 4, 289 | "nbformat_minor": 0 290 | } 291 | -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/02-quantization/figures/llm-int8.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/02-quantization/figures/llm-int8.webp -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/02-quantization/figures/precision.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/02-quantization/figures/precision.webp -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/02-quantization/figures/quantize-int8.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/02-quantization/figures/quantize-int8.webp -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/02-quantization/figures/scale-precision.webp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/02-quantization/figures/scale-precision.webp -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/03-peft/PEFT_from_scratch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "## Adapter " 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import torch.nn as nn\n", 18 | "def adapter(in_dim, bottleneck_dim, out_dim):\n", 19 | " adapter_layers = nn.Sequential(\n", 20 | " nn.Linear(in_dim, bottleneck_dim),\n", 21 | " nn.GELU(),\n", 22 | " nn.Linear(bottleneck_dim, out_dim),\n", 23 | " )\n", 24 | " return adapter_layers" 25 | ] 26 | }, 27 | { 28 | "attachments": {}, 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Prompt Tuning" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "torch.Size([50, 768])" 44 | ] 45 | }, 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | "source": [ 52 | "import torch\n", 53 | "import torch.nn as nn\n", 54 | "\n", 55 | "num_tokens, embed_dim = 50, 768\n", 56 | "\n", 57 | "soft_prompt = nn.Parameter( # Make tensor trainable\n", 58 | " torch.rand(num_tokens, embed_dim)) # Initialize soft prompt tensor\n", 59 | "\n", 60 | "soft_prompt.shape" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 12, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "torch.Size([100, 768])" 72 | ] 73 | }, 74 | "execution_count": 12, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "seq_len = 100\n", 81 | "x = torch.rand(seq_len, embed_dim)\n", 82 | "x.shape" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 13, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "torch.Size([150, 768])" 94 | ] 95 | }, 96 | "execution_count": 13, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "def input_with_soft_prompt(x, soft_prompt):\n", 103 | " x = torch.concat([soft_prompt, x], dim=0) # Prepend soft prompt along the sequence (token) dimension\n", 104 | " return x\n", 105 | "\n", 106 | "x = input_with_soft_prompt(x, soft_prompt)\n", 107 | "x.shape" 108 | ] 109 | }, 110 | { 111 | "attachments": {}, 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "## Prefix Tuning" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "import torch\n", 125 | "import torch.nn as nn\n", 126 | "\n", 127 | "class FullyConnectedLayers(nn.Module):\n", 128 | " def __init__(self, input_size, hidden_size, output_size):\n", 129 | " super(FullyConnectedLayers, self).__init__()\n", 130 | " self.fc1 = nn.Linear(input_size, hidden_size)\n", 131 | " self.fc2 = nn.Linear(hidden_size, output_size)\n", 132 | " self.relu = nn.ReLU()\n", 133 | "\n", 134 | " def forward(self, x):\n", 135 | " x = self.fc1(x)\n", 136 | " x = 
self.relu(x)\n", 137 | " x = self.fc2(x)\n", 138 | " return x\n", 139 | "\n", 140 | "class LayerNorm(nn.Module):\n", 141 | " def __init__(self, features, eps=1e-6):\n", 142 | " super(LayerNorm, self).__init__()\n", 143 | " self.gamma = nn.Parameter(torch.ones(features))\n", 144 | " self.beta = nn.Parameter(torch.zeros(features))\n", 145 | " self.eps = eps\n", 146 | "\n", 147 | " def forward(self, x):\n", 148 | " mean = x.mean(-1, keepdim=True)\n", 149 | " std = x.std(-1, keepdim=True)\n", 150 | " return self.gamma * (x - mean) / (std + self.eps) + self.beta\n", 151 | "\n", 152 | "class SelfAttention(nn.Module):\n", 153 | " def __init__(self, embed_size, heads):\n", 154 | " super(SelfAttention, self).__init__()\n", 155 | " self.embed_size = embed_size\n", 156 | " self.heads = heads\n", 157 | " self.head_dim = embed_size // heads\n", 158 | "\n", 159 | " assert (\n", 160 | " self.head_dim * heads == embed_size\n", 161 | " ), \"Embedding size needs to be divisible by heads\"\n", 162 | "\n", 163 | " self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)\n", 164 | " self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)\n", 165 | " self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)\n", 166 | " self.fc_out = nn.Linear(heads * self.head_dim, embed_size)\n", 167 | "\n", 168 | " def forward(self, values, keys, query, mask):\n", 169 | " N = query.shape[0]\n", 170 | " value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]\n", 171 | "\n", 172 | " # Split the embedding into self.heads different pieces\n", 173 | " values = values.reshape(N, value_len, self.heads, self.head_dim)\n", 174 | " keys = keys.reshape(N, key_len, self.heads, self.head_dim)\n", 175 | " queries = query.reshape(N, query_len, self.heads, self.head_dim)\n", 176 | "\n", 177 | " values = self.values(values)\n", 178 | " keys = self.keys(keys)\n", 179 | " queries = self.queries(queries)\n", 180 | "\n", 181 | " # Calculate energy\n", 182 | " energy = torch.einsum(\"nqhd,nkhd->nhqk\", [queries, keys])\n", 183 | " if mask is not None:\n", 184 | " energy = energy.masked_fill(mask == 0, float(\"-1e20\"))\n", 185 | "\n", 186 | " # Normalize energy\n", 187 | " attention = torch.nn.functional.softmax(energy / (self.embed_size ** (1 / 2)), dim=3)\n", 188 | "\n", 189 | " # Attention to values\n", 190 | " out = torch.einsum(\"nhql,nlhd->nqhd\", [attention, values]).reshape(\n", 191 | " N, query_len, self.heads * self.head_dim\n", 192 | " )\n", 193 | "\n", 194 | " # Concatenate heads\n", 195 | " out = self.fc_out(out)\n", 196 | " return out" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "def transformer_block_with_prefix(soft_prompt, x):\n", 206 | " soft_prompt = FullyConnectedLayers(soft_prompt)\n", 207 | " x = torch.cat([soft_prompt, x], dim=-1)\n", 208 | " \n", 209 | " residual = x\n", 210 | " attention = SelfAttention(embed_size=... , heads=...) # Define embed_size and heads\n", 211 | " x = attention(x, x, x, mask=None) # Define mask if needed\n", 212 | " x = LayerNorm(x + residual)\n", 213 | " \n", 214 | " residual = x\n", 215 | " x = FullyConnectedLayers(... , ... , ...) 
# Define input_size, hidden_size, output_size\n", 216 | " x = LayerNorm(x + residual)\n", 217 | " return x" 218 | ] 219 | }, 220 | { 221 | "attachments": {}, 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "## LoRA" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 2, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "import torch.nn as nn\n", 235 | "import torch\n", 236 | "\n", 237 | "class LoRALayer(nn.Module):\n", 238 | " def __init__(self, in_dim, out_dim, rank, alpha):\n", 239 | " super().__init__()\n", 240 | " std_dev = 1 / torch.sqrt(torch.tensor(rank).float())\n", 241 | " self.A = nn.Parameter(torch.randn(in_dim, rank) * std_dev)\n", 242 | " self.B = nn.Parameter(torch.zeros(rank, out_dim))\n", 243 | " self.alpha = alpha\n", 244 | "\n", 245 | " def forward(self, x):\n", 246 | " x = self.alpha * (x @ self.A @ self.B)\n", 247 | " return x" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "import torch.nn.functional as F\n", 257 | "\n", 258 | "def forward(self, x):\n", 259 | " x = self.linear_1(x) #+ self.lora_1(x)\n", 260 | " x = F.relu(x)\n", 261 | " x = self.linear_2(x) #+ self.lora_2(x)\n", 262 | " logits = x # final linear outputs are the logits; apply F.softmax(x, dim=-1) if probabilities are needed\n", 263 | " return logits" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "class LinearWithLoRA(torch.nn.Module):\n", 273 | " def __init__(self, linear, rank, alpha):\n", 274 | " super().__init__()\n", 275 | " self.linear = linear\n", 276 | " self.lora = LoRALayer(\n", 277 | " linear.in_features, linear.out_features, rank, alpha\n", 278 | " )\n", 279 | "\n", 280 | " def forward(self, x):\n", 281 | " return self.linear(x) + self.lora(x)" 282 | ] 283 | } 284 | ], 285 | "metadata": { 286 | "kernelspec": { 287 | "display_name": "Python 3", 288 | "language": "python", 289 | "name": "python3" 290 | }, 291 | "language_info": { 292 | "codemirror_mode": { 293 | "name": "ipython", 294 | "version": 3 295 | }, 296 | "file_extension": ".py", 297 | "mimetype": "text/x-python", 298 | "name": "python", 299 | "nbconvert_exporter": "python", 300 | "pygments_lexer": "ipython3", 301 | "version": "3.12.6" 302 | }, 303 | "orig_nbformat": 4 304 | }, 305 | "nbformat": 4, 306 | "nbformat_minor": 2 307 | } 308 | -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/04-MoE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Mixture-of-Experts (MoE)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Sparse-Gated Mixture of Experts in LSTM [Shazeer et al. 
ICLR 2017](https://arxiv.org/pdf/1701.06538)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import torch\n", 24 | "import torch.nn as nn\n", 25 | "\n", 26 | "# Gating network based on LSTM\n", 27 | "class LSTMGatingNetwork(nn.Module):\n", 28 | " def __init__(self, input_dim, hidden_dim, num_experts):\n", 29 | " super(LSTMGatingNetwork, self).__init__()\n", 30 | " self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)\n", 31 | " self.fc = nn.Linear(hidden_dim, num_experts)\n", 32 | " \n", 33 | " def forward(self, x):\n", 34 | " # x shape: (batch, seq_len, input_dim)\n", 35 | " lstm_out, _ = self.lstm(x) # lstm_out: (batch, seq_len, hidden_dim)\n", 36 | " scores = self.fc(lstm_out) # scores: (batch, seq_len, num_experts)\n", 37 | " weights = torch.softmax(scores, dim=-1) # convert to probabilities per time step\n", 38 | " return weights\n", 39 | "\n", 40 | "# Mixture-of-Experts model with LSTM gating\n", 41 | "class LSTMMoE(nn.Module):\n", 42 | " def __init__(self, input_dim, hidden_dim, output_dim, num_experts, expert_network=None):\n", 43 | " super(LSTMMoE, self).__init__()\n", 44 | " # Gating network (LSTM-based)\n", 45 | " self.gating_network = LSTMGatingNetwork(input_dim, hidden_dim, num_experts)\n", 46 | " # Expert networks (if not provided, use simple linear layers as experts)\n", 47 | " if expert_network is None:\n", 48 | " # Default: each expert is a Linear layer from input_dim -> output_dim\n", 49 | " self.experts = nn.ModuleList([nn.Linear(input_dim, output_dim) for _ in range(num_experts)])\n", 50 | " else:\n", 51 | " # If a custom expert network class is provided, instantiate for each expert\n", 52 | " self.experts = nn.ModuleList([expert_network() for _ in range(num_experts)])\n", 53 | " \n", 54 | " def forward(self, x):\n", 55 | " # x shape: (batch, seq_len, input_dim)\n", 56 | " batch_size, seq_len, _ = x.size()\n", 57 | " # 1. Get gating weights from LSTM gating network\n", 58 | " gating_weights = self.gating_network(x) # shape: (batch, seq_len, num_experts)\n", 59 | " # 2. Compute outputs of each expert on the inputs\n", 60 | " expert_outputs = [expert(x) for expert in self.experts] # list of tensors, each (batch, seq_len, output_dim)\n", 61 | " expert_outputs = torch.stack(expert_outputs, dim=-1) # shape: (batch, seq_len, output_dim, num_experts)\n", 62 | " # 3. 
Weight and sum expert outputs using the gating weights\n", 63 | " weights_expanded = gating_weights.unsqueeze(-2) # shape: (batch, seq_len, 1, num_experts)\n", 64 | " combined_output = (expert_outputs * weights_expanded).sum(dim=-1) # (batch, seq_len, output_dim)\n", 65 | " return combined_output\n", 66 | "\n", 67 | "# Example usage:\n", 68 | "input_dim, hidden_dim, output_dim, num_experts = 4, 8, 3, 2\n", 69 | "model = LSTMMoE(input_dim, hidden_dim, output_dim, num_experts)\n", 70 | "# Dummy input: batch of 1 sequence, length 5, feature dim 4\n", 71 | "x = torch.randn(1, 5, input_dim)\n", 72 | "y = model(x)\n", 73 | "print(\"Input shape:\", x.shape)\n", 74 | "print(\"Output shape:\", y.shape)\n", 75 | "print(\"Output:\", y)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# Output (example):\n", 85 | "# Input shape: torch.Size([1, 5, 4]) \n", 86 | "# Output shape: torch.Size([1, 5, 3]) \n", 87 | "# Output: tensor([[[ 0.1991, -0.2271, -0.4974],\n", 88 | "# [-0.0026, -0.2181, -0.4217],\n", 89 | "# [-0.1261, -0.1725, 0.1611],\n", 90 | "# [-0.1749, 0.2343, -0.2493],\n", 91 | "# [ 0.2959, 0.3869, -0.7265]]], grad_fn=)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity. [Fedus et al, ICLR, 2021](https://arxiv.org/pdf/2101.03961)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "import torch\n", 108 | "import torch.nn as nn\n", 109 | "\n", 110 | "# Switch Transformer style MoE layer\n", 111 | "class SwitchMoE(nn.Module):\n", 112 | " def __init__(self, input_dim, expert_hidden_dim, output_dim, num_experts):\n", 113 | " super(SwitchMoE, self).__init__()\n", 114 | " self.num_experts = num_experts\n", 115 | " # Gating (router) network: a linear layer that scores each expert for a token\n", 116 | " self.gate = nn.Linear(input_dim, num_experts)\n", 117 | " # Expert networks: each is a feed-forward MLP (two linear layers with ReLU)\n", 118 | " self.experts = nn.ModuleList([\n", 119 | " nn.Sequential(\n", 120 | " nn.Linear(input_dim, expert_hidden_dim),\n", 121 | " nn.ReLU(),\n", 122 | " nn.Linear(expert_hidden_dim, output_dim)\n", 123 | " ) for _ in range(num_experts)\n", 124 | " ])\n", 125 | " \n", 126 | " def forward(self, x):\n", 127 | " # x shape: (batch, seq_len, input_dim)\n", 128 | " batch_size, seq_len, dim = x.size()\n", 129 | " # 1. Compute gating scores for each token and select top-1 expert\n", 130 | " gating_scores = self.gate(x) # (batch, seq_len, num_experts)\n", 131 | " expert_indices = gating_scores.argmax(dim=-1) # (batch, seq_len) index of chosen expert per token\n", 132 | " \n", 133 | " # 2. Prepare an output tensor\n", 134 | " output = torch.zeros(batch_size, seq_len, self.experts[0][-1].out_features)\n", 135 | " \n", 136 | " # 3. 
Route tokens to their chosen experts and compute expert outputs\n", 137 | " # Flatten batch and sequence dimensions for easier indexing\n", 138 | " x_flat = x.view(-1, dim) # shape: (batch*seq_len, input_dim)\n", 139 | " indices_flat = expert_indices.view(-1) # shape: (batch*seq_len,)\n", 140 | " output_flat = torch.zeros(x_flat.size(0), self.experts[0][-1].out_features)\n", 141 | " # Process tokens group by expert to avoid loop over each token\n", 142 | " for expert_idx in range(self.num_experts):\n", 143 | " mask = (indices_flat == expert_idx)\n", 144 | " if mask.any():\n", 145 | " # select all tokens assigned to this expert\n", 146 | " tokens = x_flat[mask] # shape: (n_tokens_for_expert, input_dim)\n", 147 | " # compute outputs for these tokens using the expert\n", 148 | " tokens_out = self.experts[expert_idx](tokens) # (n_tokens_for_expert, output_dim)\n", 149 | " output_flat[mask] = tokens_out # place outputs in the corresponding positions\n", 150 | " # Reshape back to (batch, seq_len, output_dim)\n", 151 | " output = output_flat.view(batch_size, seq_len, -1)\n", 152 | " return output\n", 153 | "\n", 154 | "# Example usage:\n", 155 | "input_dim = 5\n", 156 | "output_dim = 5 # usually same as input_dim in transformer for residual connection\n", 157 | "num_experts = 3\n", 158 | "expert_hidden_dim = 10 # hidden layer size in each expert FFN\n", 159 | "\n", 160 | "model = SwitchMoE(input_dim, expert_hidden_dim, output_dim, num_experts)\n", 161 | "# Dummy input: batch of 2 sequences, each with 4 tokens (seq_len=4), token feature dim=5\n", 162 | "x = torch.randn(2, 4, input_dim)\n", 163 | "y = model(x)\n", 164 | "print(\"Input shape:\", x.shape)\n", 165 | "print(\"Output shape:\", y.shape)\n", 166 | "print(\"Token-to-Expert assignments:\\n\", model.gate(x).argmax(dim=-1))\n", 167 | "print(\"Output:\\n\", y)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# Output (example):\n", 177 | "# Input shape: torch.Size([2, 4, 5]) \n", 178 | "# Output shape: torch.Size([2, 4, 5]) \n", 179 | "# Token-to-Expert assignments:\n", 180 | "# tensor([[1, 1, 1, 0],\n", 181 | "# [1, 0, 2, 1]]) \n", 182 | "# Output:\n", 183 | "# tensor([[[ 0.4486, -0.2896, -0.2615, -0.2078, 0.0117],\n", 184 | "# [ 0.4096, -0.3731, -0.2567, -0.2418, -0.0269],\n", 185 | "# [ 0.3724, -0.5101, -0.2314, -0.2437, 0.0134],\n", 186 | "# [-0.1222, -0.6251, 0.2697, 0.2317, -0.2568]],\n", 187 | "\n", 188 | "# [[ 0.5131, -0.3547, -0.3457, -0.1203, 0.1192],\n", 189 | "# [-0.0289, -0.3413, 0.2114, 0.0775, 0.0413],\n", 190 | "# [-0.2445, 0.0563, 0.1714, 0.2636, 0.3997],\n", 191 | "# [ 0.3778, -0.4098, -0.2812, -0.3058, -0.2247]]])" 192 | ] 193 | } 194 | ], 195 | "metadata": { 196 | "kernelspec": { 197 | "display_name": "Python 3", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.11.2" 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 2 216 | } 217 | -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/BERT_embed.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/BERT_embed.png -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/adapter.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/adapter.webp -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/adapter_hub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/adapter_hub.png -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/bitfit.pbm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/bitfit.pbm -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/llm-int8.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/llm-int8.webp -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/lora-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/lora-2.png -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/lora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/lora.png -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/precision.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/precision.webp -------------------------------------------------------------------------------- /Code/04 - 
Huggingface/Appendix - Efficient Network Training/figures/prefix_adapterhub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/prefix_adapterhub.png -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/prompt.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/prompt.webp -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/quantize-int8.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/quantize-int8.webp -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/scale-precision.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/scale-precision.webp -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Efficient Network Training/figures/soft_embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Efficient Network Training/figures/soft_embedding.png -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - NLP tasks/Multi-Task-Classification.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - NLP tasks/Multi-Task-Classification.ipynb -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Sentence Embedding/figures/sbert-ablation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Sentence Embedding/figures/sbert-ablation.png -------------------------------------------------------------------------------- /Code/04 - Huggingface/Appendix - Sentence Embedding/figures/sbert-architecture.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/04 - Huggingface/Appendix - Sentence Embedding/figures/sbert-architecture.png -------------------------------------------------------------------------------- /Code/05 - Reasoning/02-model-distillation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/02-model-distillation.md -------------------------------------------------------------------------------- /Code/05 - Reasoning/03-deepseek.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Deepseek\n", 8 | "\n", 9 | "In today's lecture, we will delve into the emergence of DeepSeek, a Chinese AI startup that has recently garnered significant attention in the artificial intelligence community. Founded in 2023, DeepSeek has rapidly developed advanced language models, notably DeepSeek-R1 and DeepSeek-V3, which have demonstrated performance comparable to leading models from established organizations like OpenAI. \n", 10 | "\n", 11 | "" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Key Techniques Employed by DeepSeek\n", 19 | "\n", 20 | "DeepSeek has implemented several innovative techniques in its model development:\n", 21 | "\n", 22 | "- **Mixture-of-Experts (MoE) Architecture**: DeepSeek-V3 utilizes an MoE architecture with 671 billion total parameters, where 37 billion are activated per token. This design allows the model to allocate computational resources efficiently, focusing on relevant subsets of parameters during processing. \n", 23 | "\n", 24 | "- **Multi-head Latent Attention (MLA)**: To enhance inference efficiency, DeepSeek employs MLA, which compresses the Key-Value cache into a latent vector. This compression reduces memory usage and accelerates processing times.\n", 25 | "\n", 26 | "- **Reinforcement Learning with Reward Engineering**: DeepSeek's training regimen includes large-scale reinforcement learning focused on reasoning tasks. The researchers developed a rule-based reward system to guide the model's learning process, which has proven more effective than traditional neural reward models. \n", 27 | "\n", 28 | "\n", 29 | "## Impact on the AI Industry\n", 30 | "\n", 31 | "DeepSeek's advancements have had a profound impact on the AI landscape:\n", 32 | "\n", 33 | "1. **Cost Efficiency**: By utilizing less advanced hardware and implementing optimization techniques, DeepSeek has significantly reduced the costs associated with training and deploying large-scale AI models. This approach challenges the prevailing notion that cutting-edge AI development necessitates substantial financial and computational resources. \n", 34 | "\n", 35 | "2. **Competitive Dynamics**: The impressive performance of DeepSeek's models has intensified competition among AI developers, prompting established companies to reassess their strategies and accelerate innovation to maintain their market positions.\n", 36 | "\n", 37 | "3. 
**Ethical and Legal Considerations**: DeepSeek's methods, particularly the use of AI distillation—a process involving training new models based on outputs from existing ones—have sparked discussions about intellectual property rights and the ethical implications of such practices. " 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "# 1. DeepSeek-V3" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## 1.1 Multi-head Latent Attention (MLA)\n", 52 | "\n", 53 | "\n", 54 | "We first introduce the standard MHA mechanism as background. \n", 55 | "Let $d$ be the embedding dimension, $n_h$ be the number of attention heads, $d_h$ be the dimension per head, and $\\mathbf{h}_{t} \\in \\mathbb{R}^{d}$ be the attention input of the $t$-th token at an attention layer. \n", 56 | "Standard MHA first produces $\\mathbf{q}_{t}, \\mathbf{k}_{t}, \\mathbf{v}_{t} \\in \\mathbb{R}^{d_h n_h}$ through three matrices $W^{Q}, W^{K}, W^{V} \\in \\mathbb{R}^{d_h n_h \\times d}$, respectively: \n", 57 | "\n", 58 | "$$\n", 59 | "\\mathbf{q}_{t} = W^{Q} \\mathbf{h}_{t},\n", 60 | "$$\n", 61 | "$$\n", 62 | "\\mathbf{k}_{t} = W^{K} \\mathbf{h}_{t},\n", 63 | "$$\n", 64 | "$$\n", 65 | "\\mathbf{v}_{t} = W^{V} \\mathbf{h}_{t}\n", 66 | "$$\n" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## 1.2 Low-Rank Key-Value Joint Compression\n", 81 | "\n", 82 | "The core of MLA is the low-rank joint compression for keys and values to reduce the KV cache:\n", 83 | "\n", 84 | "$$\n", 85 | "\\mathbf{c}_{t}^{KV} = W^{DKV} \\mathbf{h}_{t},\n", 86 | "$$\n", 87 | "$$\n", 88 | "\\mathbf{k}_{t}^{C} = W^{UK} \\mathbf{c}_{t}^{KV},\n", 89 | "$$\n", 90 | "$$\n", 91 | "\\mathbf{v}_{t}^{C} = W^{UV} \\mathbf{c}_{t}^{KV}\n", 92 | "$$\n", 93 | "\n", 94 | "\n", 95 | "where $\\mathbf{c}_{t}^{KV} \\in \\mathbb{R}^{d_c}$ is the compressed latent vector for keys and values; \n", 96 | "$d_c (\\ll d_h n_h)$ denotes the KV compression dimension;\n", 97 | "$W^{DKV} \\in \\mathbb{R}^{d_c \\times d}$ is the down-projection matrix;\n", 98 | "and $W^{UK},W^{UV} \\in \\mathbb{R}^{d_h n_h \\times d_c}$ are the up-projection matrices for keys and values, respectively. \n", 99 | "During inference, MLA only needs to cache $\\mathbf{c}_{t}^{KV}$, so its KV cache has only $d_{c}l$ elements, where $l$ denotes the number of layers. \n", 100 | "In addition, during inference, since $W^{UK}$ can be absorbed into $W^{Q}$, and $W^{UV}$ can be absorbed into $W^{O}$, we do not even need to materialize the keys and values explicitly for attention. \n", 101 | "This joint KV compression is what shrinks the KV cache during generation. 
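To make the savings concrete, here is a back-of-the-envelope sketch. The sizes are illustrative assumptions only (loosely in the spirit of DeepSeek-V2's reported configuration, not the exact DeepSeek-V3 numbers), and the small decoupled-RoPE key introduced in Section 1.3 is ignored:

```python
# Per-token, per-layer KV-cache element counts (illustrative sizes only).
n_h, d_h = 128, 128        # number of attention heads, dimension per head
d_c = 4 * d_h              # KV compression dimension, d_c << n_h * d_h

mha_cache = 2 * n_h * d_h  # standard MHA caches k_t and v_t for every head
mla_cache = d_c            # MLA caches only the compressed latent c_t^KV

print(mha_cache, mla_cache, mha_cache // mla_cache)  # 32768 512 64
```

Under these assumed sizes, MLA stores 64x fewer cache elements per token per layer than standard MHA.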
\n", 102 | "\n", 103 | "Moreover, in order to reduce the activation memory during training, we also perform low-rank compression for the queries, even if it cannot reduce the KV cache:\n", 104 | "\n", 105 | "$$\n", 106 | "\\mathbf{c}_{t}^{Q} = W^{DQ} \\mathbf{h}_{t}, \n", 107 | "$$\n", 108 | "$$\n", 109 | "\\mathbf{q}_{t}^{C} = W^{UQ} \\mathbf{c}_{t}^{Q},\n", 110 | "$$\n", 111 | "\n", 112 | "\n", 113 | "\n", 114 | "where $\\mathbf{c}_{t}^{Q} \\in \\mathbb{R}^{d_c^{\\prime}}$ is the compressed latent vector for queries; \n", 115 | "$d_c^{\\prime} (\\ll d_h n_h)$ denotes the query compression dimension; \n", 116 | "and $W^{DQ} \\in \\mathbb{R}^{d_c^{\\prime} \\times d}, W^{UQ} \\in \\mathbb{R}^{d_h n_h \\times d_c^{\\prime}}$ are the down-projection and up-projection matrices for queries, respectively. " 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "import torch\n", 126 | "import torch.nn as nn\n", 127 | "\n", 128 | "class LoxoRankKVCompression(nn.Module):\n", 129 | " def __init__(self, d_model=512, n_heads=8, d_head=64, d_compression=32):\n", 130 | " super().__init__()\n", 131 | " self.d_model = d_model\n", 132 | " self.n_heads = n_heads\n", 133 | " self.d_head = d_head\n", 134 | " self.d_compression = d_compression\n", 135 | "\n", 136 | " # Projection matrices\n", 137 | " self.W_DKV = nn.Linear(d_model, d_compression, bias=False) # Down-projection\n", 138 | " self.W_UK = nn.Linear(d_compression, n_heads * d_head, bias=False) # Key up-projection\n", 139 | " self.W_UV = nn.Linear(d_compression, n_heads * d_head, bias=False) # Value up-projection\n", 140 | "\n", 141 | " def forward(self, h_t, cache=None):\n", 142 | " \"\"\"Process one token step, returns compressed KV and reconstructed K/V\"\"\"\n", 143 | " # h_t shape: (batch_size, d_model)\n", 144 | " \n", 145 | " # Step 1: Joint KV compression (Equation 9)\n", 146 | " c_t_KV = self.W_DKV(h_t) # (batch_size, d_compression)\n", 147 | " \n", 148 | " # Step 2: Cache management (store compressed representation)\n", 149 | " if cache is not None:\n", 150 | " cache.append(c_t_KV.detach())\n", 151 | " \n", 152 | " # Step 3: Up-projection to original dimensions (Equations 10-11)\n", 153 | " k_t_C = self.W_UK(c_t_KV).view(-1, self.n_heads, self.d_head) # (batch_size, n_heads, d_head)\n", 154 | " v_t_C = self.W_UV(c_t_KV).view(-1, self.n_heads, self.d_head) # (batch_size, n_heads, d_head)\n", 155 | " \n", 156 | " return k_t_C, v_t_C, c_t_KV\n", 157 | "\n", 158 | "# Example usage\n", 159 | "batch_size = 1\n", 160 | "d_model = 512\n", 161 | "d_compression = 32\n", 162 | "\n", 163 | "# Initialize module\n", 164 | "compressor = LoxoRankKVCompression(d_model=d_model, d_compression=d_compression)\n", 165 | "\n", 166 | "# Simulate hidden state for one token\n", 167 | "h_t = torch.randn(batch_size, d_model) # (1, 512)\n", 168 | "\n", 169 | "# Forward pass\n", 170 | "compressed_k, compressed_v, c_t_KV = compressor(h_t)\n", 171 | "\n", 172 | "# During inference, we would only cache c_t_KV\n", 173 | "kv_cache = [c_t_KV.detach()]\n", 174 | "\n", 175 | "print(\"Original hidden state size:\", h_t.shape)\n", 176 | "print(\"Compressed KV cache size:\", c_t_KV.shape)\n", 177 | "print(\"Reconstructed keys shape:\", compressed_k.shape)\n", 178 | "print(\"Reconstructed values shape:\", compressed_v.shape)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "## 1.3 Decoupled Rotary Position Embedding\n", 186 | "\n", 
187 | "Standard RoPE is incompatible with low-rank KV compression as done above. Decoupled RoPE strategy uses additional multi-head queries `q_t` and a shared key `k_t` to carry RoPE. This sums up the complete MLA computation as:\n", 188 | "\n", 189 | "$$\n", 190 | "\\begin{aligned}\n", 191 | " [\\mathbf{q}_{t, 1}^{R};\\mathbf{q}_{t, 2}^{R};...;\\mathbf{q}_{t, n_{h}}^{R}] = \\mathbf{q}_{t}^{R} &= \\operatorname{RoPE}({W^{QR}} \\mathbf{c}_{t}^{Q}), \\\\\n", 192 | " \\mathbf{k}_{t}^{R} &= \\operatorname{RoPE}({W^{KR}} \\mathbf{h}_{t}), \\\\\n", 193 | " \\mathbf{q}_{t, i} &= [\\mathbf{q}_{t, i}^{C}; \\mathbf{q}_{t, i}^{R}], \\\\\n", 194 | " \\mathbf{k}_{t, i} &= [\\mathbf{k}_{t, i}^{C}; \\mathbf{k}_{t}^{R}], \\\\\n", 195 | " \\mathbf{o}_{t, i} &= \\sum_{j=1}^{t} \\operatorname{Softmax}_j\\left(\\frac{\\mathbf{q}_{t, i}^T \\mathbf{k}_{j, i}}{\\sqrt{d_{h} + d_{h}^{R}}}\\right) \\mathbf{v}_{j, i}^{C}, \\\\ \n", 196 | " \\mathbf{u}_{t} &= W^{O} [\\mathbf{o}_{t, 1};\\mathbf{o}_{t, 2};...;\\mathbf{o}_{t, n_{h}}].\n", 197 | "\\end{aligned}\n", 198 | "$$\n" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "## 1.4 KV Cache" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "# Simplified generation loop with KV cache\n", 215 | "def generate(input_ids, max_length=50):\n", 216 | " kv_cache = [] # Stores compressed KV states\n", 217 | " for _ in range(max_length):\n", 218 | " # Forward pass: compute logits and update cache\n", 219 | " logits, kv_cache = model(input_ids, kv_cache=kv_cache)\n", 220 | " # Sample next token\n", 221 | " next_token = sample(logits)\n", 222 | " input_ids = torch.cat([input_ids, next_token], dim=-1)\n", 223 | " return input_ids" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "# 2. DeepSeek-R1 " 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## 2.1 Gate Implementation" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "## 2.2 Cold-start" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "## 2.3 Reasoning-Oriented Reinforcement Learning" 252 | ] 253 | } 254 | ], 255 | "metadata": { 256 | "kernelspec": { 257 | "display_name": "Python 3", 258 | "language": "python", 259 | "name": "python3" 260 | }, 261 | "language_info": { 262 | "codemirror_mode": { 263 | "name": "ipython", 264 | "version": 3 265 | }, 266 | "file_extension": ".py", 267 | "mimetype": "text/x-python", 268 | "name": "python", 269 | "nbconvert_exporter": "python", 270 | "pygments_lexer": "ipython3", 271 | "version": "3.9.6" 272 | } 273 | }, 274 | "nbformat": 4, 275 | "nbformat_minor": 2 276 | } 277 | -------------------------------------------------------------------------------- /Code/05 - Reasoning/README.md: -------------------------------------------------------------------------------- 1 | ## Papers on advanced prompting to improve reasoning 2 | - [Chain-of-Thought Prompting Elicits Reasoning in Large Language Models (2022)](https://arxiv.org/abs/2201.11903): Using few-shot prompts to ask models to think step by step improves their reasoning. PaLM's score on math word problems (GSM8K) rises from 18% to 57%. 
3 | - [Self-Consistency Improves Chain of Thought Reasoning in Language Models (2022)](https://arxiv.org/abs/2203.11171): Taking votes from multiple outputs improves accuracy even more. Voting across 40 outputs raises PaLM's score on math word problems further, from 57% to 74%, and code-davinci-002's from 60% to 78% (a minimal voting sketch follows this list). 4 | - [Tree of Thoughts: Deliberate Problem Solving with Large Language Models (2023)](https://arxiv.org/abs/2305.10601): Searching over trees of step by step reasoning helps even more than voting over chains of thought. It lifts GPT-4's scores on creative writing and crosswords. 5 | - [Language Models are Zero-Shot Reasoners (2022)](https://arxiv.org/abs/2205.11916): Telling instruction-following models to think step by step improves their reasoning. It lifts text-davinci-002's score on math word problems (GSM8K) from 13% to 41%. 6 | - [Large Language Models Are Human-Level Prompt Engineers (2023)](https://arxiv.org/abs/2211.01910): Automated searching over possible prompts found a prompt that lifts scores on math word problems (GSM8K) to 43%, 2 percentage points above the human-written prompt in Language Models are Zero-Shot Reasoners. 7 | - [Reprompting: Automated Chain-of-Thought Prompt Inference Through Gibbs Sampling (2023)](https://arxiv.org/abs/2305.09993): Automated searching over possible chain-of-thought prompts improved ChatGPT's scores on a few benchmarks by 0–20 percentage points. 8 | - [Faithful Reasoning Using Large Language Models (2022)](https://arxiv.org/abs/2208.14271): Reasoning can be improved by a system that combines: chains of thought generated by alternative selection and inference prompts, a halter model that chooses when to halt selection-inference loops, a value function to search over multiple reasoning paths, and sentence labels that help avoid hallucination. 9 | - [STaR: Bootstrapping Reasoning With Reasoning (2022)](https://arxiv.org/abs/2203.14465): Chain of thought reasoning can be baked into models via fine-tuning. For tasks with an answer key, example chains of thoughts can be generated by language models. 10 | - [ReAct: Synergizing Reasoning and Acting in Language Models (2023)](https://arxiv.org/abs/2210.03629): For tasks with tools or an environment, chain of thought works better if you prescriptively alternate between Reasoning steps (thinking about what to do) and Acting (getting information from a tool or environment). 11 | - [Reflexion: an autonomous agent with dynamic memory and self-reflection (2023)](https://arxiv.org/abs/2303.11366): Retrying tasks with memory of prior failures improves subsequent performance. 12 | - [Demonstrate-Search-Predict: Composing retrieval and language models for knowledge-intensive NLP (2023)](https://arxiv.org/abs/2212.14024): Models augmented with knowledge via a "retrieve-then-read" pipeline can be improved with multi-hop chains of searches. 13 | - [Improving Factuality and Reasoning in Language Models through Multiagent Debate (2023)](https://arxiv.org/abs/2305.14325): Generating debates between a few ChatGPT agents over a few rounds improves scores on various benchmarks. Math word problem scores rise from 77% to 85%. 
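As a minimal illustration of the majority-voting idea behind Self-Consistency above, here is a sketch in which made-up answer strings stand in for real sampled chain-of-thought completions (no model calls are made):

```python
from collections import Counter

# Final answers parsed from 8 hypothetical sampled chain-of-thought
# completions for one math word problem (made-up values).
sampled_answers = ["18", "18", "17", "18", "26", "18", "17", "18"]

# Keep the most common final answer across samples.
majority_answer, votes = Counter(sampled_answers).most_common(1)[0]
print(majority_answer, votes)  # -> 18 5
```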
-------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/RAG-process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/RAG-process.png -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/allenembedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/allenembedding.png -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/ape.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/ape.jpeg -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/cot-prompting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/cot-prompting.png -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/deepseek-meme.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/deepseek-meme.jpeg -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/general-knowledge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/general-knowledge.png -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/memory.png -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/mla.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/mla.webp -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/react.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/react.png -------------------------------------------------------------------------------- 
/Code/05 - Reasoning/figures/retrieval.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/retrieval.jpeg -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/self-consistency.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/self-consistency.jpeg -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/tot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/tot.jpeg -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/vectorstores.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/vectorstores.jpeg -------------------------------------------------------------------------------- /Code/05 - Reasoning/figures/zero-shot-cot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/05 - Reasoning/figures/zero-shot-cot.png -------------------------------------------------------------------------------- /Code/06 - RAG/.gitignore: -------------------------------------------------------------------------------- 1 | /vectordb_path 2 | /cache 3 | /developing 4 | /chatgpt-tutorial 5 | .env -------------------------------------------------------------------------------- /Code/06 - RAG/2 - Langchain/01-prompt.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Natural Language Processing\n", 9 | "\n", 10 | "## Part 1: Introduction to Langchain\n", 11 | "LangChain is a framework for developing applications powered by language models. It enables applications that:\n", 12 | "1. `Are context-aware`: connect a LM to sources of context (prompt instructions, few shot examples, content to ground its response in, etc.)\n", 13 | "2. `Reason`: rely on a LM to reason (about how to answer based on provided context, what actions to take, etc.)\n", 14 | "\n", 15 | "## Use cases\n", 16 | "1. Document question answering (Law, Medicine)\n", 17 | "2. Chatbots (Marketing, HR Recruiter)\n", 18 | "3. Analyzing structured data (SQL, Report Documents)" 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "The most common and most important chain that LangChain helps create contains three things:\n", 27 | "\n", 28 | "1. `LLM`: The LM is the core reasoning engine here. In order to work with LangChain, you need to understand the different types of LMs and how to work with them.\n", 29 | "2. 
`Prompt Templates`: This provides instructions to the language model. This controls what the language model outputs, so understanding how to construct prompts and different prompting strategies is crucial.\n", 30 | "3. `Output Parsers`: These translate the raw response from the LLM to a more workable format, making it easy to use the output downstream." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import os\n", 40 | "# Set GPU device\n", 41 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n", 42 | "\n", 43 | "os.environ['http_proxy'] = 'http://192.41.170.23:3128'\n", 44 | "os.environ['https_proxy'] = 'http://192.41.170.23:3128'" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 1, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "'0.0.350'" 56 | ] 57 | }, 58 | "execution_count": 1, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "# !pip install langchain==0.0.350\n", 65 | "import langchain\n", 66 | "langchain.__version__" 67 | ] 68 | }, 69 | { 70 | "attachments": {}, 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Part 2: Prompts\n", 75 | "\n", 76 | "A prompt for a language model is a set of instructions or input provided by a user to guide the model's response, helping it understand the context and generate relevant and coherent language-based output, such as answering questions, completing sentences, or engaging in a conversation.\n", 77 | "\n", 78 | "LangChain provides several classes and functions to help construct and work with prompts.\n", 79 | "\n", 80 | "- `Prompt templates`: Parametrized model inputs\n", 81 | "- `Example selectors`: Dynamically select examples to include in prompts (not included in this lecture)\n", 82 | "\n", 83 | "### 2.1 Prompt templates\n", 84 | "Prompt templates are pre-defined recipes for generating prompts for language models.\n", 85 | "\n", 86 | "A template may include `instructions`, `few-shot examples`, and `specific context and questions` appropriate for a given task." 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stderr", 96 | "output_type": "stream", 97 | "text": [ 98 | "/Users/guntsv/Library/Python/3.9/lib/python/site-packages/urllib3/__init__.py:34: NotOpenSSLWarning: urllib3 v2.0 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020\n", 99 | " warnings.warn(\n" 100 | ] 101 | }, 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "PromptTemplate(input_variables=['adjective', 'content'], template='Tell me a {adjective} joke about {content}.')" 106 | ] 107 | }, 108 | "execution_count": 2, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "#For additional validation, specify input_variables explicitly. 
\n", 115 | "#These variables will be compared against the variables present in the template string during instantiation\n", 116 | "#raising an exception if there is a mismatch.\n", 117 | "\n", 118 | "from langchain.prompts import PromptTemplate\n", 119 | "\n", 120 | "prompt_template = PromptTemplate(\n", 121 | " input_variables=[\"adjective\", \"content\"],\n", 122 | " template=\"Tell me a {adjective} joke about {content}.\",\n", 123 | ")\n", 124 | "\n", 125 | "prompt_template" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 3, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "'Tell me a funny joke about chickens.'" 137 | ] 138 | }, 139 | "execution_count": 3, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "prompt_template.format(adjective=\"funny\", content=\"chickens\")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 3, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "'Tell me a funny joke about chickens.'" 157 | ] 158 | }, 159 | "execution_count": 3, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "#using from_template function will neglect input_variables params.\n", 166 | "from langchain.prompts import PromptTemplate\n", 167 | "\n", 168 | "prompt_template = PromptTemplate.from_template(\n", 169 | " template = \"Tell me a {adjective} joke about {content}.\",\n", 170 | ")\n", 171 | "\n", 172 | "prompt_template.format(adjective=\"funny\", content=\"chickens\")" 173 | ] 174 | }, 175 | { 176 | "attachments": {}, 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "### 2.2 ChatPromptTemplate\n", 181 | "\n", 182 | "The prompt to chat models is a list of chat messages.\n", 183 | "\n", 184 | "Each chat message is associated with content, and an additional parameter called `role`.\n", 185 | "\n", 186 | "For example, in the OpenAI Chat Completions API, a chat message can be associated with an AI assistant, a human or a system role.\n", 187 | "\n", 188 | "LangChain provides several objects to easily distinguish between different roles:\n", 189 | "\n", 190 | "- `SystemMessage`: A ChatMessage coming from the system.\n", 191 | "- `AIMessage`: A ChatMessage coming from an AI/assistant.\n", 192 | "- `HumanMessage`: A ChatMessage coming from a human/user.\n", 193 | "- `FunctionMessage`: A ChatMessage coming from a function call." 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 15, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "[SystemMessage(content='You serve as the teacher assistant to Chaky, who instructs an NLP course. 
\nYour primary responsibility is to assist students in successfully completing the NLP course.'),\n", 205 | " HumanMessage(content='How to get an A if I fail every quiz?')]" 206 | ] 207 | }, 208 | "execution_count": 15, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "from langchain.prompts import ChatPromptTemplate\n", 215 | "from langchain.schema.messages import (\n", 216 | " AIMessage,\n", 217 | " HumanMessage,\n", 218 | " SystemMessage,\n", 219 | " ChatMessage\n", 220 | ")\n", 221 | "from langchain.prompts import HumanMessagePromptTemplate\n", 222 | "\n", 223 | "'''\n", 224 | "HumanMessage: A message sent from the perspective of the human\n", 225 | "AIMessage: A message sent from the perspective of the AI the human is interacting with\n", 226 | "SystemMessage: A message setting the objectives the AI should follow\n", 227 | "ChatMessage: A message allowing for arbitrary setting of role. You won’t be using this too much\n", 228 | "'''\n", 229 | "\n", 230 | "chat_template = ChatPromptTemplate.from_messages(\n", 231 | " [\n", 232 | " SystemMessage(content=(\"\"\"You serve as the teacher assistant to Chaky, who instructs an NLP course. \n", 233 | "Your primary responsibility is to assist students in successfully completing the NLP course.\"\"\")),\n", 234 | " HumanMessagePromptTemplate.from_template(\"{text}\"),\n", 235 | " ]\n", 236 | ")\n", 237 | "\n", 238 | "chat_template.format_messages(text=\"How to get an A if I fail every quiz?\")" 239 | ] 240 | }, 241 | { 242 | "attachments": {}, 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "## Appendix \n", 247 | "- [Custom prompt template](https://python.langchain.com/docs/modules/model_io/prompts/prompt_templates/custom_prompt_template)\n", 248 | "- [Few-shot prompt templates](https://python.langchain.com/docs/modules/model_io/prompts/prompt_templates/few_shot_examples)\n", 249 | "- [Few-shot examples for chat models](https://python.langchain.com/docs/modules/model_io/prompts/prompt_templates/few_shot_examples_chat)" 250 | ] 251 | }, 252 | { 253 | "attachments": {}, 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [] 257 | } 258 | ], 259 | "metadata": { 260 | "kernelspec": { 261 | "display_name": "Python 3", 262 | "language": "python", 263 | "name": "python3" 264 | }, 265 | "language_info": { 266 | "codemirror_mode": { 267 | "name": "ipython", 268 | "version": 3 269 | }, 270 | "file_extension": ".py", 271 | "mimetype": "text/x-python", 272 | "name": "python", 273 | "nbconvert_exporter": "python", 274 | "pygments_lexer": "ipython3", 275 | "version": "3.12.6" 276 | }, 277 | "orig_nbformat": 4 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 2 281 | } 282 | -------------------------------------------------------------------------------- /Code/06 - RAG/3 - Query-Translation/01-multi-query.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/3 - Query-Translation/01-multi-query.ipynb -------------------------------------------------------------------------------- /Code/06 - RAG/3 - Query-Translation/02-rag-fusion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "993c2768", 7 | "metadata": {}, 8 | "source": [ 9 | "# RAG Fusion\n", 10 | 
"\n", 11 | "Re-implemented from [this GitHub repo](https://github.com/Raudaschl/rag-fusion), all credit to original author\n", 12 | "\n", 13 | "> RAG-Fusion, a search methodology that aims to bridge the gap between traditional search paradigms and the multifaceted dimensions of human queries. Inspired by the capabilities of Retrieval Augmented Generation (RAG), this project goes a step further by employing multiple query generation and Reciprocal Rank Fusion to re-rank search results." 14 | ] 15 | }, 16 | { 17 | "attachments": {}, 18 | "cell_type": "markdown", 19 | "id": "ebcc6791", 20 | "metadata": {}, 21 | "source": [ 22 | "## Setup\n", 23 | "\n", 24 | "For this example, we will use Pinecone and some fake data" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "661a1c36", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import pinecone\n", 35 | "from langchain.embeddings import OpenAIEmbeddings\n", 36 | "from langchain.vectorstores import Pinecone\n", 37 | "\n", 38 | "pinecone.init(api_key=\"...\", environment=\"...\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "id": "48ef7e93", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "all_documents = {\n", 49 | " \"doc1\": \"Climate change and economic impact.\",\n", 50 | " \"doc2\": \"Public health concerns due to climate change.\",\n", 51 | " \"doc3\": \"Climate change: A social perspective.\",\n", 52 | " \"doc4\": \"Technological solutions to climate change.\",\n", 53 | " \"doc5\": \"Policy changes needed to combat climate change.\",\n", 54 | " \"doc6\": \"Climate change and its impact on biodiversity.\",\n", 55 | " \"doc7\": \"Climate change: The science and models.\",\n", 56 | " \"doc8\": \"Global warming: A subset of climate change.\",\n", 57 | " \"doc9\": \"How climate change affects daily weather.\",\n", 58 | " \"doc10\": \"The history of climate change activism.\",\n", 59 | "}" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "fde89f0b", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "vectorstore = Pinecone.from_texts(\n", 70 | " list(all_documents.values()), OpenAIEmbeddings(), index_name=\"rag-fusion\"\n", 71 | ")" 72 | ] 73 | }, 74 | { 75 | "attachments": {}, 76 | "cell_type": "markdown", 77 | "id": "22ddd041", 78 | "metadata": {}, 79 | "source": [ 80 | "## Define the Query Generator\n", 81 | "\n", 82 | "We will now define a chain to do the query generation" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 7, 88 | "id": "1d547524", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "from langchain.chat_models import ChatOpenAI\n", 93 | "from langchain.schema.output_parser import StrOutputParser" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 68, 99 | "id": "af9ab4db", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "from langchain import hub\n", 104 | "\n", 105 | "prompt = hub.pull(\"langchain-ai/rag-fusion-query-generation\")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 3, 111 | "id": "3628b552", 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "# prompt = ChatPromptTemplate.from_messages([\n", 116 | "# (\"system\", \"You are a helpful assistant that generates multiple search queries based on a single input query.\"),\n", 117 | "# (\"user\", \"Generate multiple search queries related to: {original_query}\"),\n", 118 | "# (\"user\", \"OUTPUT 
(4 queries):\")\n", 119 | "# ])" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "id": "8d6cbb73", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "generate_queries = (\n", 130 | " prompt | ChatOpenAI(temperature=0) | StrOutputParser() | (lambda x: x.split(\"\\n\"))\n", 131 | ")" 132 | ] 133 | }, 134 | { 135 | "attachments": {}, 136 | "cell_type": "markdown", 137 | "id": "ee2824cd", 138 | "metadata": {}, 139 | "source": [ 140 | "## Define the full chain\n", 141 | "\n", 142 | "We can now put it all together and define the full chain. This chain:\n", 143 | " \n", 144 | " 1. Generates a bunch of queries\n", 145 | " 2. Looks up each query in the retriever\n", 146 | " 3. Joins all the results together using reciprocal rank fusion\n", 147 | " \n", 148 | " \n", 149 | "Note that it does NOT do a final generation step" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 50, 155 | "id": "ca0bfec4", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "original_query = \"impact of climate change\"" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 75, 165 | "id": "02437d65", 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "vectorstore = Pinecone.from_existing_index(\"rag-fusion\", OpenAIEmbeddings())\n", 170 | "retriever = vectorstore.as_retriever()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 76, 176 | "id": "46a9a0e6", 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "from langchain.load import dumps, loads\n", 181 | "\n", 182 | "\n", 183 | "def reciprocal_rank_fusion(results: list[list], k=60):\n", 184 | " fused_scores = {}\n", 185 | " for docs in results:\n", 186 | " # Assumes the docs are returned in sorted order of relevance\n", 187 | " for rank, doc in enumerate(docs):\n", 188 | " doc_str = dumps(doc)\n", 189 | " if doc_str not in fused_scores:\n", 190 | " fused_scores[doc_str] = 0\n", 191 | " previous_score = fused_scores[doc_str]\n", 192 | " fused_scores[doc_str] += 1 / (rank + k)\n", 193 | "\n", 194 | " reranked_results = [\n", 195 | " (loads(doc), score)\n", 196 | " for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)\n", 197 | " ]\n", 198 | " return reranked_results" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 77, 204 | "id": "3f9d4502", 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "chain = generate_queries | retriever.map() | reciprocal_rank_fusion" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 78, 214 | "id": "d70c4fcd", 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "data": { 219 | "text/plain": [ 220 | "[(Document(page_content='Climate change and economic impact.'),\n", 221 | " 0.06558258417063283),\n", 222 | " (Document(page_content='Climate change: A social perspective.'),\n", 223 | " 0.06400409626216078),\n", 224 | " (Document(page_content='How climate change affects daily weather.'),\n", 225 | " 0.04787506400409626),\n", 226 | " (Document(page_content='Climate change and its impact on biodiversity.'),\n", 227 | " 0.03306010928961749),\n", 228 | " (Document(page_content='Public health concerns due to climate change.'),\n", 229 | " 0.016666666666666666),\n", 230 | " (Document(page_content='Technological solutions to climate change.'),\n", 231 | " 0.016666666666666666),\n", 232 | " (Document(page_content='Policy changes needed to combat climate change.'),\n", 233 | " 
0.01639344262295082)]" 234 | ] 235 | }, 236 | "execution_count": 78, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "chain.invoke({\"original_query\": original_query})" 243 | ] 244 | } 245 | ], 246 | "metadata": { 247 | "kernelspec": { 248 | "display_name": "Python 3 (ipykernel)", 249 | "language": "python", 250 | "name": "python3" 251 | }, 252 | "language_info": { 253 | "codemirror_mode": { 254 | "name": "ipython", 255 | "version": 3 256 | }, 257 | "file_extension": ".py", 258 | "mimetype": "text/x-python", 259 | "name": "python", 260 | "nbconvert_exporter": "python", 261 | "pygments_lexer": "ipython3", 262 | "version": "3.10.1" 263 | } 264 | }, 265 | "nbformat": 4, 266 | "nbformat_minor": 5 267 | } 268 | -------------------------------------------------------------------------------- /Code/06 - RAG/3 - Query-Translation/03-query-decomposition.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/3 - Query-Translation/03-query-decomposition.ipynb -------------------------------------------------------------------------------- /Code/06 - RAG/3 - Query-Translation/04-step-back.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/3 - Query-Translation/04-step-back.ipynb -------------------------------------------------------------------------------- /Code/06 - RAG/3 - Query-Translation/05-HyDE.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/3 - Query-Translation/05-HyDE.ipynb -------------------------------------------------------------------------------- /Code/06 - RAG/3 - Query-Translation/06-rewrite-retrieve-read.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "260629f9", 6 | "metadata": {}, 7 | "source": [ 8 | "# Rewrite-Retrieve-Read\n", 9 | "\n", 10 | "**Rewrite-Retrieve-Read** is a method proposed in the paper [Query Rewriting for Retrieval-Augmented Large Language Models](https://arxiv.org/pdf/2305.14283.pdf)\n", 11 | "\n", 12 | "> Because the original query can not be always optimal to retrieve for the LLM, especially in the real world... 
we first prompt an LLM to rewrite the queries, then conduct retrieval-augmented reading\n", 13 | "\n", 14 | "We show how you can easily do that with LangChain Expression Language" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "id": "eda93712", 20 | "metadata": {}, 21 | "source": [ 22 | "## Baseline\n", 23 | "\n", 24 | "Baseline RAG (**Retrieve-and-read**) can be done like the following:" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "id": "1d2edbd2", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from langchain.chat_models import ChatOpenAI\n", 35 | "from langchain.prompts import ChatPromptTemplate\n", 36 | "from langchain.schema.output_parser import StrOutputParser\n", 37 | "from langchain.schema.runnable import RunnablePassthrough\n", 38 | "from langchain.utilities import DuckDuckGoSearchAPIWrapper" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "id": "86a46aa9", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "template = \"\"\"Answer the users question based only on the following context:\n", 49 | "\n", 50 | "\n", 51 | "{context}\n", 52 | "\n", 53 | "\n", 54 | "Question: {question}\n", 55 | "\"\"\"\n", 56 | "prompt = ChatPromptTemplate.from_template(template)\n", 57 | "\n", 58 | "model = ChatOpenAI(temperature=0)\n", 59 | "\n", 60 | "search = DuckDuckGoSearchAPIWrapper()\n", 61 | "\n", 62 | "\n", 63 | "def retriever(query):\n", 64 | " return search.run(query)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "id": "8566d48e", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "chain = (\n", 75 | " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", 76 | " | prompt\n", 77 | " | model\n", 78 | " | StrOutputParser()\n", 79 | ")" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "id": "5c57f9ee", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "simple_query = \"what is langchain?\"" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "id": "37c5f962", 96 | "metadata": { 97 | "scrolled": false 98 | }, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": [ 103 | "\"LangChain is a powerful and versatile Python library that enables developers and researchers to create, experiment with, and analyze language models and agents. It simplifies the development of language-based applications by providing a suite of features for artificial general intelligence. It can be used to build chatbots, perform document analysis and summarization, and streamline interaction with various large language model providers. LangChain's unique proposition is its ability to create logical links between one or more language models, known as Chains. 
It is an open-source library that offers a generic interface to foundation models and allows prompt management and integration with other components and tools.\"" 104 | ] 105 | }, 106 | "execution_count": 5, 107 | "metadata": {}, 108 | "output_type": "execute_result" 109 | } 110 | ], 111 | "source": [ 112 | "chain.invoke(simple_query)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "id": "23bdb9bd", 118 | "metadata": {}, 119 | "source": [ 120 | "While this is fine for well formatted queries, it can break down for more complicated queries" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 6, 126 | "id": "8df6a814", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "distracted_query = \"man that sam bankman fried trial was crazy! what is langchain?\"" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 7, 136 | "id": "16d7db64", 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "'Based on the given context, there is no information provided about \"langchain.\"'" 143 | ] 144 | }, 145 | "execution_count": 7, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "chain.invoke(distracted_query)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "id": "0b4f8b93", 157 | "metadata": {}, 158 | "source": [ 159 | "This is because the retriever does a bad job with these \"distracted\" queries" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 8, 165 | "id": "3439d8dc", 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "'Business She\\'s the star witness against Sam Bankman-Fried. Her testimony was explosive Gary Wang, who co-founded both FTX and Alameda Research, said Bankman-Fried directed him to change a... The Verge, following the trial\\'s Oct. 4 kickoff: \"Is Sam Bankman-Fried\\'s Defense Even Trying to Win?\". CBS Moneywatch, from Thursday: \"Sam Bankman-Fried\\'s Lawyer Struggles to Poke ... Sam Bankman-Fried, FTX\\'s founder, responded with a single word: \"Oof.\". Less than a year later, Mr. Bankman-Fried, 31, is on trial in federal court in Manhattan, fighting criminal charges ... July 19, 2023. A U.S. judge on Wednesday overruled objections by Sam Bankman-Fried\\'s lawyers and allowed jurors in the FTX founder\\'s fraud trial to see a profane message he sent to a reporter days ... Sam Bankman-Fried, who was once hailed as a virtuoso in cryptocurrency trading, is on trial over the collapse of FTX, the financial exchange he founded. Bankman-Fried is accused of...'" 172 | ] 173 | }, 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "retriever(distracted_query)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "id": "7eb748ac", 186 | "metadata": {}, 187 | "source": [ 188 | "## Rewrite-Retrieve-Read Implementation\n", 189 | "\n", 190 | "The main part is a rewriter to rewrite the search query" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 9, 196 | "id": "88ae702e", 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "template = \"\"\"Provide a better search query for \\\n", 201 | "web search engine to answer the given question, end \\\n", 202 | "the queries with ’**’. 
Question: \\\n", 203 | "{x} Answer:\"\"\"\n", 204 | "rewrite_prompt = ChatPromptTemplate.from_template(template)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 10, 210 | "id": "184e1bcb", 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "from langchain import hub\n", 215 | "\n", 216 | "rewrite_prompt = hub.pull(\"langchain-ai/rewrite\")" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 11, 222 | "id": "a4c23d40", 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "Provide a better search query for web search engine to answer the given question, end the queries with ’**’. Question {x} Answer:\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "print(rewrite_prompt.template)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 12, 240 | "id": "f55cd010", 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "# Parser to remove the `**`\n", 245 | "\n", 246 | "\n", 247 | "def _parse(text):\n", 248 | " return text.strip(\"**\")" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 13, 254 | "id": "c9c34bef", 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "rewriter = rewrite_prompt | ChatOpenAI(temperature=0) | StrOutputParser() | _parse" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 14, 264 | "id": "fb17fb3d", 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "'What is the definition and purpose of Langchain?'" 271 | ] 272 | }, 273 | "execution_count": 14, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "rewriter.invoke({\"x\": distracted_query})" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 15, 285 | "id": "f83edb09", 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "rewrite_retrieve_read_chain = (\n", 290 | " {\n", 291 | " \"context\": {\"x\": RunnablePassthrough()} | rewriter | retriever,\n", 292 | " \"question\": RunnablePassthrough(),\n", 293 | " }\n", 294 | " | prompt\n", 295 | " | model\n", 296 | " | StrOutputParser()\n", 297 | ")" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 16, 303 | "id": "43096322", 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | "'Based on the given context, LangChain is an open-source framework designed to simplify the creation of applications using large language models (LLMs). It enables LLM models to generate responses based on up-to-date online information and simplifies the organization of large volumes of data for easy access by LLMs. LangChain offers a standard interface for chains, integrations with other tools, and end-to-end chains for common applications. It is a robust library that streamlines interaction with various LLM providers. LangChain\\'s unique proposition is its ability to create logical links between one or more LLMs, known as Chains. It is an AI framework with features that simplify the development of language-based applications and offers a suite of features for artificial general intelligence. 
However, the context does not provide any information about the \"sam bankman fried trial\" mentioned in the question.'" 310 | ] 311 | }, 312 | "execution_count": 16, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "rewrite_retrieve_read_chain.invoke(distracted_query)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "id": "59874b4f", 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [] 328 | } 329 | ], 330 | "metadata": { 331 | "kernelspec": { 332 | "display_name": "Python 3 (ipykernel)", 333 | "language": "python", 334 | "name": "python3" 335 | }, 336 | "language_info": { 337 | "codemirror_mode": { 338 | "name": "ipython", 339 | "version": 3 340 | }, 341 | "file_extension": ".py", 342 | "mimetype": "text/x-python", 343 | "name": "python", 344 | "nbconvert_exporter": "python", 345 | "pygments_lexer": "ipython3", 346 | "version": "3.10.1" 347 | } 348 | }, 349 | "nbformat": 4, 350 | "nbformat_minor": 5 351 | } 352 | -------------------------------------------------------------------------------- /Code/06 - RAG/4 - RAG-techniques/1 - CRAG.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/4 - RAG-techniques/1 - CRAG.ipynb -------------------------------------------------------------------------------- /Code/06 - RAG/4 - RAG-techniques/2 - SelfRAG.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/4 - RAG-techniques/2 - SelfRAG.ipynb -------------------------------------------------------------------------------- /Code/06 - RAG/4 - RAG-techniques/3 - AdaptiveRAG.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/4 - RAG-techniques/3 - AdaptiveRAG.ipynb -------------------------------------------------------------------------------- /Code/06 - RAG/README.md: -------------------------------------------------------------------------------- 1 | # Retrieval Augmented Generation (RAG) 2 | 3 | General-purpose language models can be fine-tuned for tasks like sentiment analysis and named entity recognition, which don’t require extra background knowledge. 4 | 5 | For more complex, knowledge-intensive tasks, integrating external knowledge sources can enhance factual accuracy, improve reliability, and reduce hallucinations. 6 | 7 | To tackle this, Meta AI introduced **Retrieval-Augmented Generation (RAG)**, which combines information retrieval with text generation. Instead of relying solely on a model’s static knowledge, RAG retrieves relevant documents (e.g., from Wikipedia) and uses them as context when generating responses. This allows models to stay updated without requiring frequent retraining. 8 | 9 | [Lewis et al. (2021)](https://arxiv.org/pdf/2005.11401) proposed a fine-tuning approach for RAG, where a pre-trained seq2seq model serves as parametric memory, and a dense vector index of Wikipedia functions as non-parametric memory, accessed via a neural retriever. 
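In code, the retrieve-then-generate loop at the heart of RAG can be sketched in a few lines. The snippet below is a minimal illustration only; `embed`, `index`, and `llm` are hypothetical placeholders for whatever embedding model, vector store, and generator the pipeline plugs in:

```
# Minimal retrieve-then-generate sketch.
# `embed`, `index`, and `llm` are hypothetical placeholders, not a specific library API.
def naive_rag(question, embed, index, llm, k=4):
    query_vec = embed(question)              # 1. embed the user query
    docs = index.search(query_vec, k=k)      # 2. retrieve the top-k relevant chunks
    context = "\n\n".join(doc.text for doc in docs)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\nQuestion: {question}"
    )
    return llm(prompt)                       # 3. generate a grounded answer
```

Every variant discussed below follows these same three steps; the techniques differ mainly in how the query is formed and how the retrieved context is filtered before generation.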
10 |
11 |
12 |
13 | ## Components in a RAG Pipeline
14 | **Retriever component:** retrieves additional context from an external database for the LLM to answer the query.
15 |
16 | **Generator component:** generates an answer based on a prompt augmented with the retrieved information.
17 |
18 | ## Existing RAG Techniques
19 | Here are the details of all the advanced RAG techniques covered in this repository.
20 |
21 | | Techniques | Description |
22 | | --- | --- |
23 | | Native RAG | Combines retrieved data with LLMs for simple and effective responses. |
24 | | Hybrid RAG | Combines vector search and traditional methods like BM25 for better information retrieval. |
25 | | HyDE RAG | Creates hypothetical document embeddings to find relevant information for a query. |
26 | | Parent Document Retriever | Breaks large documents into small parts and retrieves the full document if a part matches the query. |
27 | | RAG fusion | Generates sub-queries, ranks documents with Reciprocal Rank Fusion, and uses top results for accurate responses. |
28 | | Contextual RAG | Compresses retrieved documents to keep only relevant details for concise and accurate responses. |
29 | | Rewrite Retrieve Read (RRR) | Rewrites the query, retrieves better data, and generates accurate answers. |
30 | | Unstructured RAG | A method designed to handle documents that combine text, tables, and images. |
31 |
32 | ## RAG
33 | - Agentic RAG: Use an agent to figure out how to retrieve the most relevant information before using the retrieved information to answer the user's question.
34 | - Adaptive RAG: A strategy that unites (1) query analysis with (2) active / self-corrective RAG. Implementation of: https://arxiv.org/abs/2403.14403
35 |     - For a version that uses a local LLM: Adaptive RAG using local LLMs
36 | - Corrective RAG: Uses an LLM to grade the quality of the information retrieved from the given source and, if the quality is low, tries to retrieve the information from another source. Implementation of: https://arxiv.org/pdf/2401.15884.pdf
37 |     - For a version that uses a local LLM: Corrective RAG using local LLMs
38 | - Self-RAG: A strategy that incorporates self-reflection / self-grading on retrieved documents and generations. Implementation of: https://arxiv.org/abs/2310.11511
39 |     - For a version that uses a local LLM: Self-RAG using local LLMs
40 | - SQL Agent: Build a SQL agent that can answer questions about a SQL database.
41 |
42 | ## Evaluation
43 |
44 | **Why BLEU and ROUGE fall short for RAG**
45 |
46 | In RAG applications, the primary goal is not just to generate responses that look similar to reference answers but to ensure that the responses are factually correct, relevant, and supported by retrieved documents. BLEU and ROUGE, while useful for general text generation tasks, do not adequately address these needs. This is where RAGAS metrics, specifically designed for RAG models, become essential.
47 |
48 |
49 |
50 | RAGAS metrics evaluate the retrieval component (context_precision and context_recall) and the generative component (faithfulness and answer_relevancy) separately.
51 |
52 | **Faithfulness:** measures the factual accuracy of the generated response based on the retrieved documents.
53 |
54 | **Answer Relevancy:** evaluates how relevant the generated response is to the original query.
55 |
56 | **Context Precision:** measures the precision of the retrieved documents in providing relevant information to the query.
57 |
58 | **Context Recall:** assesses how well the retrieved documents cover all relevant aspects of the query.
59 |
60 | ## Sample RAGAS implementation
61 | ```
62 | from datasets import Dataset
63 | from ragas import evaluate
64 | from ragas.metrics import (
65 |     answer_relevancy,
66 |     faithfulness,
67 |     context_recall,
68 |     context_precision,
69 |     answer_correctness,
70 | )
71 |
72 | # Example data. RAGAS (0.1.x conventions) expects the column names
73 | # question / answer / contexts / ground_truth.
74 | data = {
75 |     "question": ["What is the capital of France?"],
76 |     "answer": ["Paris is the capital of France."],
77 |     "contexts": [["Paris is the capital of France. It is a major European city known for its culture."]],
78 |     "ground_truth": ["Paris is the capital of France."],  # required by answer_correctness and the context metrics
79 | }
80 |
81 | # Convert the data to a Hugging Face Dataset
82 | dataset = Dataset.from_dict(data)
83 |
84 | # Define the metrics you want to evaluate
85 | metrics = [
86 |     faithfulness,
87 |     answer_relevancy,
88 |     answer_correctness,
89 |     context_precision,
90 |     context_recall,
91 | ]
92 |
93 | # Evaluate the dataset using the selected metrics
94 | results = evaluate(dataset, metrics=metrics)
95 |
96 | # Display the results (a mapping of metric name to score)
97 | print(results)
98 | ```
108 |
109 | ## Appendix
110 | - Semantic Chunking
111 |     - Breakpoint-based Semantic Chunker
112 |     - Clustering-based Semantic Chunker
113 | - Agentic Chunking
114 | - [Qu et al. (2024)](https://arxiv.org/abs/2410.13070) Is Semantic Chunking Worth the Computational Cost?
115 | - [Chen et al. (2024)](https://arxiv.org/pdf/2312.06648) Dense X Retrieval: What Retrieval Granularity Should We Use?
116 | - [Jiang et al.
(2024)](https://arxiv.org/pdf/2406.15319v1) LongRAG: Enhancing Retrieval-Augmented Generation with Long-context LLMs 117 | - [Langchain-RAG](https://github.com/langchain-ai/rag-from-scratch?tab=readme-ov-file) -------------------------------------------------------------------------------- /Code/06 - RAG/figures/RAG-process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/RAG-process.png -------------------------------------------------------------------------------- /Code/06 - RAG/figures/RAG_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/RAG_workflow.png -------------------------------------------------------------------------------- /Code/06 - RAG/figures/allenembedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/allenembedding.png -------------------------------------------------------------------------------- /Code/06 - RAG/figures/cot-prompting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/cot-prompting.png -------------------------------------------------------------------------------- /Code/06 - RAG/figures/ir.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/ir.jpg -------------------------------------------------------------------------------- /Code/06 - RAG/figures/memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/memory.png -------------------------------------------------------------------------------- /Code/06 - RAG/figures/rag-lewis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/rag-lewis.png -------------------------------------------------------------------------------- /Code/06 - RAG/figures/ragas-score.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/ragas-score.webp -------------------------------------------------------------------------------- /Code/06 - RAG/figures/retrieval.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - 
RAG/figures/retrieval.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/retrieval.jpeg -------------------------------------------------------------------------------- /Code/06 - RAG/figures/vectorstores.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/vectorstores.jpeg -------------------------------------------------------------------------------- /Code/06 - RAG/figures/zero-shot-cot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/06 - RAG/figures/zero-shot-cot.png -------------------------------------------------------------------------------- /Code/07 - Human Preferences/README.md: --------------------------------------------------------------------------------
1 | # Reinforcement Learning with Human Feedback
2 |
3 | 1. **Custom Dataset:** Creating and using a custom preference dataset is the foundational step in the RLHF series.
4 |
5 | 2. **InstructionGPT:** Building on the custom dataset, the next logical step is the introduction of InstructionGPT, leveraging the insights gained from the initial custom dataset.
6 |
7 | 3. **SFT (Supervised Fine-tuning Trainer):** A tool that helps you easily adapt pre-trained models to specific tasks using labeled data, like teaching a language model to summarize news articles.
8 |
9 | 4. **PPO (Proximal Policy Optimization):** A reinforcement learning algorithm for tasks where an agent interacts with an environment to learn optimal behaviors. PPO improves stability and sample efficiency over other policy optimization methods by constraining policy updates to prevent large policy changes during training.
10 |
11 | 5. **DPO (Direct Preference Optimization):** A stable and computationally lightweight algorithm for fine-tuning large-scale unsupervised language models. DPO directly parameterizes the reward model and solves the reinforcement-learning-from-human-feedback problem with a simple classification loss (see the loss sketch below), aligning models with human preferences across various tasks without the complexities of traditional RL methods.
12 |
13 | 6. **RRHF (Rank Responses to Align Language Models with Human Feedback without tears):** A simpler and more efficient alternative to traditional methods like Proximal Policy Optimization (PPO) for aligning large language models with human preferences. It achieves alignment performance comparable to PPO on the Helpful and Harmless dataset while requiring only 1 to 2 models and avoiding complex hyperparameter tuning.
14 |
15 |
16 | This timeline highlights some key milestones, but it's crucial to remember that RLHF research is a rapidly evolving field with constant advancements. Many other relevant papers and projects exist, and the publication order can vary depending on specific subfields and applications.
17 |
18 | For more specific information, search for resources based on specific keywords or research areas within RLHF. Consider exploring databases like arXiv or Google Scholar with relevant queries like "reinforcement learning from human feedback," "human preferences in RL," or "policy learning with human guidance."
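For concreteness, the preference-classification loss that DPO (item 5 above) optimizes can be written as

$$\mathcal{L}_{\mathrm{DPO}}(\theta) = -\,\mathbb{E}_{(x,\,y_w,\,y_l)}\left[\log \sigma\!\left(\beta \log \frac{\pi_\theta(y_w \mid x)}{\pi_{\mathrm{ref}}(y_w \mid x)} - \beta \log \frac{\pi_\theta(y_l \mid x)}{\pi_{\mathrm{ref}}(y_l \mid x)}\right)\right]$$

where $y_w$ and $y_l$ are the preferred and dispreferred responses to prompt $x$, $\pi_{\mathrm{ref}}$ is a frozen reference model, and $\beta$ controls how far the policy may drift from the reference. This is the quantity that `calculate_DPO_loss` in `scratch/dpo-train.py` computes, using mean per-token log-probabilities.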
19 |
20 |
21 |
22 |
23 | # Reference Resources & Code
24 | - https://vijayasriiyer.medium.com/rlhf-training-pipeline-for-llms-using-huggingface-821b76fc45c4
-------------------------------------------------------------------------------- /Code/07 - Human Preferences/figures/Dromedary-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/07 - Human Preferences/figures/Dromedary-2.png -------------------------------------------------------------------------------- /Code/07 - Human Preferences/figures/RLHF_w_custom_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/07 - Human Preferences/figures/RLHF_w_custom_dataset.png -------------------------------------------------------------------------------- /Code/07 - Human Preferences/figures/instructGPT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/07 - Human Preferences/figures/instructGPT.png -------------------------------------------------------------------------------- /Code/07 - Human Preferences/figures/label_studio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/07 - Human Preferences/figures/label_studio.png -------------------------------------------------------------------------------- /Code/07 - Human Preferences/figures/salmon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/07 - Human Preferences/figures/salmon.png -------------------------------------------------------------------------------- /Code/07 - Human Preferences/figures/u-net-architecture-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/07 - Human Preferences/figures/u-net-architecture-2.png -------------------------------------------------------------------------------- /Code/07 - Human Preferences/figures/u-net-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/07 - Human Preferences/figures/u-net-architecture.png -------------------------------------------------------------------------------- /Code/07 - Human
Preferences/figures/workflow-rrhf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/07 - Human Preferences/figures/workflow-rrhf.png -------------------------------------------------------------------------------- /Code/07 - Human Preferences/huggingface/00-TRL.ipynb: --------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# TRL - Transformer Reinforcement Learning\n",
9 | "\n",
10 | "TRL is a full-stack library that provides a set of tools to train transformer language models with reinforcement learning, from the Supervised Fine-tuning (SFT) and Reward Modeling (RM) steps to the Proximal Policy Optimization (PPO) step. The library is integrated with 🤗 transformers.\n",
11 | "\n",
12 | "`pip install trl`"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 4,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "import os\n",
22 | "import torch\n",
23 | "\n",
24 | "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig\n",
25 | "from datasets import load_dataset\n",
26 | "from trl import SFTTrainer\n",
27 | "from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training\n",
28 | "import bitsandbytes as bnb\n",
29 | "\n",
30 | ""
31 | ]
32 | },
33 | {
34 | "attachments": {},
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "## Prepare Dataset"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 8,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "text/plain": [
49 | "DatasetDict({\n",
50 | " test: Dataset({\n",
51 | " features: ['prompt', 'completion'],\n",
52 | " num_rows: 327\n",
53 | " })\n",
54 | "})"
55 | ]
56 | },
57 | "execution_count": 8,
58 | "metadata": {},
59 | "output_type": "execute_result"
60 | }
61 | ],
62 | "source": [
63 | "output_dir = \"./results\"\n",
64 | "model_name = \"lmsys/fastchat-t5-3b-v1.0\"  # NOTE: a seq2seq (T5) checkpoint; AutoModelForCausalLM below expects a decoder-only model\n",
65 | "\n",
66 | "dataset = load_dataset(\"HuggingFaceH4/instruction-dataset\")\n",
67 | "dataset = dataset.remove_columns(\"meta\")  # this dataset ships only a 'test' split; select it (e.g., dataset[\"test\"]) before training\n",
68 | "dataset"
69 | ]
70 | },
71 | {
72 | "attachments": {},
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "## Model"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 9,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "def find_all_linear_names(model):  # collect 4-bit Linear module names to use as LoRA targets\n",
86 | "    cls = bnb.nn.Linear4bit\n",
87 | "    lora_module_names = set()\n",
88 | "    for name, module in model.named_modules():\n",
89 | "        if isinstance(module, cls):\n",
90 | "            names = name.split('.')\n",
91 | "            lora_module_names.add(names[0] if len(names) == 1 else names[-1])\n",
92 | "\n",
93 | "    return list(lora_module_names)\n",
94 | "\n",
95 | "\n",
96 | "def print_trainable_parameters(model):\n",
97 | "    \"\"\"\n",
98 | "    Prints the number of trainable parameters in the model.\n",
99 | "    \"\"\"\n",
100 | "    trainable_params = 0\n",
101 | "    all_param = 0\n",
102 | "    for _, param in model.named_parameters():\n",
103 | "        all_param += param.numel()\n",
104 | "        if param.requires_grad:\n",
105 | "            trainable_params += param.numel()\n",
106 | "    print(\n",
107 | "        f\"trainable params: {trainable_params} || all params: {all_param} || trainables%: {100 * trainable_params / all_param}\"\n",
108 | "    )"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "bnb_config = 
BitsAndBytesConfig(\n", 118 | " load_in_4bit=True,\n", 119 | " bnb_4bit_quant_type=\"nf4\",\n", 120 | " bnb_4bit_compute_dtype=torch.bfloat16,\n", 121 | ")\n", 122 | "\n", 123 | "base_model = AutoModelForCausalLM.from_pretrained(\n", 124 | " model_name, \n", 125 | " torch_dtype=torch.bfloat16, \n", 126 | " quantization_config=bnb_config)\n", 127 | "\n", 128 | "base_model.config.use_cache = False\n", 129 | "base_model = prepare_model_for_kbit_training(base_model)\n", 130 | "\n", 131 | "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", 132 | "tokenizer.pad_token = tokenizer.eos_token\n", 133 | "tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n", 134 | "\n", 135 | "# Change the LORA hyperparameters accordingly to fit your use case\n", 136 | "peft_config = LoraConfig(\n", 137 | " r=128,\n", 138 | " lora_alpha=16,\n", 139 | " target_modules=find_all_linear_names(base_model),\n", 140 | " lora_dropout=0.05,\n", 141 | " bias=\"none\",\n", 142 | " task_type=\"CAUSAL_LM\",\n", 143 | ")\n", 144 | "\n", 145 | "base_model = get_peft_model(base_model, peft_config)\n", 146 | "print_trainable_parameters(base_model)" 147 | ] 148 | }, 149 | { 150 | "attachments": {}, 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "## Prompt Format" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "def formatting_prompts_func(example):\n", 164 | " output_texts = []\n", 165 | "\n", 166 | " for i in range(len(example['prompt'])):\n", 167 | " text = f\"### Input: ```{example['prompt'][i]}```\\n ### Output: {example['completion'][i]}\"\n", 168 | " output_texts.append(text)\n", 169 | " return output_texts" 170 | ] 171 | }, 172 | { 173 | "attachments": {}, 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "## Trainer" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# Parameters for training arguments details => https://github.com/huggingface/transformers/blob/main/src/transformers/training_args.py#L158\n", 187 | "training_args = TrainingArguments(\n", 188 | " per_device_train_batch_size=4,\n", 189 | " gradient_accumulation_steps=4,\n", 190 | " gradient_checkpointing =True,\n", 191 | " max_grad_norm= 0.3,\n", 192 | " num_train_epochs=15, \n", 193 | " learning_rate=2e-4,\n", 194 | " bf16=True,\n", 195 | " save_total_limit=3,\n", 196 | " logging_steps=10,\n", 197 | " output_dir=output_dir,\n", 198 | " optim=\"paged_adamw_32bit\",\n", 199 | " lr_scheduler_type=\"cosine\",\n", 200 | " warmup_ratio=0.05,\n", 201 | ")\n", 202 | "\n", 203 | "trainer = SFTTrainer(\n", 204 | " base_model,\n", 205 | " train_dataset=dataset,\n", 206 | " tokenizer=tokenizer,\n", 207 | " max_seq_length=2048,\n", 208 | " formatting_func=formatting_prompts_func,\n", 209 | " args=training_args\n", 210 | ")\n", 211 | "trainer.train() \n", 212 | "trainer.save_model(output_dir)\n", 213 | "\n", 214 | "output_dir = os.path.join(output_dir, \"final_checkpoint\")\n", 215 | "trainer.model.save_pretrained(output_dir)\n", 216 | "tokenizer.save_pretrained(output_dir)" 217 | ] 218 | } 219 | ], 220 | "metadata": { 221 | "kernelspec": { 222 | "display_name": "Python 3", 223 | "language": "python", 224 | "name": "python3" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": 
"text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.9.6" 237 | }, 238 | "orig_nbformat": 4 239 | }, 240 | "nbformat": 4, 241 | "nbformat_minor": 2 242 | } 243 | -------------------------------------------------------------------------------- /Code/07 - Human Preferences/huggingface/dpo-qlora-4bit.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from datasets import load_dataset 3 | from peft import LoraConfig, get_peft_model 4 | from transformers import AutoTokenizer, AutoModelForCausalLM 5 | from trl import DPOTrainer 6 | 7 | 8 | if __name__ == "__main__": 9 | model_name = "..." 10 | dataset = load_dataset(...) 11 | 12 | tokenizer = AutoTokenizer.from_pretrained(model_name) 13 | tokenizer.pad_token = tokenizer.eos_token 14 | 15 | model = AutoModelForCausalLM.from_pretrained( 16 | model_name, 17 | low_cpu_mem_usage=True, 18 | torch_dtype=torch.bfloat16, 19 | load_in_4bit=True, 20 | use_flash_attention_2=True, 21 | bnb_4bit_compute_dtype=torch.bfloat16, 22 | bnb_4bit_quant_type="nf4", 23 | ) 24 | model.resize_token_embeddings(len(tokenizer)) 25 | model.config.pad_token_id = tokenizer.pad_token_id 26 | model.config.use_cache = False 27 | 28 | ref_model = AutoModelForCausalLM.from_pretrained( 29 | model_name, 30 | low_cpu_mem_usage=True, 31 | torch_dtype=torch.bfloat16, 32 | load_in_4bit=True, 33 | use_flash_attention_2=True, 34 | bnb_4bit_compute_dtype=torch.bfloat16, 35 | ).eval() 36 | 37 | peft_config = LoraConfig( 38 | lora_alpha=128, 39 | lora_dropout=0.05, 40 | r=64, 41 | bias="none", 42 | task_type="CAUSAL_LM", 43 | target_modules=[ 44 | "q_proj", 45 | "k_proj", 46 | "v_proj", 47 | ], 48 | ) 49 | model = get_peft_model(model, peft_config) 50 | 51 | training_args = DPOConfig( 52 | num_train_epochs=3, 53 | learning_rate=5e-07, 54 | per_device_train_batch_size=1, 55 | do_eval=True, 56 | per_device_eval_batch_size=1, 57 | adam_epsilon=1e-08, 58 | lr_scheduler_type="linear", 59 | warmup_ratio=0.1, 60 | seed=42, 61 | logging_steps=100, 62 | save_steps=500, 63 | save_strategy="steps", 64 | output_dir="./output-dir", 65 | gradient_checkpointing=True, 66 | bf16=True, 67 | remove_unused_columns=False, 68 | ) 69 | 70 | dpo_trainer = DPOTrainer( 71 | model, 72 | ref_model, 73 | args=training_args, 74 | beta=training_args.beta, 75 | train_dataset=dataset["train"], 76 | eval_dataset=dataset["test"], 77 | tokenizer=tokenizer, 78 | max_length=training_args.max_length, 79 | max_prompt_length=training_args.max_prompt_length, 80 | peft_config=peft_config, 81 | ) 82 | dpo_trainer.train() 83 | dpo_trainer.save_model() -------------------------------------------------------------------------------- /Code/07 - Human Preferences/scratch/dpo-train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import random 3 | import numpy as np 4 | from functools import partial 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.optim import AdamW 10 | 11 | from torch.utils.data import DataLoader 12 | from datasets import load_dataset 13 | from transformers import AutoTokenizer, AutoModelForCausalLM 14 | 15 | import wandb 16 | from tqdm import tqdm 17 | 18 | def seed_everything(seed=2003): 19 | torch.manual_seed(seed) 20 | torch.cuda.manual_seed_all(seed) 21 | np.random.seed(seed) 22 | random.seed(seed) 23 | torch.backends.cudnn.deterministic = True 24 | 25 | def 
calculate_DPO_loss( 26 | model_prefered_logprob, 27 | model_disprefered_logprob, 28 | ref_prefered_logprob, 29 | ref_disprefered_logprob, 30 | beta=0.5 31 | ): 32 | 33 | prefered_relative_logprob = model_prefered_logprob - ref_prefered_logprob 34 | disprefered_relative_logprob = model_disprefered_logprob - ref_disprefered_logprob 35 | 36 | reward_accuracies = (prefered_relative_logprob > disprefered_relative_logprob).float().mean(dim=-1) 37 | reward_margins = (prefered_relative_logprob - disprefered_relative_logprob).mean(dim=-1) 38 | 39 | loss = -F.logsigmoid(beta * (prefered_relative_logprob - disprefered_relative_logprob)).mean(dim=-1) 40 | 41 | return loss, prefered_relative_logprob.mean(dim=-1), disprefered_relative_logprob.mean(dim=-1), reward_accuracies, reward_margins 42 | 43 | def get_log_prob(logits, labels): 44 | log_probs = F.log_softmax(logits, dim=-1) 45 | return torch.gather(log_probs, -1, labels.unsqueeze(-1)).squeeze(-1).mean(-1) 46 | 47 | def collate_fn(batch, tokenizer, max_length, device): 48 | prompts = ['Instruct: ' + item['prompt'] + '\n' for item in batch] 49 | chosen_responses = ['Output: ' + item['chosen'] for item in batch] 50 | rejected_responses = ['Output: ' + item['rejected'] for item in batch] 51 | 52 | prompt_ids = tokenizer.batch_encode_plus(prompts, padding=True, return_tensors="pt", max_length=max_length, truncation=True)['input_ids'].to(device) 53 | prefered_ids = tokenizer.batch_encode_plus(chosen_responses, padding=True, return_tensors="pt", max_length=max_length, truncation=True)['input_ids'].to(device) 54 | disprefered_ids = tokenizer.batch_encode_plus(rejected_responses, padding=True, return_tensors="pt", max_length=max_length, truncation=True)['input_ids'].to(device) 55 | 56 | prompt_prefered_ids = torch.cat([prompt_ids, prefered_ids], dim=-1) 57 | prompt_disprefered_ids = torch.cat([prompt_ids, disprefered_ids], dim=-1) 58 | 59 | prompt_prefered_mask = torch.cat([torch.ones_like(prompt_ids), torch.zeros_like(prefered_ids)], dim=-1) 60 | prompt_disprefered_mask = torch.cat([torch.ones_like(prompt_ids), torch.zeros_like(disprefered_ids)], dim=-1) 61 | 62 | return { 63 | 'prompt_prefered_ids': prompt_prefered_ids, 64 | 'prompt_disprefered_ids': prompt_disprefered_ids, 65 | 'prompt_prefered_mask': prompt_prefered_mask, 66 | 'prompt_disprefered_mask': prompt_disprefered_mask 67 | } 68 | 69 | def train(model, ref_model, tokenizer, optimizer, train_dataloader, epochs=1, beta=0.1): 70 | model.train() 71 | ref_model.eval() 72 | 73 | for epoch in range(epochs): 74 | for batch in tqdm(train_dataloader): 75 | optimizer.zero_grad() 76 | 77 | prompt_prefered_ids = batch['prompt_prefered_ids'] 78 | prompt_disprefered_ids = batch['prompt_disprefered_ids'] 79 | prompt_prefered_mask = batch['prompt_prefered_mask'] 80 | prompt_disprefered_mask = batch['prompt_disprefered_mask'] 81 | 82 | model_prefered_log_prob = get_log_prob(model(prompt_prefered_ids, attention_mask=prompt_prefered_mask).logits, prompt_prefered_ids) 83 | model_disprefered_log_prob = get_log_prob(model(prompt_disprefered_ids, attention_mask=prompt_disprefered_mask).logits, prompt_disprefered_ids) 84 | 85 | ref_prefered_log_prob = get_log_prob(ref_model(prompt_prefered_ids, attention_mask=prompt_prefered_mask).logits, prompt_prefered_ids) 86 | ref_disprefered_log_prob = get_log_prob(ref_model(prompt_disprefered_ids, attention_mask=prompt_disprefered_mask).logits, prompt_disprefered_ids) 87 | 88 | loss, prefered_relative_logprob, disprefered_relative_logprob, reward_accuracies, reward_margins = 
calculate_DPO_loss( 89 | model_prefered_log_prob, 90 | model_disprefered_log_prob, 91 | ref_prefered_log_prob, 92 | ref_disprefered_log_prob, 93 | beta=beta 94 | ) 95 | 96 | loss.backward() 97 | optimizer.step() 98 | 99 | wandb.log({ 100 | 'loss': loss.item(), 101 | 'prefered_relative_logprob': prefered_relative_logprob, 102 | 'disprefered_relative_logprob': disprefered_relative_logprob, 103 | 'reward_accuracy': reward_accuracies, 104 | 'reward_margin': reward_margins 105 | }) 106 | 107 | def main(): 108 | parser = argparse.ArgumentParser() 109 | 110 | parser.add_argument("--epochs", type=int, default=1) 111 | parser.add_argument("--beta", type=float, default=0.1) 112 | parser.add_argument("--batch_size", type=int, default=4) 113 | parser.add_argument("--max_length", type=int, default=512) 114 | parser.add_argument("--lr", type=float, default=1e-6) 115 | parser.add_argument("--seed", type=int, default=2003) 116 | parser.add_argument("--model_name", type=str, default="microsoft/phi-2") 117 | parser.add_argument("--dataset_name", type=str, default="jondurbin/truthy-dpo-v0.1") 118 | parser.add_argument("--wandb_project", type=str, default="truthy-dpo") 119 | 120 | args = parser.parse_args() 121 | 122 | seed_everything(args.seed) 123 | 124 | wandb.login() 125 | wandb.init(project=args.wandb_project, config=args) 126 | 127 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 128 | 129 | tokenizer = AutoTokenizer.from_pretrained(args.model_name) 130 | tokenizer.pad_token = tokenizer.eos_token 131 | model = AutoModelForCausalLM.from_pretrained(args.model_name).to(device) 132 | ref_model = AutoModelForCausalLM.from_pretrained(args.model_name).to(device) 133 | 134 | optimizer = AdamW(model.parameters(), lr=args.lr) 135 | 136 | dataset = load_dataset(args.dataset_name, split="train") 137 | train_dataloader = torch.utils.data.DataLoader( 138 | dataset, 139 | batch_size = args.batch_size, 140 | shuffle = True, 141 | collate_fn = partial(collate_fn, tokenizer=tokenizer, max_length=args.max_length, device=device)) 142 | 143 | train(model, ref_model, tokenizer, optimizer, train_dataloader, epochs=args.epochs, beta=args.beta) 144 | 145 | model.save_pretrained("model-DPO.pt") 146 | 147 | if __name__ == "__main__": 148 | main() 149 | -------------------------------------------------------------------------------- /Code/08 - Speech/.gitignore: -------------------------------------------------------------------------------- 1 | /developing -------------------------------------------------------------------------------- /Code/08 - Speech/ASR/01 - Voice + CNN1D.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Voice Classifcation + CNN 1D + Torch Audio" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [] 14 | } 15 | ], 16 | "metadata": { 17 | "language_info": { 18 | "name": "python" 19 | } 20 | }, 21 | "nbformat": 4, 22 | "nbformat_minor": 2 23 | } 24 | -------------------------------------------------------------------------------- /Code/08 - Speech/ASR/03 - Speech + LSTM + CTC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Speech Recognition + LSTM + CTC + Torch Audio" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": {}, 14 | "outputs": [], 
15 | "source": [ 16 | "import torch\n", 17 | "import torch.nn as nn\n", 18 | "import torch.nn.functional as F\n", 19 | "\n", 20 | "class Encoder(nn.Module):\n", 21 | " \"\"\"\n", 22 | " Encoder network: processes the input sequence (e.g., acoustic features)\n", 23 | " using a bidirectional LSTM and reduces the dimension with a linear layer.\n", 24 | " \"\"\"\n", 25 | " def __init__(self, input_dim, hidden_dim, num_layers=1):\n", 26 | " super(Encoder, self).__init__()\n", 27 | " self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True)\n", 28 | " self.fc = nn.Linear(hidden_dim * 2, hidden_dim)\n", 29 | " \n", 30 | " def forward(self, x):\n", 31 | " # x: (batch, T, input_dim)\n", 32 | " out, _ = self.lstm(x) # out: (batch, T, 2 * hidden_dim)\n", 33 | " out = self.fc(out) # out: (batch, T, hidden_dim)\n", 34 | " return out\n", 35 | "\n", 36 | "class Predictor(nn.Module):\n", 37 | " \"\"\"\n", 38 | " Predictor network: a language model that predicts the next token based on previous tokens.\n", 39 | " Uses an embedding layer and a unidirectional LSTM.\n", 40 | " \"\"\"\n", 41 | " def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers=1):\n", 42 | " super(Predictor, self).__init__()\n", 43 | " self.embedding = nn.Embedding(vocab_size, embed_dim)\n", 44 | " self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True)\n", 45 | " \n", 46 | " def forward(self, y):\n", 47 | " # y: (batch, U) target token sequence\n", 48 | " embedded = self.embedding(y) # (batch, U, embed_dim)\n", 49 | " out, _ = self.lstm(embedded) # (batch, U, hidden_dim)\n", 50 | " return out\n", 51 | "\n", 52 | "class JointNetwork(nn.Module):\n", 53 | " \"\"\"\n", 54 | " Joint network: combines encoder and predictor outputs using a specified combination mode.\n", 55 | " \n", 56 | " The available modes are:\n", 57 | " - 'multiplicative' or 'mul': element-wise multiplication.\n", 58 | " - 'additive' or 'add': element-wise addition.\n", 59 | " \n", 60 | " After combining the features, the network applies a tanh nonlinearity, then a fully connected layer\n", 61 | " to project to the vocabulary space, and finally returns a probability distribution via softmax.\n", 62 | " \"\"\"\n", 63 | " MODES = {\n", 64 | " 'multiplicative': lambda f, g: f * g,\n", 65 | " 'mul': lambda f, g: f * g,\n", 66 | " 'additive': lambda f, g: f + g,\n", 67 | " 'add': lambda f, g: f + g\n", 68 | " }\n", 69 | " \n", 70 | " def __init__(self, hidden_dim, joint_dim, vocab_size, mode='additive'):\n", 71 | " \"\"\"\n", 72 | " Args:\n", 73 | " hidden_dim (int): Dimension of the encoder and predictor outputs.\n", 74 | " joint_dim (int): Dimension of the joint space.\n", 75 | " vocab_size (int): Number of tokens in the vocabulary.\n", 76 | " mode (str): Combination mode, one of 'multiplicative'/'mul' or 'additive'/'add'.\n", 77 | " \"\"\"\n", 78 | " super(JointNetwork, self).__init__()\n", 79 | " self.join_mode = self.MODES[mode]\n", 80 | " self.fc_enc = nn.Linear(hidden_dim, joint_dim)\n", 81 | " self.fc_pred = nn.Linear(hidden_dim, joint_dim)\n", 82 | " self.fc_out = nn.Linear(joint_dim, vocab_size)\n", 83 | " \n", 84 | " def forward(self, enc_out, pred_out):\n", 85 | " \"\"\"\n", 86 | " Combines the encoder and predictor outputs.\n", 87 | " \n", 88 | " Args:\n", 89 | " enc_out (Tensor): Encoder output of shape (batch, T, hidden_dim).\n", 90 | " pred_out (Tensor): Predictor output of shape (batch, U, hidden_dim).\n", 91 | " \n", 92 | " Returns:\n", 93 | " Tensor: Vocabulary probability distribution of shape 
(batch, T, U, vocab_size).\n", 94 | " \"\"\"\n", 95 | " # Transform and expand dimensions for broadcasting:\n", 96 | " f_enc = self.fc_enc(enc_out).unsqueeze(2) # (batch, T, 1, joint_dim)\n", 97 | " f_pred = self.fc_pred(pred_out).unsqueeze(1) # (batch, 1, U, joint_dim)\n", 98 | " \n", 99 | " # Combine using the specified mode and apply tanh:\n", 100 | " joint = torch.tanh(self.join_mode(f_enc, f_pred)) # (batch, T, U, joint_dim)\n", 101 | " \n", 102 | " # Project to the vocabulary space and return softmax probabilities:\n", 103 | " logits = self.fc_out(joint) # (batch, T, U, vocab_size)\n", 104 | " return torch.softmax(logits, dim=-1)\n", 105 | "\n", 106 | "class RNNT(nn.Module):\n", 107 | " \"\"\"\n", 108 | " RNN-Transducer (RNN-T) model combining the Encoder, Predictor, and JointNetwork.\n", 109 | " \"\"\"\n", 110 | " def __init__(self, input_dim, vocab_size, encoder_hidden_dim,\n", 111 | " predictor_embed_dim, predictor_hidden_dim,\n", 112 | " joint_dim, encoder_layers=1, predictor_layers=1, joint_mode='additive'):\n", 113 | " super(RNNT, self).__init__()\n", 114 | " self.encoder = Encoder(input_dim, encoder_hidden_dim, encoder_layers)\n", 115 | " self.predictor = Predictor(vocab_size, predictor_embed_dim, predictor_hidden_dim, predictor_layers)\n", 116 | " self.joint = JointNetwork(encoder_hidden_dim, joint_dim, vocab_size, mode=joint_mode)\n", 117 | " \n", 118 | " def forward(self, x, y):\n", 119 | " \"\"\"\n", 120 | " Args:\n", 121 | " x (Tensor): Input sequence (e.g., acoustic features) of shape (batch, T, input_dim).\n", 122 | " y (Tensor): Target token sequence of shape (batch, U).\n", 123 | " \n", 124 | " Returns:\n", 125 | " Tensor: Vocabulary distribution of shape (batch, T, U, vocab_size).\n", 126 | " \"\"\"\n", 127 | " enc_out = self.encoder(x) # (batch, T, hidden_dim)\n", 128 | " pred_out = self.predictor(y) # (batch, U, hidden_dim)\n", 129 | " logits = self.joint(enc_out, pred_out) # (batch, T, U, vocab_size)\n", 130 | " return logits" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 4, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "Logits shape: torch.Size([2, 50, 20, 30])\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "# Hyperparameters\n", 148 | "batch_size = 2\n", 149 | "T = 50 # Length of input sequence (e.g., number of acoustic frames)\n", 150 | "U = 20 # Length of target token sequence\n", 151 | "input_dim = 40 # Dimension of input features\n", 152 | "vocab_size = 30 # Vocabulary size (including blank token)\n", 153 | "encoder_hidden_dim = 256\n", 154 | "predictor_embed_dim = 128\n", 155 | "predictor_hidden_dim = 256\n", 156 | "joint_dim = 512\n", 157 | "\n", 158 | "# Instantiate the model\n", 159 | "model = RNNT(\n", 160 | " input_dim, \n", 161 | " vocab_size, \n", 162 | " encoder_hidden_dim,\n", 163 | " predictor_embed_dim, \n", 164 | " predictor_hidden_dim, \n", 165 | " joint_dim\n", 166 | ")\n", 167 | "\n", 168 | "# Dummy input: acoustic features and target sequences\n", 169 | "x = torch.randn(batch_size, T, input_dim)\n", 170 | "y = torch.randint(0, vocab_size, (batch_size, U))\n", 171 | "\n", 172 | "# Forward pass: obtain logits over the vocabulary\n", 173 | "logits = model(x, y)\n", 174 | "print(\"Logits shape:\", logits.shape) # Expected shape: (batch, T, U, vocab_size)\n", 175 | "\n", 176 | "# To compute loss, you would typically use an RNN-T loss function,\n", 177 | "# for example, from a third-party implementation." 
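, "\n", "\n", "# One concrete option (an assumption, not part of this repo) is torchaudio's\n", "# torchaudio.functional.rnnt_loss (torchaudio >= 0.10). It expects raw joiner\n", "# logits of shape (batch, T, U + 1, vocab) plus int32 sequence lengths, so you\n", "# would return the fc_out output without the final softmax and feed the\n", "# predictor blank-prepended targets so its output covers U + 1 steps. Note\n", "# also that JointNetwork, as written, assumes\n", "# encoder_hidden_dim == predictor_hidden_dim.\n"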
178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [] 186 | } 187 | ], 188 | "metadata": { 189 | "kernelspec": { 190 | "display_name": "Python 3", 191 | "language": "python", 192 | "name": "python3" 193 | }, 194 | "language_info": { 195 | "codemirror_mode": { 196 | "name": "ipython", 197 | "version": 3 198 | }, 199 | "file_extension": ".py", 200 | "mimetype": "text/x-python", 201 | "name": "python", 202 | "nbconvert_exporter": "python", 203 | "pygments_lexer": "ipython3", 204 | "version": "3.9.6" 205 | } 206 | }, 207 | "nbformat": 4, 208 | "nbformat_minor": 2 209 | } 210 | -------------------------------------------------------------------------------- /Code/08 - Speech/ASR/04 - Speech + Transformer + CTC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Speech Recognition + Transformer + CTC + Torch Audio" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | } 17 | ], 18 | "metadata": { 19 | "kernelspec": { 20 | "display_name": "Python 3", 21 | "language": "python", 22 | "name": "python3" 23 | }, 24 | "language_info": { 25 | "codemirror_mode": { 26 | "name": "ipython", 27 | "version": 3 28 | }, 29 | "file_extension": ".py", 30 | "mimetype": "text/x-python", 31 | "name": "python", 32 | "nbconvert_exporter": "python", 33 | "pygments_lexer": "ipython3", 34 | "version": "3.9.6" 35 | } 36 | }, 37 | "nbformat": 4, 38 | "nbformat_minor": 2 39 | } 40 | -------------------------------------------------------------------------------- /Code/08 - Speech/README.md: -------------------------------------------------------------------------------- 1 | # Speech Recognition and Synthesis (ASR & TTS) 2 | 3 | This repository contains resources and insights from a lecture on Automatic Speech Recognition (ASR) and Text-to-Speech (TTS) technologies. It covers various tasks related to speech processing and their application in NLP. 4 | 5 | ## Table of Contents 6 | - [Speech Recognition and Synthesis (ASR \& TTS)](#speech-recognition-and-synthesis-asr--tts) 7 | - [Table of Contents](#table-of-contents) 8 | - [Overview](#overview) 9 | - [Key Concepts](#key-concepts) 10 | - [Automatic Speech Recognition (ASR)](#automatic-speech-recognition-asr) 11 | - [Text-to-Speech (TTS)](#text-to-speech-tts) 12 | - [Speech-Related Tasks](#speech-related-tasks) 13 | - [Evaluation Metrics](#evaluation-metrics) 14 | - [State-of-the-Art Models](#state-of-the-art-models) 15 | - [Good to Read](#good-to-read) 16 | - [Reference Resource \& Code](#reference-resource--code) 17 | 18 | ## Overview 19 | This section covers the essentials of ASR and TTS, key components in speech processing, and their role in natural language understanding. It introduces various models and evaluation metrics used in speech tasks like transcription, synthesis, and translation. 20 | 21 | ## Key Concepts 22 | 23 | ### Automatic Speech Recognition (ASR) 24 | - ASR is a process that converts spoken language into text. 25 | - The **RNN-T** (Recurrent Neural Network-Transducer) model is essential for ASR, combining acoustic and language models. 26 | - **Word Error Rate (WER)** is used to evaluate ASR systems. 27 | 28 | ### Text-to-Speech (TTS) 29 | - TTS systems convert text into spoken words. 
30 | - **WaveNet** (a neural vocoder) and encoder-decoder models such as Tacotron are used for synthesizing speech. 31 | - **Mean Opinion Score (MOS)** is used to evaluate TTS quality. 32 | 33 | ## Speech-Related Tasks 34 | - **Wake Word Detection**: Detects specific trigger words (e.g., "Hey Siri"). 35 | - **Speaker Diarization**: Identifies individual speakers in multi-speaker recordings. 36 | - **Voice Cloning**: Synthetically recreates voices. 37 | 38 | ## Evaluation Metrics 39 | - **TTS**: Mean Opinion Score (MOS) for assessing synthetic speech. 40 | - **ASR**: Word Error Rate (WER) for transcription accuracy. 41 | 42 | ## State-of-the-Art Models 43 | - **Whisper**: A robust ASR model by OpenAI. 44 | - **Tacotron**: A deep-learning TTS model for natural speech synthesis. 45 | 46 | 47 | ## Good to Read 48 | - [Latif et al. (2023)](https://arxiv.org/pdf/2308.12792), Sparks of Large Audio Models: A Survey and Outlook 49 | 50 | 51 | ## Reference Resource & Code 52 | - https://github.com/MajoRoth/ASR -------------------------------------------------------------------------------- /Code/08 - Speech/TTS/01 - WaveNet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "import torch.nn as nn\n", 11 | "import torch.nn.functional as F\n", 12 | "\n", 13 | "class CausalConv1d(nn.Module):\n", 14 | " \"\"\"\n", 15 | " A 1D causal convolution layer that pads the input on the left.\n", 16 | " \"\"\"\n", 17 | " def __init__(self, in_channels, out_channels, kernel_size, dilation):\n", 18 | " super(CausalConv1d, self).__init__()\n", 19 | " self.kernel_size = kernel_size\n", 20 | " self.dilation = dilation\n", 21 | " self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, padding=0, dilation=dilation)\n", 22 | " \n", 23 | " def forward(self, x):\n", 24 | " # Compute the required padding for causal convolution.\n", 25 | " padding = self.dilation * (self.kernel_size - 1)\n", 26 | " # Pad only on the left (i.e.
the beginning of the sequence)\n", 27 | " x = F.pad(x, (padding, 0))\n", 28 | " return self.conv(x)\n", 29 | "\n", 30 | "class WaveNetBlock(nn.Module):\n", 31 | " \"\"\"\n", 32 | " A single WaveNet residual block with a gated activation unit.\n", 33 | " \"\"\"\n", 34 | " def __init__(self, residual_channels, skip_channels, kernel_size, dilation):\n", 35 | " super(WaveNetBlock, self).__init__()\n", 36 | " # Two parallel causal convolutions for filter and gate.\n", 37 | " self.filter_conv = CausalConv1d(residual_channels, residual_channels, kernel_size, dilation)\n", 38 | " self.gate_conv = CausalConv1d(residual_channels, residual_channels, kernel_size, dilation)\n", 39 | " # 1x1 convolutions for residual and skip connections.\n", 40 | " self.residual_conv = nn.Conv1d(residual_channels, residual_channels, kernel_size=1)\n", 41 | " self.skip_conv = nn.Conv1d(residual_channels, skip_channels, kernel_size=1)\n", 42 | " \n", 43 | " def forward(self, x):\n", 44 | " # Apply dilated convolutions for filter and gate.\n", 45 | " filter_out = torch.tanh(self.filter_conv(x))\n", 46 | " gate_out = torch.sigmoid(self.gate_conv(x))\n", 47 | " # Elementwise multiplication of filter and gate outputs.\n", 48 | " out = filter_out * gate_out\n", 49 | " # Create skip and residual outputs.\n", 50 | " skip = self.skip_conv(out)\n", 51 | " residual = self.residual_conv(out)\n", 52 | " # Residual connection: add the block input back to the output.\n", 53 | " residual = residual + x\n", 54 | " return residual, skip\n", 55 | "\n", 56 | "class WaveNet(nn.Module):\n", 57 | " \"\"\"\n", 58 | " A simplified WaveNet model that stacks multiple WaveNet blocks.\n", 59 | " \"\"\"\n", 60 | " def __init__(self, in_channels, residual_channels, skip_channels, end_channels,\n", 61 | " kernel_size, num_layers, dilation_cycle_length):\n", 62 | " \"\"\"\n", 63 | " Args:\n", 64 | " in_channels (int): Number of input channels (e.g. 
number of quantization channels).\n", 65 | " residual_channels (int): Number of channels in the residual layers.\n", 66 | " skip_channels (int): Number of channels in the skip connections.\n", 67 | " end_channels (int): Number of channels in the post-processing layers.\n", 68 | " kernel_size (int): Size of the convolutional kernel.\n", 69 | " num_layers (int): Total number of WaveNet blocks.\n", 70 | " dilation_cycle_length (int): Number of layers before dilation factors repeat.\n", 71 | " \"\"\"\n", 72 | " super(WaveNet, self).__init__()\n", 73 | " # Initial 1x1 convolution to match channel dimensions.\n", 74 | " self.input_conv = nn.Conv1d(in_channels, residual_channels, kernel_size=1)\n", 75 | " self.blocks = nn.ModuleList()\n", 76 | " # Create a series of WaveNet blocks with exponentially increasing dilations.\n", 77 | " for i in range(num_layers):\n", 78 | " dilation = 2 ** (i % dilation_cycle_length)\n", 79 | " self.blocks.append(WaveNetBlock(residual_channels, skip_channels, kernel_size, dilation))\n", 80 | " self.relu = nn.ReLU()\n", 81 | " # Post-processing: 1x1 convolutions to produce final output logits.\n", 82 | " self.output_conv1 = nn.Conv1d(skip_channels, end_channels, kernel_size=1)\n", 83 | " self.output_conv2 = nn.Conv1d(end_channels, in_channels, kernel_size=1)\n", 84 | " \n", 85 | " def forward(self, x):\n", 86 | " \"\"\"\n", 87 | " Args:\n", 88 | " x (Tensor): Input tensor of shape (batch, channels, time).\n", 89 | " Returns:\n", 90 | " Tensor: Output logits over the quantized audio channels.\n", 91 | " \"\"\"\n", 92 | " x = self.input_conv(x)\n", 93 | " skip_connections = []\n", 94 | " # Process the input through each residual block.\n", 95 | " for block in self.blocks:\n", 96 | " x, skip = block(x)\n", 97 | " skip_connections.append(skip)\n", 98 | " # Sum all skip connection outputs.\n", 99 | " out = sum(skip_connections)\n", 100 | " out = self.relu(out)\n", 101 | " out = self.output_conv1(out)\n", 102 | " out = self.relu(out)\n", 103 | " out = self.output_conv2(out)\n", 104 | " return out" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 2, 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "name": "stdout", 114 | "output_type": "stream", 115 | "text": [ 116 | "Output shape: torch.Size([2, 256, 16000])\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | " # Example parameters (adjust these as needed)\n", 122 | "batch_size = 2\n", 123 | "# Suppose audio is quantized into 256 channels (e.g. 
μ-law quantization)\n", 124 | "in_channels = 256 \n", 125 | "sequence_length = 16000 # e.g., one second of audio at 16kHz\n", 126 | "model = WaveNet(in_channels=in_channels, residual_channels=32, skip_channels=32,\n", 127 | " end_channels=32, kernel_size=2, num_layers=10, dilation_cycle_length=10)\n", 128 | "# Create a dummy input tensor (batch, channels, time)\n", 129 | "x = torch.randn(batch_size, in_channels, sequence_length)\n", 130 | "out = model(x)\n", 131 | "print(\"Output shape:\", out.shape)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 3", 145 | "language": "python", 146 | "name": "python3" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 3 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | "nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython3", 158 | "version": "3.9.6" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 2 163 | } 164 | -------------------------------------------------------------------------------- /Code/08 - Speech/TTS/02 - Tacotron.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/08 - Speech/TTS/02 - Tacotron.ipynb -------------------------------------------------------------------------------- /Code/08 - Speech/TTS/03 - Whisper.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/08 - Speech/TTS/03 - Whisper.ipynb -------------------------------------------------------------------------------- /Code/08 - Speech/figures/FT.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/08 - Speech/figures/FT.webp -------------------------------------------------------------------------------- /Code/08 - Speech/figures/MelScale.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/08 - Speech/figures/MelScale.gif -------------------------------------------------------------------------------- /Code/08 - Speech/figures/Spectrogram.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/08 - Speech/figures/Spectrogram.webp -------------------------------------------------------------------------------- /Code/09 - Multimodal/README.md: -------------------------------------------------------------------------------- 1 | # Multimodal Large Language Models 2 | 3 | Determining the order of publication in this case is a bit tricky because different papers might be released publicly at different times even though their official publication date might be the same. 
However, going by each paper's first public arXiv release, the order is reasonably clear: 4 | 5 | 1. **ViT (Vision Transformer):** First released on arXiv in October 2020 and published at ICLR 2021, making it the earliest among the listed models. 6 | 2. **CLIP (Contrastive Language-Image Pre-training):** Released by OpenAI in January 2021 (blog post and code), with the arXiv paper following shortly after. 7 | 3. **BEIT (BERT Pre-Training of Image Transformers):** First released on arXiv in June 2021 and published at ICLR 2022. 8 | 4. **Flamingo (a Visual Language Model for Few-Shot Learning):** First released on arXiv in April 2022 and published at NeurIPS 2022. 9 | 5. **BLIP-2 (BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models):** First released on arXiv in January 2023 and published at ICML 2023. 10 | 11 | Therefore, the order of public release is: 12 | 13 | 1. ViT 14 | 2. CLIP 15 | 3. BEIT 16 | 4. Flamingo 17 | 5. BLIP-2 18 | 19 | Keep in mind that the official conference publication date can lag the public arXiv release by several months to a year. 20 | -------------------------------------------------------------------------------- /Code/09 - Multimodal/appendix/04-SimVLM-tentative.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# [SimVLM: Simple Visual Language Model Pretraining with Weak Supervision](https://arxiv.org/pdf/2108.10904v3.pdf)\n", 9 | "\n", 10 | "\n", 11 | "\n", 12 | "reference\n", 13 | "- https://github.com/YulongBonjour/SimVLM\n", 14 | "- https://github.com/FerryHuang/SimVLM/tree/master" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "vscode": { 22 | "languageId": "plaintext" 23 | } 24 | }, 25 | "outputs": [], 26 | "source": [] 27 | } 28 | ], 29 | "metadata": { 30 | "language_info": { 31 | "name": "python" 32 | }, 33 | "orig_nbformat": 4 34 | }, 35 | "nbformat": 4, 36 | "nbformat_minor": 2 37 | } 38 | -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/ViTArchitecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/ViTArchitecture.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/beit_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/beit_architecture.png
-------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/blip2_architecture_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/blip2_architecture_1.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/blip2_architecture_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/blip2_architecture_2.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/brain-mri-lgg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/brain-mri-lgg.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/coca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/coca.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/flamingo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/flamingo.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/gated-xattn-dense-layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/gated-xattn-dense-layers.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/patch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/patch.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/perceiver-resample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/perceiver-resample.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/simvlm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/simvlm.png 
-------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/teaser.png -------------------------------------------------------------------------------- /Code/09 - Multimodal/figures/vit.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/09 - Multimodal/figures/vit.gif -------------------------------------------------------------------------------- /Code/10 - Benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking and Evaluation in Natural Language Processing (NLP) 2 | 3 | This repository is a comprehensive guide on benchmarking and evaluation techniques used in Natural Language Processing (NLP). It covers both closed-ended and open-ended evaluations, with a focus on the methods, metrics, and challenges associated with assessing the performance of NLP systems. 4 | 5 | ## Table of Contents 6 | - [Benchmarking and Evaluation in Natural Language Processing (NLP)](#benchmarking-and-evaluation-in-natural-language-processing-nlp) 7 | - [Table of Contents](#table-of-contents) 8 | - [Overview](#overview) 9 | - [Key Concepts](#key-concepts) 10 | - [Closed-Ended Evaluations](#closed-ended-evaluations) 11 | - [Open-Ended Evaluations](#open-ended-evaluations) 12 | - [Evaluation Metrics](#evaluation-metrics) 13 | - [Content Overlap Metrics](#content-overlap-metrics) 14 | - [Model-Based Metrics](#model-based-metrics) 15 | - [Human Evaluation](#human-evaluation) 16 | - [State-of-the-Art Benchmarks](#state-of-the-art-benchmarks) 17 | - [Challenges and Solutions](#challenges-and-solutions) 18 | - [Challenges](#challenges) 19 | - [Solutions](#solutions) 20 | 21 | ## Overview 22 | Benchmarking and evaluation are critical for understanding the capabilities and limitations of NLP models. This resource explores methods for evaluating language models in tasks such as sentiment analysis, question answering, summarization, translation, and more. 23 | 24 | ## Key Concepts 25 | 26 | ### Closed-Ended Evaluations 27 | - **Definition**: Tasks with a limited number of possible answers, often one or a few correct ones. These evaluations are common in supervised learning. 28 | - **Examples**: 29 | - **Sentiment Analysis**: Classifying sentiment in text (e.g., SST, IMDB). 30 | - **Named Entity Recognition (NER)**: Identifying entities like people, organizations, and locations in text (e.g., CoNLL-2003). 31 | - **Question Answering**: Extracting answers from a given context (e.g., SQuAD). 32 | - **Benchmark**: SuperGLUE is a prominent multi-task benchmark designed for general-purpose language understanding. 33 | 34 | ### Open-Ended Evaluations 35 | - **Definition**: Tasks that require generating text with no fixed set of correct answers, such as summarization, translation, or chatbot conversations. 36 | - **Examples**: 37 | - **Summarization**: Condensing articles into summaries (e.g., CNN/Daily Mail). 38 | - **Machine Translation**: Translating text from one language to another (e.g., WMT). 39 | - **Chatbot Evaluation**: Assessing conversation quality (e.g., Chatbot Arena, AlpacaEval).
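Scoring these open-ended outputs relies on the overlap and model-based metrics described in the next section. As a minimal sketch (assuming the Hugging Face `evaluate` package from this repo's `requirements.txt` and its `rouge_score` dependency are installed; the strings are made-up examples):

```python
import evaluate

# ROUGE compares n-gram overlap between a generated text and a reference.
rouge = evaluate.load("rouge")
scores = rouge.compute(
    predictions=["the cat sat on the mat"],
    references=["a cat was sitting on the mat"],
)
print(scores)  # dict of rouge1 / rouge2 / rougeL / rougeLsum F-measures
```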
40 | 41 | ## Evaluation Metrics 42 | 43 | ### Content Overlap Metrics 44 | - **BLEU**: Measures n-gram precision, commonly used in machine translation. 45 | - **ROUGE**: Measures n-gram recall, widely used in summarization and other text generation tasks. 46 | - These metrics are simple and efficient but not ideal for tasks like summarization or dialogue systems due to their lack of semantic understanding. 47 | 48 | ### Model-Based Metrics 49 | - **BERTScore**: Uses contextual embeddings from BERT to compute similarity between generated and reference texts. 50 | - **BLEURT**: An even more advanced model-based metric designed to correlate better with human evaluations. 51 | 52 | ### Human Evaluation 53 | - **Definition**: Human evaluations remain the gold standard for text generation tasks, providing insights into fluency, coherence, factual accuracy, and more. 54 | - **Challenges**: Human evaluation can be slow, costly, and inconsistent. New methods like AlpacaEval aim to reduce human evaluation costs and improve scalability. 55 | 56 | ## State-of-the-Art Benchmarks 57 | - **SuperGLUE**: A challenging benchmark for general language understanding tasks. 58 | - **MMLU**: A benchmark measuring the performance of language models across 57 diverse knowledge-intensive tasks. 59 | - **BIG-BENCH**: A comprehensive set of 204 language tasks for testing large-scale models. 60 | 61 | ## Challenges and Solutions 62 | 63 | ### Challenges 64 | - **Evaluating Long-Form Generation**: Open-ended tasks like summarization, translation, and chatbot evaluation pose significant challenges due to the subjective nature of "correct" answers. 65 | - **Metric Limitations**: Current evaluation metrics (e.g., BLEU, ROUGE) often fail to capture semantic similarity and can favor extractive over abstractive methods. 66 | 67 | ### Solutions 68 | - **Reference-Free Evaluation**: Models like AlpacaEval use LLMs to provide evaluation scores without human references, offering a more scalable solution for tasks like instruction-following. 69 | - **Comprehensive Evaluation**: The **HELM** initiative provides holistic benchmarks for evaluating language models, combining multiple tasks and metrics into one unified framework. 70 | -------------------------------------------------------------------------------- /Code/11 - Agentic AI/03 - Memory/draft.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [] 21 | } 22 | ], 23 | "metadata": { 24 | "language_info": { 25 | "name": "python" 26 | } 27 | }, 28 | "nbformat": 4, 29 | "nbformat_minor": 2 30 | } 31 | -------------------------------------------------------------------------------- /Code/11 - Agentic AI/README.md: -------------------------------------------------------------------------------- 1 | # Agentic AI (tentative) 2 | 3 | Welcome to the **Agentic AI** module. This directory groups practical, reusable components that help you turn large language models into *agents*—software entities that can plan, act, reflect, and improve over time.
4 | 5 | > *“An agent is an LLM with a purpose, a plan, the ability to call tools, **and the capacity to critique itself.**”* 6 | 7 | Status: 🚧 Work in progress — first public launch planned for 2026. 8 | 9 | The sub-folders follow the canonical 2025 agent stack: 10 | 11 | | Folder | Role | 12 | | ----------------- | --------------------------------------- | 13 | | **01-Workflows** | Orchestrate thinking & acting | 14 | | **02-Tools** | Interfaces to the external world | 15 | | **03-Memory** | Short- & long-term context store | 16 | | **04-Reflection** | Self-evaluation & iterative improvement | 17 | | **05-Evaluation** | Automated regression & benchmarking | 18 | 19 | --- 20 | 21 | ## ⚙️ 01 – Workflows 22 | 23 | Orchestrates *how* an agent thinks and acts. 24 | 25 | * Goal → Plan → Act → Observe → Refine templates 26 | * LangGraph / state-machine examples 27 | * Retry, timeout, and tracing utilities 28 | 29 | **Start here:** [Building Effective Agents – Anthropic (2025)](https://www.anthropic.com/engineering/building-effective-agents) & [AgentRecipes](https://www.agentrecipes.com/) 30 | 31 | --- 32 | 33 | ## 🛠 02 – Tools 34 | 35 | APIs and functions the agent can invoke to affect the outside world. 36 | 37 | * Tool-spec schema (`tool.json`) with signatures & rate-limit hints 38 | * Wrappers for search, code-exec, and domain APIs 39 | * Guard-rails: allow-lists, parameter validation, safe-exec 40 | 41 | --- 42 | 43 | ## 🧠 03 – Memory 44 | 45 | Keeps relevant context accessible across multiple turns or sessions. 46 | 47 | * Conversation buffer, vector store, summarisation & forgetting policies 48 | * **Reference:** *MemGPT: Towards LLMs as Operating Systems* (Packer et al., 2023) 49 | 50 | --- 51 | 52 | ## 🔄 04 – Reflection 53 | 54 | Gives the agent meta-cognition: the ability to critique, debug, and refine its own outputs. 55 | 56 | * *Reflexion* and *RAG-as-critic* patterns 57 | * Rubric-based self-grading prompts 58 | * Caching of reflection results to control cost 59 | 60 | > **TODO:** Provide a `reflection_runner.py` demo that re-asks the LLM to rate its answer and propose an improvement; a minimal sketch appears at the end of this README. 61 | 62 | --- 63 | 64 | ## 🧪 05 – Evaluation 65 | 66 | Ensures changes don’t silently break agent behaviour. 67 | 68 | * Smoke tests executed in CI (`pytest` + `langsmith` traces) 69 | * Golden-conversation dataset for regression 70 | * Automatic pass/fail based on rubric scores or reference answers 71 | 72 | > **TODO:** Add `eval/` with sample YAML spec and harness script.
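Until `reflection_runner.py` lands, the sketch below shows the shape of the reflection loop (everything here is an assumption: `llm` stands in for any text-in/text-out model client, and the rubric prompt is illustrative):

```python
from typing import Callable

def reflection_runner(llm: Callable[[str], str], task: str, max_rounds: int = 2) -> str:
    """Draft, self-critique, and revise; stop early when the self-grade is high."""
    answer = llm(f"Task: {task}\nGive your best answer.")
    for _ in range(max_rounds):
        # Ask the model to grade its own answer and suggest one improvement.
        critique = llm(
            f"Task: {task}\nAnswer: {answer}\n"
            "Grade the answer from 1-5 on correctness and completeness, "
            "then suggest one concrete improvement. Start the reply with the grade."
        )
        if critique.lstrip().startswith("5"):
            break  # good enough; skip the extra (costly) revision call
        # Revise the answer using the critique.
        answer = llm(
            f"Task: {task}\nPrevious answer: {answer}\nCritique: {critique}\n"
            "Rewrite the answer, applying the critique."
        )
    return answer
```

Caching the critique per (task, answer) pair, as noted above, keeps repeated runs cheap.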
73 | 74 | --- 75 | -------------------------------------------------------------------------------- /Code/11 - Agentic AI/figures/key_idea_memgpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/11 - Agentic AI/figures/key_idea_memgpt.png -------------------------------------------------------------------------------- /Code/11 - Agentic AI/figures/type_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/11 - Agentic AI/figures/type_memory.png -------------------------------------------------------------------------------- /Code/11 - Agentic AI/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/11 - Agentic AI/requirements.txt -------------------------------------------------------------------------------- /Code/Assignment-Quiz/.gitignore: -------------------------------------------------------------------------------- 1 | quiz/* 2 | solution/* -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2024/A1_Engine_Search.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2024/A1_Engine_Search.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2024/A2_Language_Model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2024/A2_Language_Model.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2024/A3_Machine_Translation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2024/A3_Machine_Translation.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2024/A4_Resume_Parser.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2024/A4_Resume_Parser.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2024/A5_Sentence_Embedding.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2024/A5_Sentence_Embedding.pdf 
-------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2024/A6_Student_Layers_Initialization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2024/A6_Student_Layers_Initialization.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2024/A7_AIT_GPT_Chatbot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2024/A7_AIT_GPT_Chatbot.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2024/A8_Alpaca_Instruction_Eval_hf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2024/A8_Alpaca_Instruction_Eval_hf.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2025/A1_That_s_What_I_LIKE.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2025/A1_That_s_What_I_LIKE.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2025/A2_Language_Model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2025/A2_Language_Model.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2025/A3_Make_Your_Own_Machine_Translation_Language.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2025/A3_Make_Your_Own_Machine_Translation_Language.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2025/A4_Do_you_AGREE.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2025/A4_Do_you_AGREE.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2025/A5_DPO.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2025/A5_DPO.pdf -------------------------------------------------------------------------------- 
/Code/Assignment-Quiz/assignment/2025/A6_TALK_W_DOCS.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2025/A6_TALK_W_DOCS.pdf -------------------------------------------------------------------------------- /Code/Assignment-Quiz/assignment/2025/A7_DISTIL_VS_LORA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/Code/Assignment-Quiz/assignment/2025/A7_DISTIL_VS_LORA.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Natural Language Processing 2 | 3 | This is the repository for the Natural Language Processing course at the Asian Institute of Technology. 4 | 5 | Google slide lectures can be found: 6 | 7 | YT playlist: 8 | 9 | ## Credits 10 | 11 | I would also like to give huge credits to several GitHub / web resources that I have revised to create this: 12 | 13 | - 14 | - 15 | - 16 | - 17 | - 18 | - 19 | - 20 | - 21 | - 22 | - 23 | - 24 | 25 | Useful GitHub 26 | 27 | - (collection of all NLP learning resources) 28 | - (omg...this is like a mini Wikipedia for NLP!) 29 | - (listed first 100 influential NLP papers) 30 | - (good for working on explainable AI) 31 | 32 | I would also like to thank students who have contributed: 33 | 34 | - Amanda Raj Shrestha; Email: 35 | - Pranisaa Charnparttarvanit; Email: 36 | - Chanapa Pananookooln; Email: 37 | - Todsavad Tangtortan; Email: 38 | 39 | ### Outline 40 | 41 | 1. Fundamentals 42 | - Word Vectors - Word2Vec (Naive) 43 | - Word Vectors - Word2Vec (Negative sampling) 44 | - Word Vectors - GloVe 45 | - Window-Based Named Entity Recognition 46 | - Dependency Parsing 47 | - Information Retrieval / Salient Spans 48 | 2. DL 49 | - Classification 50 | - Sequence-to-Sequence Models 51 | - LSTM, biGRU, CNN, Transformer 52 | - Language Models 53 | - LSTM, Transformer 54 | - Masked Language Models 55 | - BERT 56 | - Much more... 57 | - Case studies 58 | - QA 59 | - Summarization 60 | - Pruning 61 | - distilBERT 62 | - SentenceBERT 63 | - SimCSE 64 | - Much more... 65 | 66 | 3. SpaCy 67 | 4. Huggingface 68 | - Case studies 69 | 70 | 5. Retrieval Augmented Generation 71 | - Prompt, Chain, Tools, Agent 72 | 6. Multimodal Language Model 73 | - ViT, BEIT, CLIP, SimVLM, Flamingo, BLIP-2, CoCa 74 | 7.
Reinforcement Learning with Human Feedback 75 | - SFT, PPO, DPO, RRHF -------------------------------------------------------------------------------- /figures/contextlength.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/contextlength.png -------------------------------------------------------------------------------- /figures/dependency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/dependency.png -------------------------------------------------------------------------------- /figures/fasttokenqa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/fasttokenqa.png -------------------------------------------------------------------------------- /figures/heads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/heads.png -------------------------------------------------------------------------------- /figures/historybert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/historybert.png -------------------------------------------------------------------------------- /figures/karpathy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/karpathy.jpg -------------------------------------------------------------------------------- /figures/ner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/ner.png -------------------------------------------------------------------------------- /figures/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/pipeline.png -------------------------------------------------------------------------------- /figures/pretoken.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/pretoken.png -------------------------------------------------------------------------------- /figures/rnn_weight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/rnn_weight.png 
-------------------------------------------------------------------------------- /figures/semanticsearch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/semanticsearch.png -------------------------------------------------------------------------------- /figures/sentiment1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment1.png -------------------------------------------------------------------------------- /figures/sentiment12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment12.png -------------------------------------------------------------------------------- /figures/sentiment12_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment12_2.png -------------------------------------------------------------------------------- /figures/sentiment13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment13.png -------------------------------------------------------------------------------- /figures/sentiment14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment14.png -------------------------------------------------------------------------------- /figures/sentiment15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment15.png -------------------------------------------------------------------------------- /figures/sentiment2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment2.png -------------------------------------------------------------------------------- /figures/sentiment3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment3.png -------------------------------------------------------------------------------- /figures/sentiment4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment4.png 
-------------------------------------------------------------------------------- /figures/sentiment9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/sentiment9.png -------------------------------------------------------------------------------- /figures/tokenizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/tokenizer.png -------------------------------------------------------------------------------- /figures/unigram1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/unigram1.png -------------------------------------------------------------------------------- /figures/unigram2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chaklam-silpasuwanchai/Python-fo-Natural-Language-Processing/80bcbf5071eeef1c9dea87969f1d03976da96db2/figures/unigram2.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://download.pytorch.org/whl/cu118 2 | 3 | torch 4 | torchvision 5 | torchtext 6 | 7 | numpy>=1.24.0 8 | pandas>=2.0.0 9 | scipy>=1.10.0 10 | scikit-learn>=1.2.0 11 | matplotlib>=3.7.0 12 | seaborn>=0.12.0 13 | transformers>=4.30.0 14 | huggingface-hub>=0.19.0 15 | datasets>=2.14.0 16 | evaluate>=0.4.0 17 | sentencepiece>=0.1.99 18 | accelerate>=0.24.0 19 | gensim>=4.3.0 20 | spacy>=3.7.0 21 | langchain>=0.0.300 22 | docker>=6.1.0 23 | docker-compose>=1.29.2 24 | fastapi>=0.100.0 25 | tqdm>=4.65.0 26 | --------------------------------------------------------------------------------