├── .gitignore ├── LICENSE ├── README.md ├── imgs ├── 4-gram.png ├── LN1.png ├── LN2.png ├── LN3.png ├── The-Transformer-model-architecture.png ├── Transformer-encoder-decoder.jpeg ├── attention_mask.png ├── beam-search-2.png ├── datasets-style.png ├── dist.png ├── encoder-decoder.png ├── encoder-layer.png ├── kd.png ├── layer_norm_pos.png ├── ln_post.png ├── ln_post_pre.png ├── mha.png ├── post_pre_ln.jpeg ├── pretrain.png ├── qkv.png ├── scaled-dot-attn-img.png ├── scaled-dot-prod-attn.png ├── t5.png ├── top_k_top_p.png ├── trainer.png ├── transformer_as_feature_extractor.png ├── trg_len_-100.jpeg └── warmup_scheduler.png └── tutorials ├── 01_fine_tune_transformers_on_classification.ipynb ├── 02_transformer_architecture_self_attention.ipynb ├── 03_transformer_architecture_multi_head_attention.ipynb ├── 04_ffn_layer_norm_skip_conn.ipynb ├── 05_transformer_decoder_layer.ipynb ├── 06_gpt2_overall.ipynb ├── 07_gpt2_decoding(generation).ipynb ├── 08_gpt2_decoding_sampling.ipynb ├── 09_t5_overall.ipynb ├── 10_t5_summarization.ipynb ├── 11_knowledge_distillation_basics.ipynb ├── 12_意图识别任务_性能评估.ipynb ├── 13_kd_pipeline.ipynb ├── BLEU、ROUGE.ipynb ├── TrainingArguments & Trainer.ipynb ├── ar_lm_loss_PPL-perplexity.ipynb ├── attention ├── SDPA_torch.ipynb ├── causal_attn_mask.ipynb ├── deepseek_nsa.ipynb ├── flash_attn.ipynb ├── flash_attn_supp.ipynb ├── fused_kernel.ipynb └── imgs │ ├── attention_steps.png │ ├── flash-attn-algo.png │ ├── flash-attn.png │ ├── hf_flash_attn.png │ ├── packing_padding.jpeg │ ├── sram_comp.png │ └── stand_attn.png ├── bert 变体.ipynb ├── bpe.ipynb ├── ckpts_weights_model_bin.ipynb ├── data_collator.ipynb ├── data_processing ├── data │ ├── data.json │ └── data.jsonl ├── dataset_dataloader.ipynb └── json_jsonl.ipynb ├── dataset_dataloader.ipynb ├── embedding_visualize_analysis.ipynb ├── encoder-decoder ├── bow_wo_pe.ipynb ├── reshape_permute_einsum.ipynb ├── src_mask_key_padding_mask.ipynb └── transformer-encoder-decoder.ipynb ├── encoder_decoder_models.ipynb ├── flan-t5.ipynb ├── gpt2_train └── basics.ipynb ├── gpt2_training_inference_ppl.ipynb ├── hf_basics_datasets_metrics.ipynb ├── hf_transformers_basics.ipynb ├── huggingface_utils.ipynb ├── huggingfaceh4.ipynb ├── layer_norm_pre_post.ipynb ├── lm_decoder_only_padding.ipynb ├── nltk.ipynb ├── padding.ipynb ├── pipeline.ipynb ├── sbert-tutorials ├── embedding_对比.ipynb ├── sbert-applications.ipynb ├── sbert_源码分析.ipynb └── sentence-transformers-pipeline.ipynb ├── sin_position_encoding.ipynb ├── tasks ├── NER_Named_Entity_Recognition.ipynb └── nlp_basics_tasks.ipynb ├── tied_shared_tensors_lm_head_token_embedding.ipynb ├── tinystories └── tinystories.ipynb ├── transformer-circuits ├── basics.ipynb └── imgs │ ├── Basic-model-of-sparse-representation-theory.png │ └── residual-stream.png ├── transformers_utils.py └── viz └── bertviz.ipynb /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/README.md -------------------------------------------------------------------------------- /imgs/4-gram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/4-gram.png -------------------------------------------------------------------------------- /imgs/LN1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/LN1.png -------------------------------------------------------------------------------- /imgs/LN2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/LN2.png -------------------------------------------------------------------------------- /imgs/LN3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/LN3.png -------------------------------------------------------------------------------- /imgs/The-Transformer-model-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/The-Transformer-model-architecture.png -------------------------------------------------------------------------------- /imgs/Transformer-encoder-decoder.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/Transformer-encoder-decoder.jpeg -------------------------------------------------------------------------------- /imgs/attention_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/attention_mask.png -------------------------------------------------------------------------------- /imgs/beam-search-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/beam-search-2.png -------------------------------------------------------------------------------- /imgs/datasets-style.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/datasets-style.png -------------------------------------------------------------------------------- /imgs/dist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/dist.png -------------------------------------------------------------------------------- /imgs/encoder-decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/encoder-decoder.png -------------------------------------------------------------------------------- /imgs/encoder-layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/encoder-layer.png -------------------------------------------------------------------------------- /imgs/kd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/kd.png -------------------------------------------------------------------------------- /imgs/layer_norm_pos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/layer_norm_pos.png -------------------------------------------------------------------------------- /imgs/ln_post.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/ln_post.png -------------------------------------------------------------------------------- /imgs/ln_post_pre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/ln_post_pre.png -------------------------------------------------------------------------------- /imgs/mha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/mha.png -------------------------------------------------------------------------------- /imgs/post_pre_ln.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/post_pre_ln.jpeg -------------------------------------------------------------------------------- /imgs/pretrain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/pretrain.png -------------------------------------------------------------------------------- /imgs/qkv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/qkv.png -------------------------------------------------------------------------------- /imgs/scaled-dot-attn-img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/scaled-dot-attn-img.png -------------------------------------------------------------------------------- /imgs/scaled-dot-prod-attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/scaled-dot-prod-attn.png -------------------------------------------------------------------------------- /imgs/t5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/t5.png -------------------------------------------------------------------------------- /imgs/top_k_top_p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/top_k_top_p.png -------------------------------------------------------------------------------- /imgs/trainer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/trainer.png -------------------------------------------------------------------------------- /imgs/transformer_as_feature_extractor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/transformer_as_feature_extractor.png -------------------------------------------------------------------------------- /imgs/trg_len_-100.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/trg_len_-100.jpeg -------------------------------------------------------------------------------- /imgs/warmup_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/imgs/warmup_scheduler.png -------------------------------------------------------------------------------- /tutorials/01_fine_tune_transformers_on_classification.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/01_fine_tune_transformers_on_classification.ipynb -------------------------------------------------------------------------------- /tutorials/02_transformer_architecture_self_attention.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/02_transformer_architecture_self_attention.ipynb -------------------------------------------------------------------------------- /tutorials/03_transformer_architecture_multi_head_attention.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/03_transformer_architecture_multi_head_attention.ipynb -------------------------------------------------------------------------------- /tutorials/04_ffn_layer_norm_skip_conn.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/04_ffn_layer_norm_skip_conn.ipynb -------------------------------------------------------------------------------- /tutorials/05_transformer_decoder_layer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/05_transformer_decoder_layer.ipynb -------------------------------------------------------------------------------- /tutorials/06_gpt2_overall.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/06_gpt2_overall.ipynb -------------------------------------------------------------------------------- /tutorials/07_gpt2_decoding(generation).ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/07_gpt2_decoding(generation).ipynb -------------------------------------------------------------------------------- /tutorials/08_gpt2_decoding_sampling.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/08_gpt2_decoding_sampling.ipynb -------------------------------------------------------------------------------- /tutorials/09_t5_overall.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/09_t5_overall.ipynb -------------------------------------------------------------------------------- /tutorials/10_t5_summarization.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/10_t5_summarization.ipynb -------------------------------------------------------------------------------- /tutorials/11_knowledge_distillation_basics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/11_knowledge_distillation_basics.ipynb -------------------------------------------------------------------------------- /tutorials/12_意图识别任务_性能评估.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/12_意图识别任务_性能评估.ipynb -------------------------------------------------------------------------------- /tutorials/13_kd_pipeline.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/13_kd_pipeline.ipynb -------------------------------------------------------------------------------- /tutorials/BLEU、ROUGE.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/BLEU、ROUGE.ipynb -------------------------------------------------------------------------------- /tutorials/TrainingArguments & Trainer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/TrainingArguments & Trainer.ipynb -------------------------------------------------------------------------------- /tutorials/ar_lm_loss_PPL-perplexity.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/ar_lm_loss_PPL-perplexity.ipynb -------------------------------------------------------------------------------- /tutorials/attention/SDPA_torch.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/SDPA_torch.ipynb -------------------------------------------------------------------------------- /tutorials/attention/causal_attn_mask.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/causal_attn_mask.ipynb -------------------------------------------------------------------------------- /tutorials/attention/deepseek_nsa.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/deepseek_nsa.ipynb -------------------------------------------------------------------------------- /tutorials/attention/flash_attn.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/flash_attn.ipynb -------------------------------------------------------------------------------- /tutorials/attention/flash_attn_supp.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/flash_attn_supp.ipynb -------------------------------------------------------------------------------- /tutorials/attention/fused_kernel.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/fused_kernel.ipynb -------------------------------------------------------------------------------- /tutorials/attention/imgs/attention_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/imgs/attention_steps.png -------------------------------------------------------------------------------- /tutorials/attention/imgs/flash-attn-algo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/imgs/flash-attn-algo.png -------------------------------------------------------------------------------- /tutorials/attention/imgs/flash-attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/imgs/flash-attn.png -------------------------------------------------------------------------------- /tutorials/attention/imgs/hf_flash_attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/imgs/hf_flash_attn.png -------------------------------------------------------------------------------- /tutorials/attention/imgs/packing_padding.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/imgs/packing_padding.jpeg -------------------------------------------------------------------------------- /tutorials/attention/imgs/sram_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/imgs/sram_comp.png -------------------------------------------------------------------------------- /tutorials/attention/imgs/stand_attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/attention/imgs/stand_attn.png -------------------------------------------------------------------------------- /tutorials/bert 变体.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/bert 变体.ipynb -------------------------------------------------------------------------------- /tutorials/bpe.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/bpe.ipynb -------------------------------------------------------------------------------- /tutorials/ckpts_weights_model_bin.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/ckpts_weights_model_bin.ipynb -------------------------------------------------------------------------------- /tutorials/data_collator.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/data_collator.ipynb -------------------------------------------------------------------------------- /tutorials/data_processing/data/data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/data_processing/data/data.json -------------------------------------------------------------------------------- /tutorials/data_processing/data/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/data_processing/data/data.jsonl -------------------------------------------------------------------------------- /tutorials/data_processing/dataset_dataloader.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/data_processing/dataset_dataloader.ipynb -------------------------------------------------------------------------------- /tutorials/data_processing/json_jsonl.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/data_processing/json_jsonl.ipynb -------------------------------------------------------------------------------- /tutorials/dataset_dataloader.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/dataset_dataloader.ipynb -------------------------------------------------------------------------------- /tutorials/embedding_visualize_analysis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/embedding_visualize_analysis.ipynb -------------------------------------------------------------------------------- /tutorials/encoder-decoder/bow_wo_pe.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/encoder-decoder/bow_wo_pe.ipynb -------------------------------------------------------------------------------- /tutorials/encoder-decoder/reshape_permute_einsum.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/encoder-decoder/reshape_permute_einsum.ipynb -------------------------------------------------------------------------------- /tutorials/encoder-decoder/src_mask_key_padding_mask.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/encoder-decoder/src_mask_key_padding_mask.ipynb -------------------------------------------------------------------------------- /tutorials/encoder-decoder/transformer-encoder-decoder.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/encoder-decoder/transformer-encoder-decoder.ipynb -------------------------------------------------------------------------------- /tutorials/encoder_decoder_models.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/encoder_decoder_models.ipynb -------------------------------------------------------------------------------- /tutorials/flan-t5.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/flan-t5.ipynb -------------------------------------------------------------------------------- /tutorials/gpt2_train/basics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/gpt2_train/basics.ipynb -------------------------------------------------------------------------------- /tutorials/gpt2_training_inference_ppl.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/gpt2_training_inference_ppl.ipynb -------------------------------------------------------------------------------- /tutorials/hf_basics_datasets_metrics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/hf_basics_datasets_metrics.ipynb -------------------------------------------------------------------------------- /tutorials/hf_transformers_basics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/hf_transformers_basics.ipynb -------------------------------------------------------------------------------- /tutorials/huggingface_utils.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/huggingface_utils.ipynb -------------------------------------------------------------------------------- /tutorials/huggingfaceh4.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/huggingfaceh4.ipynb -------------------------------------------------------------------------------- /tutorials/layer_norm_pre_post.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/layer_norm_pre_post.ipynb -------------------------------------------------------------------------------- /tutorials/lm_decoder_only_padding.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/lm_decoder_only_padding.ipynb -------------------------------------------------------------------------------- /tutorials/nltk.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/nltk.ipynb -------------------------------------------------------------------------------- /tutorials/padding.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/padding.ipynb -------------------------------------------------------------------------------- /tutorials/pipeline.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/pipeline.ipynb -------------------------------------------------------------------------------- /tutorials/sbert-tutorials/embedding_对比.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/sbert-tutorials/embedding_对比.ipynb -------------------------------------------------------------------------------- /tutorials/sbert-tutorials/sbert-applications.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/sbert-tutorials/sbert-applications.ipynb -------------------------------------------------------------------------------- /tutorials/sbert-tutorials/sbert_源码分析.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/sbert-tutorials/sbert_源码分析.ipynb -------------------------------------------------------------------------------- /tutorials/sbert-tutorials/sentence-transformers-pipeline.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/sbert-tutorials/sentence-transformers-pipeline.ipynb -------------------------------------------------------------------------------- /tutorials/sin_position_encoding.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/sin_position_encoding.ipynb -------------------------------------------------------------------------------- /tutorials/tasks/NER_Named_Entity_Recognition.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/tasks/NER_Named_Entity_Recognition.ipynb -------------------------------------------------------------------------------- /tutorials/tasks/nlp_basics_tasks.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/tasks/nlp_basics_tasks.ipynb -------------------------------------------------------------------------------- /tutorials/tied_shared_tensors_lm_head_token_embedding.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/tied_shared_tensors_lm_head_token_embedding.ipynb -------------------------------------------------------------------------------- /tutorials/tinystories/tinystories.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/tinystories/tinystories.ipynb -------------------------------------------------------------------------------- /tutorials/transformer-circuits/basics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/transformer-circuits/basics.ipynb -------------------------------------------------------------------------------- /tutorials/transformer-circuits/imgs/Basic-model-of-sparse-representation-theory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/transformer-circuits/imgs/Basic-model-of-sparse-representation-theory.png -------------------------------------------------------------------------------- /tutorials/transformer-circuits/imgs/residual-stream.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/transformer-circuits/imgs/residual-stream.png -------------------------------------------------------------------------------- /tutorials/transformers_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/transformers_utils.py -------------------------------------------------------------------------------- /tutorials/viz/bertviz.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chunhuizhang/bert_t5_gpt/HEAD/tutorials/viz/bertviz.ipynb --------------------------------------------------------------------------------