├── .gitignore
├── Meta-Scheduler
├── infra.excalidraw.svg
└── new_infra.excalidraw.svg
├── README.md
├── ai_compiler
├── IREE
│ ├── IREE_Survey.md
│ ├── evaluate
│ │ ├── auto-scheduler.md
│ │ ├── benchmark-module.md
│ │ └── img_benchmark-module
│ │ │ └── compilation_flow.png
│ ├── img_IREE_Survey
│ │ ├── v2-5b69d56e33512deeb65eda364c343859_1440w.webp
│ │ ├── v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg
│ │ ├── 截屏2022-12-07 17.43.39.png
│ │ ├── 截屏2022-12-07 21.42.13.png
│ │ ├── 截屏2023-02-28 09.31.38.png
│ │ └── 截屏2023-02-28 09.31.47.png
│ ├── img_会议文件
│ │ ├── %E6%88%AA%E5%B1%8F2023-05-01_18.33.01.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-05-02_12.36.57.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-05-02_12.42.52.png
│ │ ├── 20200130-IREE_Jan_2020_MLIR_ODM-_External.pdf
│ │ ├── 20200820-IREE_CodeGen_-_Public.pdf
│ │ ├── 20210609_-_IREE_Runtime_Design_Slides.pdf
│ │ ├── 20220505-IREE_targeting_Vulkan_Zhang_May22.pdf
│ │ ├── HALDialect.png
│ │ ├── HALOps.png
│ │ ├── Untitled 1.png
│ │ ├── Untitled 10.png
│ │ ├── Untitled 11.png
│ │ ├── Untitled 12.png
│ │ ├── Untitled 13.png
│ │ ├── Untitled 14.png
│ │ ├── Untitled 15.png
│ │ ├── Untitled 16.png
│ │ ├── Untitled 17.png
│ │ ├── Untitled 18.png
│ │ ├── Untitled 19.png
│ │ ├── Untitled 2.png
│ │ ├── Untitled 20.png
│ │ ├── Untitled 21.png
│ │ ├── Untitled 22.png
│ │ ├── Untitled 23.png
│ │ ├── Untitled 24.png
│ │ ├── Untitled 25.png
│ │ ├── Untitled 26.png
│ │ ├── Untitled 27.png
│ │ ├── Untitled 28.png
│ │ ├── Untitled 29.png
│ │ ├── Untitled 3.png
│ │ ├── Untitled 30.png
│ │ ├── Untitled 4.png
│ │ ├── Untitled 5.png
│ │ ├── Untitled 6.png
│ │ ├── Untitled 7.png
│ │ ├── Untitled 8.png
│ │ ├── Untitled 9.png
│ │ └── Untitled.png
│ ├── pipeline
│ │ ├── img_linalg-vector-gpu-llvm
│ │ │ ├── Untitled 1.png
│ │ │ ├── Untitled 1.txt
│ │ │ ├── Untitled 2.txt
│ │ │ ├── Untitled 3.txt
│ │ │ ├── Untitled.png
│ │ │ └── Untitled.txt
│ │ ├── img_pipeline
│ │ │ ├── Untitled 1.png
│ │ │ ├── Untitled 2.png
│ │ │ └── Untitled.png
│ │ ├── linalg-vector-gpu-llvm.md
│ │ └── pipeline.md
│ └── 会议文件.md
├── LLVM
│ ├── LLVM简介.md
│ ├── TableGen.md
│ ├── img_LLVM简介
│ │ ├── webp-1664801155586-3.webp
│ │ ├── webp-1664801163209-6.webp
│ │ ├── webp-1664802251158-9.webp
│ │ └── webp.webp
│ └── img_TableGen
│ │ └── type.png
├── MLIR
│ ├── MLIR_CodeGen_summary.md
│ ├── MLIR_Note.md
│ ├── MLIR_Survey.md
│ ├── composition
│ │ ├── Analysis.md
│ │ ├── Builder.md
│ │ ├── Dialect.md
│ │ ├── Interface.md
│ │ └── PDLL.md
│ ├── img_CodeGen_summary
│ │ ├── 0.png
│ │ ├── 1.png
│ │ ├── 2.png
│ │ ├── 3.png
│ │ ├── 4.png
│ │ ├── 5.png
│ │ ├── 6.png
│ │ ├── 7.jpeg
│ │ ├── codegen-dialect-hierarchy-20230214213053227.svg
│ │ └── cover1.png
│ ├── img_MLIR_Note
│ │ ├── Untitled 1.png
│ │ ├── Untitled 2.png
│ │ └── Untitled.png
│ ├── img_MLIR_Survey
│ │ ├── cover1.png
│ │ ├── cover2.png
│ │ ├── image-111.png
│ │ ├── image-121.png
│ │ ├── image-131.png
│ │ ├── image-211.png
│ │ ├── image-212.png
│ │ ├── image-213.png
│ │ ├── image-221.png
│ │ ├── image-231.png
│ │ ├── image-232.png
│ │ ├── image-251.png
│ │ ├── image-301.png
│ │ ├── image-302.png
│ │ ├── image-321.png
│ │ ├── image-322.png
│ │ ├── image-331.png
│ │ ├── image-332.png
│ │ ├── image-334.png
│ │ ├── image-342.png
│ │ ├── image-431.png
│ │ ├── image-511.png
│ │ ├── image-512.png
│ │ ├── image-513.png
│ │ ├── image-521.png
│ │ ├── image-522.png
│ │ ├── image-523.png
│ │ ├── image-524.png
│ │ └── image-611.png
│ ├── meeting pdf
│ │ ├── 2021-10-07-The-Torch-MLIR-project.pdf
│ │ ├── Structured Ops in MLIR.pdf
│ │ ├── Tensor Codegen Thoughts - Jan 23, 2020 MLIR- external ODM.pdf
│ │ └── Tutorial-AminiVasilacheZinenko-MLIR.pdf
│ └── pipeline
│ │ ├── MLIR_matmul性能测试.md
│ │ ├── hlo2linalg.md
│ │ ├── img_hlo2linalg
│ │ ├── 1251718-20210923060706363-1852351942.png
│ │ ├── 3-7236750.png
│ │ └── 截屏2023-02-13 17.45.49.png
│ │ ├── img_linalg
│ │ ├── 3.png
│ │ ├── 73613629-c5586580-45c5-11ea-94b7-074aeea94c7b.png
│ │ ├── 73613904-2f720a00-45c8-11ea-8265-1c856c02525b.png
│ │ └── codegen-dialect-hierarchy.svg
│ │ ├── img_matmul性能测试
│ │ └── Untitled.png
│ │ └── linalg.md
├── TVM
│ ├── [TVM] vectorize 和 tensorize Pass.md
│ ├── [TVM]MLC课程.md
│ ├── [TVM]简介.md
│ ├── [TVM]编译安装.md
│ ├── img_MLC课程
│ │ ├── %E6%88%AA%E5%B1%8F2023-05-30_12.08.14.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_10.33.29.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_15.15.02.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_19.55.57.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_21.08.19.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_21.34.01.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-06-01_11.56.31.png
│ │ └── image-20230329133901335.png
│ ├── img_vectorize 和 tensorize Pass
│ │ ├── image-20230329133901335.png
│ │ └── v2-ee6ca5e08aee17b8f9998dd3a3da75c1_r.jpg
│ ├── img_简介
│ │ ├── 4type.png
│ │ ├── 4type2.png
│ │ ├── DNN1.png
│ │ ├── DNN2.png
│ │ ├── TVMSoftwareStack.png
│ │ ├── TVMflow.png
│ │ ├── all_unity.png
│ │ ├── automation.png
│ │ ├── cooperate.png
│ │ ├── horizontal.png
│ │ ├── now_q1.png
│ │ ├── unify.png
│ │ └── unity.png
│ └── img_编译安装
│ │ ├── 201907192304343.png
│ │ ├── image-20220925220059169.png
│ │ ├── image-20220925220149543.png
│ │ ├── image-20220925220653571.png
│ │ ├── image-20220925220718150.png
│ │ ├── image-20220925220751150.png
│ │ ├── image-20220925222016904.png
│ │ ├── image-20220925234257698.png
│ │ ├── image-20220926002042777.png
│ │ ├── image-20220926002050138.png
│ │ ├── image-20220926002056920.png
│ │ ├── image-20220926085918902.png
│ │ ├── image-20220926090051685.png
│ │ ├── image-20220926090558359.png
│ │ ├── image-20220926090943230.png
│ │ ├── image-20220926095351317.png
│ │ ├── image-20220926100008289.png
│ │ ├── image-20220926100817561.png
│ │ └── watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MjA4MTM4OQ==,size_16,color_FFFFFF,t_70.png
├── Triton
│ ├── Triton_base.md
│ ├── Triton_example.md
│ ├── Triton_language.md
│ ├── Triton_linalg.md
│ ├── Triton_optim.md
│ ├── img_Triton_base
│ │ ├── cta_wrap_thread.png
│ │ ├── cuda_triton.png
│ │ ├── cuda_vs_triton.png
│ │ ├── distribute_layout.png
│ │ ├── gpu_arch.png
│ │ ├── layout.png
│ │ ├── swizzled.png
│ │ ├── triton_arch.png
│ │ └── triton_arch_now.png
│ ├── img_Triton_language
│ │ ├── load.png
│ │ ├── loadpid0.png
│ │ ├── loadpid1.png
│ │ ├── loadpid2.png
│ │ └── store.png
│ ├── img_Triton_linalg
│ │ ├── bqb1.png
│ │ ├── dialect.png
│ │ ├── diff_with_triton_shared.png
│ │ ├── mlir_pipeline.png
│ │ ├── opt.png
│ │ └── success.png
│ └── 编译安装.md
├── XLA
│ ├── img_xla2hlo
│ │ ├── Screen%2BShot%2B2017-02-27%2Bat%2B9.54.12%2BAM.png
│ │ ├── codegen-dialect-hierarchy.svg
│ │ ├── how-does-xla-work.png
│ │ ├── v2-16e964ead53e7c71c0cc4dff6ed11851_b.jpg
│ │ ├── v2-84cd6a3244ebcd2f210626887a09c33f_b.jpg
│ │ ├── v2-ae9fc9f5aeb969d0c25940cd9f8f24c3_b.jpg
│ │ └── 截屏2023-02-14 17.54.11.png
│ └── xla2hlo.md
└── ai_compiler_commom
│ ├── AI_Compiler_Survey.md
│ ├── Graph_Partition.md
│ ├── img_AI_Compiler_Survey
│ ├── MS框架.png
│ ├── hlo优化.png
│ ├── 发展.png
│ ├── 后端优化.png
│ ├── 技术框架.png
│ └── 结构.png
│ └── img_Graph_Partition
│ └── ansor.png
├── basic
├── Architecture
│ └── Architecture.md
├── CMake
│ └── cmake.md
├── DataReuse
│ ├── DataReuse.md
│ └── img_data_reuse
│ │ ├── Untitled 1.jpeg
│ │ ├── Untitled 1.png
│ │ ├── Untitled 10.png
│ │ ├── Untitled 11.png
│ │ ├── Untitled 12.png
│ │ ├── Untitled 13.png
│ │ ├── Untitled 14.png
│ │ ├── Untitled 15.png
│ │ ├── Untitled 2.png
│ │ ├── Untitled 3.png
│ │ ├── Untitled 4.png
│ │ ├── Untitled 5.png
│ │ ├── Untitled 6.png
│ │ ├── Untitled 7.png
│ │ ├── Untitled 8.png
│ │ ├── Untitled 9.png
│ │ ├── Untitled.jpeg
│ │ └── Untitled.png
├── GPU
│ ├── GPU架构发展.md
│ └── img_GPU架构
│ │ ├── %E6%88%AA%E5%B1%8F2023-06-03_18.04.41.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-06-03_18.08.11.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-06-03_18.09.48.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-06-03_18.10.47.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-06-04_12.39.32.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-06-04_15.15.51.png
│ │ ├── IMG_8232.jpg
│ │ ├── Untitled 1.jpeg
│ │ ├── Untitled 1.png
│ │ ├── Untitled 10.jpeg
│ │ ├── Untitled 11.jpeg
│ │ ├── Untitled 12.jpeg
│ │ ├── Untitled 13.jpeg
│ │ ├── Untitled 14.jpeg
│ │ ├── Untitled 15.jpeg
│ │ ├── Untitled 16.jpeg
│ │ ├── Untitled 17.jpeg
│ │ ├── Untitled 18.jpeg
│ │ ├── Untitled 19.jpeg
│ │ ├── Untitled 2.jpeg
│ │ ├── Untitled 2.png
│ │ ├── Untitled 20.jpeg
│ │ ├── Untitled 3.jpeg
│ │ ├── Untitled 3.png
│ │ ├── Untitled 4.jpeg
│ │ ├── Untitled 4.png
│ │ ├── Untitled 5.jpeg
│ │ ├── Untitled 5.png
│ │ ├── Untitled 6.jpeg
│ │ ├── Untitled 6.png
│ │ ├── Untitled 7.jpeg
│ │ ├── Untitled 8.jpeg
│ │ ├── Untitled 9.jpeg
│ │ ├── Untitled.jpeg
│ │ └── Untitled.png
├── PolyhedralCompilation
│ ├── PolyhedralCompilation.md
│ └── img_Polyhedral_Compilation
│ │ ├── Untitled 1.png
│ │ ├── Untitled 10.png
│ │ ├── Untitled 11.png
│ │ ├── Untitled 12.png
│ │ ├── Untitled 13.png
│ │ ├── Untitled 14.png
│ │ ├── Untitled 15.png
│ │ ├── Untitled 16.png
│ │ ├── Untitled 17.png
│ │ ├── Untitled 18.png
│ │ ├── Untitled 19.png
│ │ ├── Untitled 2.png
│ │ ├── Untitled 20.png
│ │ ├── Untitled 21.png
│ │ ├── Untitled 22.png
│ │ ├── Untitled 23.png
│ │ ├── Untitled 24.png
│ │ ├── Untitled 25.png
│ │ ├── Untitled 26.png
│ │ ├── Untitled 27.png
│ │ ├── Untitled 28.png
│ │ ├── Untitled 29.png
│ │ ├── Untitled 3.png
│ │ ├── Untitled 30.png
│ │ ├── Untitled 31.png
│ │ ├── Untitled 4.png
│ │ ├── Untitled 5.png
│ │ ├── Untitled 6.png
│ │ ├── Untitled 7.png
│ │ ├── Untitled 8.png
│ │ ├── Untitled 9.png
│ │ ├── Untitled.jpeg
│ │ └── Untitled.png
└── PyTorch
│ ├── PyTorch2.0.md
│ └── img_PyTorch2.0
│ ├── Untitled 1.png
│ ├── Untitled 2.png
│ ├── Untitled 3.png
│ └── Untitled.png
├── coding
├── CPP.md
├── coding_note.md
└── img_coding_note
│ └── computational_complexity.png
├── paper_read
├── Astitch
│ ├── AStitch.pptx
│ ├── Astitch.md
│ └── img_Astitch
│ │ ├── image-20221128113128495.png
│ │ ├── image-20221128113426967.png
│ │ ├── image-20221128115126772.png
│ │ ├── image-20221128125650543.png
│ │ ├── image-20221128131909434.png
│ │ ├── image-20221128135909877.png
│ │ ├── image-20221128141107379.png
│ │ ├── image-20221128143319632.png
│ │ ├── image-20221128145444483.png
│ │ ├── image-20221128153319148.png
│ │ ├── image-20221128185111129.png
│ │ ├── image-20221128224708418.png
│ │ ├── image-20221129103644172.png
│ │ └── image-20221129104739007.png
├── Astra、Rammer、Roller
│ ├── img_Astra_Rammer_Roller
│ │ ├── Untitled 1.png
│ │ ├── Untitled 2.png
│ │ └── Untitled.png
│ └── 短记_Astra_Rammer_Roller.md
├── Attention
│ ├── Attention.md
│ ├── img_Attention
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-21_19.09.47.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-21_19.10.11.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_17.04.41.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_17.13.41.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_17.14.05.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_17.20.42.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_20.17.01.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_22.57.54.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_23.31.10.png
│ │ ├── Untitled 1.png
│ │ ├── Untitled 10.png
│ │ ├── Untitled 11.png
│ │ ├── Untitled 13.png
│ │ ├── Untitled 14.png
│ │ ├── Untitled 2.png
│ │ ├── Untitled 3.png
│ │ ├── Untitled 4.png
│ │ ├── Untitled 5.png
│ │ ├── Untitled 6.png
│ │ ├── Untitled 7.png
│ │ ├── Untitled 8.png
│ │ ├── Untitled 9.png
│ │ └── Untitled.png
│ └── test_attention
│ │ ├── attention.ipynb
│ │ └── attention.py
├── Auto-parallelism summary
│ ├── Auto_Parallelism.md
│ ├── auto-parallelism.pptx
│ └── img_auto_parallelism
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_11.35.50.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_11.36.20.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_14.21.53.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_15.06.46.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_21.22.37.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_21.23.07.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-02_22.59.26.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-02_23.01.41.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_15.04.36.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_16.29.54.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_17.39.56.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.18.39.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.24.00.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.27.19.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.42.48.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.46.39.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-02-24_23.10.38.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-02-24_23.21.48.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-02-24_23.52.54.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.15.44.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.29.39.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.35.43.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.38.09.png
│ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.39.49.png
│ │ ├── Untitled 1.png
│ │ ├── Untitled 10.png
│ │ ├── Untitled 11.png
│ │ ├── Untitled 12.png
│ │ ├── Untitled 13.png
│ │ ├── Untitled 14.png
│ │ ├── Untitled 15.png
│ │ ├── Untitled 16.png
│ │ ├── Untitled 17.png
│ │ ├── Untitled 18.png
│ │ ├── Untitled 19.png
│ │ ├── Untitled 2.png
│ │ ├── Untitled 20.png
│ │ ├── Untitled 21.png
│ │ ├── Untitled 3.png
│ │ ├── Untitled 4.png
│ │ ├── Untitled 5.png
│ │ ├── Untitled 6.png
│ │ ├── Untitled 7.png
│ │ ├── Untitled 8.png
│ │ ├── Untitled 9.png
│ │ ├── Untitled.png
│ │ └── mlsysdistribute_system.png
├── Composable and Modular Code Generation in MLIR
│ ├── Composable and Modular Code Generation in MLIR.md
│ ├── Composable and Modular Code Generation in MLIR.pptx
│ └── img_CodeGenerationInMLIR
│ │ ├── MLIRtoLLVM.png
│ │ ├── MLIRtoLLVMandIntrinsics.png
│ │ ├── classical优化.png
│ │ ├── image-213.png
│ │ ├── inplace-op.png
│ │ ├── relevant dialects1.png
│ │ ├── relevant dialects2.png
│ │ ├── silos.png
│ │ ├── special优化.png
│ │ ├── step1.png
│ │ ├── step2.png
│ │ ├── step3.png
│ │ ├── transformation1.png
│ │ ├── transformation2.png
│ │ ├── transformation3.png
│ │ ├── transformation4.png
│ │ ├── v2-5b69d56e33512deeb65eda364c343859_1440w.webp
│ │ ├── v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg
│ │ ├── x1.png
│ │ ├── 截屏2022-12-07 21.42.13.png
│ │ ├── 截屏2022-12-10 16.26.12.png
│ │ ├── 截屏2022-12-13 14.54.32.png
│ │ ├── 截屏2022-12-13 22.23.23.png
│ │ ├── 截屏2022-12-14 16.52.45.png
│ │ ├── 截屏2022-12-15 11.47.39.png
│ │ ├── 截屏2022-12-16 14.16.10.png
│ │ ├── 截屏2022-12-21 18.42.21.png
│ │ ├── 截屏2022-12-21 18.54.30.png
│ │ ├── 截屏2023-01-09 17.47.49.png
│ │ ├── 截屏2023-01-09 17.56.53.png
│ │ ├── 截屏2023-01-09 17.57.09.png
│ │ ├── 截屏2023-01-09 17.57.25.png
│ │ ├── 截屏2023-01-09 17.59.03.png
│ │ ├── 截屏2023-01-09 18.00.04.png
│ │ └── 截屏2023-01-09 18.00.43.png
├── DISC
│ ├── DISC.md
│ └── img_DISC
│ │ ├── %E6%88%AA%E5%B1%8F2023-03-02_23.20.46.png
│ │ ├── %E6%88%AA%E5%B1%8F2023-03-02_23.26.10.png
│ │ ├── Untitled 1.jpeg
│ │ ├── Untitled.jpeg
│ │ ├── Untitled.svg
│ │ └── v2-e68e87bbfeab11afd84bb40a17e1a179_r.jpg
├── Graphene
│ ├── ASPLOS’23 - Graphene An IR for Optimized Tensor Computations.pdf
│ ├── ASPLOS’23 - Graphene.pptx
│ ├── Graphene.md
│ └── img_Graphene
│ │ ├── ASPLOS_23_-_Graphene_An_IR_for_Optimized_Tensor_Computations.pdf
│ │ ├── Untitled 1.png
│ │ ├── Untitled 10.png
│ │ ├── Untitled 11.png
│ │ ├── Untitled 12.png
│ │ ├── Untitled 13.png
│ │ ├── Untitled 14.png
│ │ ├── Untitled 15.png
│ │ ├── Untitled 16.png
│ │ ├── Untitled 17.png
│ │ ├── Untitled 18.png
│ │ ├── Untitled 19.png
│ │ ├── Untitled 2.png
│ │ ├── Untitled 20.png
│ │ ├── Untitled 21.png
│ │ ├── Untitled 3.png
│ │ ├── Untitled 4.png
│ │ ├── Untitled 5.png
│ │ ├── Untitled 6.png
│ │ ├── Untitled 7.png
│ │ ├── Untitled 8.png
│ │ ├── Untitled 9.png
│ │ └── Untitled.png
├── Ray
│ ├── Ray.md
│ └── img_Ray
│ │ ├── Untitled.jpeg
│ │ └── Untitled.png
├── Slapo
│ └── ASPLOS24_Slapo.pptx
└── TensorIR
│ ├── ASPLOS’23 - TensorIR.pptx
│ └── TensorIR.md
└── tools
├── conda.md
├── git.md
├── linux.md
├── macos.md
├── tmux.md
├── vim.md
└── vim_file.zip
/.gitignore:
--------------------------------------------------------------------------------
1 | # .gitignore_global
2 | ####################################
3 | ######## OS generated files ########
4 | ####################################
5 | .DS_Store
6 | .DS_Store?
7 | *.swp
8 | ._*
9 | .Spotlight-V100
10 | .Trashes
11 | Icon?
12 | ehthumbs.db
13 | Thumbs.db
14 | ####################################
15 | ############# packages #############
16 | ####################################
17 | *.7z
18 | *.dmg
19 | *.gz
20 | *.iso
21 | *.jar
22 | *.rar
23 | *.tar
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mlsys-study-note
2 |
3 | 更新博客内容见: [tfruan2000.github.io](https://tfruan2000.github.io/)
4 |
5 | 现在这里只是我同步资料的小仓库啦~
6 |
7 | ```
8 | ├── ai_compiler
9 | | ├── TVM
10 | | ├── IREE
11 | | └── MLIR
12 | |
13 | ├── basic
14 | | ├── optimization
15 | | ├── tech: cmake.
16 | | └── lesson
17 | |
18 | ├── coding
19 | | ├── C++ / Rust / Python
20 | | └── ...
21 | |
22 | ├── paper_read
23 | | ├── 笔记咋都是纸质版的😫
24 | | └── ...
25 | |
26 | └── tools
27 | ├── git / vim / tmux 等使用笔记
28 | ├── macOS 配置
29 | └── ...
30 | ```
31 |
--------------------------------------------------------------------------------
/ai_compiler/IREE/IREE_Survey.md:
--------------------------------------------------------------------------------
1 | # 👻 IREE
2 |
3 | ## 1. IREE 简介
4 |
5 | > 官方网站:https://openxla.github.io/iree/
6 | >
7 | > mlir类比cpp,dialect类比stl,iree类比一个完整的项目
8 |
9 | [IREE](https://github.com/google/iree#iree-intermediate-representation-execution-environment) (Intermediate Representation Execution Environment)是一种基于MLIR的端到端编译器,可以将ML模型lower到统一的IR。它具有自己的高级表示以及一组 dialects,从代码生成的目的来说,**这些 dialects 正在向 Linalg-on-tensors 的方向发展**,严重依赖于tensor层级上的fusion。IREE-specific dialects 主要用于组织计算有效载荷,这些有效载荷目前可以用 MHLO、TOSA、Linalg-on-tensors 等表示。
10 |
11 | > 在tensor级别fusion通常更简单,因为不需要跟踪对buffer的读取和写入
12 |
13 | 讲解下图: https://drive.google.com/drive/u/0/folders/1sRAsgsd8Bvpm_IxREmZf2agsGU2KvrK-
14 |
15 |

16 |
17 | 
18 |
19 | 主要特征:
20 |
21 | - 提前编译调度和执行逻辑
22 | - 支持dynamic shapes, flow control, streaming和其他高级模型功能
23 | - 针对许多 CPU 和 GPU 架构进行了优化
24 | - 低开销、流水线执行以实现高效的功率和资源使用
25 | - 嵌入式系统上的二进制文件大小低至 30KB
26 | - 调试和分析支持
27 |
28 | ## 2. IREE 结构
29 |
30 | IREE对ML模型编译采用整体方法(holistic approach):生成的IR既包含==调度逻辑==,又包括==执行逻辑==。
31 |
32 | > 调度逻辑:需要将数据依赖性传达给低级并行流水线硬件/API (low-level parallel pipelined hardware/API)(如 [Vulkan](https://www.khronos.org/vulkan/))。
33 | >
34 | > 执行逻辑:将硬件上的密集计算编码为特定于硬件/API 的二进制文件,如[SPIR-V](https://www.khronos.org/spir/)。
35 |
36 |
37 |
38 | a) **导入您的模型**
39 |
40 | [使用受支持的框架](https://iree-org.github.io/iree/getting-started/#supported-frameworks)之一开发程序,然后使用 IREE 的导入工具之一运行模型。
41 |
42 | b) **选择您的[硬件部署配置](https://iree-org.github.io/iree/deployment-configurations/)**
43 |
44 | 确定目标平台、加速器和其他限制。
45 |
46 | c) **编译你的模型**
47 |
48 | 通过 IREE 编译,根据您的部署配置选择编译目标。
49 |
50 | d) **运行你的模型**
51 |
52 | 使用 IREE 的运行时组件来执行编译后的模型。
53 |
54 | ## 3. IREE Compiler
55 |
56 | - **IREE Compiler (LLVM Target)**
57 |
58 |
59 |
60 | 大多数转换都发生在 Linalg Dialect 中,在 tensor 或者 buffer 级别,以及 bufferization 过程(tensor向buffer转换)。生成可执行文件的首选路径是**lower到 Vector Dialect**,在这里可以进行额外的转换。当从 Linalg Dialect 往下 lowering 时,SCF 可用于围绕向量操作的控制流(control flow around vector operations),但对这些操作不执行任何转换。生成 SCF Dialect 本质上意味着不再进行进一步的结构优化。Vector Dialect 可以逐步 lower 到复杂度较低的抽象,直到最终生成 LLVM Dialect。
61 |
62 | - **IREE Compiler (SPIR-V Target)**
63 |
64 |
65 |
66 | [SPIR-V](https://mlir.llvm.org/docs/Dialects/SPIR-V/)(Standard Portable Intermediate Representation, [Khronos group](https://www.khronos.org/spir/) standard.)是IREE编译器的主要目标。顶层流程类似于生成 LLVM IR 的流程,**大多数转换都发生在 Linalg-on-tensor 和 Vector 级别上**。从这里开始,lowering 倾向于直接转到 SPIR-V ,SPIR-V 具有一组跨越多个抽象级别的丰富操作集,操作集中包含:高级操作、结构化控制流和类指令的原语(high-level operations, structured control flow and instruction-like primitives)。该流程通过 GPU Dialect 进行 device-only operations,如工作项标识符提取,并依赖 IREE 的 runtime 来管理 GPU 内核。
67 |
68 | > SPIR-V 最初发布于 2015 年。SPIR-V 是多个 Khronos API 共用的中间语言,包括 Vulkan, OpenGL, 以及 OpenCL。
69 | >
70 | > Khronos Group 的标语是“连接软件与硬件”,简明扼要地总结了它的任务。这种连接是通过标准规范 (standard) 和编程接口实现的。**Khronos Group 定义标准规范以及编程接口;硬件厂商提供它们的硬件实现,软件厂商则可以让软件在所有支持的平台与设备上运行。** Khronos Group 定义并维护了很多标准规范,比较著名的有 Vulkan, OpenGL, 以及 OpenCL。
71 | >
72 | > SPIR-V 支持通过多种机制来扩展其功能,包括添加新的枚举值,引入新的扩展 (extension),或者通过某个命名空间引入一整套指令 (extended instruction set)。其扩展也分为不同等级——厂商自有扩展 (vendor specific)、多厂商联合支持的扩展 (EXT)、 以及 Khronos 级别的扩展 (KHR)。
73 |
74 | 最近的一些工作允许 IREE 从 Vector Dialect 转换到 GPU Dialect,将 GPU 线程暴露为向量通道(在warp或block级别)。类似地,有些工作实现了绕过中间阶段、直接从 Linalg 和 Vector 转换到 SPIR-V,但可能会被渐进式的 lowering 方法取代。
75 |
76 |
77 |
78 | ## 4. IREE opt
79 |
80 | > 在 <https://github.com/iree-org/iree/commit/823fe5ace7285e5fda555ef12dbb029a130e73ef> 中提到
81 | >
82 | > "iree-hlo-to-linalg-on-tensors" 改成了 "iree-codegen-hlo-to-linalg-on-tensors"。
83 |
84 | iree-opt -h | grep hlo
85 | --iree-codegen-flow-hlo-to-hlo-preprocessing - Apply hlo to hlo transformations for some hlo ops
86 | --iree-codegen-hlo-to-linalg-on-buffers - Convert from XLA-HLO ops to Linalg ops on buffers
87 | --iree-codegen-hlo-to-linalg-on-tensors - Convert from XLA-HLO ops to Linalg ops on tensors
88 | --iree-codegen-shape-convert-hlo - Converts dynamic shape dependent HLO ops to shaped variants.
89 | --lhlo-legalize-to-linalg - Legalize from LHLO dialect to Linalg dialect
90 | --hlo-legalize-to-linalg - Legalize from HLO dialect to Linalg dialect
91 |
92 | 编译参考:https://openxla.github.io/iree/building-from-source/getting-started/#prerequisites
93 |
94 | 编译好的iree-opt在`iree-build/tools`
95 |
96 | ## 5. IREE 发展路线
97 |
98 | 待翻译:
99 |
100 | https://github.com/openxla/iree/blob/main/docs/developers/design_roadmap.md
101 |
102 |
103 |
104 |
--------------------------------------------------------------------------------
/ai_compiler/IREE/evaluate/img_benchmark-module/compilation_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/evaluate/img_benchmark-module/compilation_flow.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_IREE_Survey/v2-5b69d56e33512deeb65eda364c343859_1440w.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/v2-5b69d56e33512deeb65eda364c343859_1440w.webp
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_IREE_Survey/v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_IREE_Survey/截屏2022-12-07 17.43.39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/截屏2022-12-07 17.43.39.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_IREE_Survey/截屏2022-12-07 21.42.13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/截屏2022-12-07 21.42.13.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_IREE_Survey/截屏2023-02-28 09.31.38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/截屏2023-02-28 09.31.38.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_IREE_Survey/截屏2023-02-28 09.31.47.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/截屏2023-02-28 09.31.47.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-01_18.33.01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-01_18.33.01.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-02_12.36.57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-02_12.36.57.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-02_12.42.52.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-02_12.42.52.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/20200130-IREE_Jan_2020_MLIR_ODM-_External.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/20200130-IREE_Jan_2020_MLIR_ODM-_External.pdf
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/20200820-IREE_CodeGen_-_Public.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/20200820-IREE_CodeGen_-_Public.pdf
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/20210609_-_IREE_Runtime_Design_Slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/20210609_-_IREE_Runtime_Design_Slides.pdf
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/20220505-IREE_targeting_Vulkan_Zhang_May22.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/20220505-IREE_targeting_Vulkan_Zhang_May22.pdf
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/HALDialect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/HALDialect.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/HALOps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/HALOps.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 1.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 10.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 11.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 12.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 13.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 14.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 15.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 16.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 17.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 18.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 19.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 2.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 20.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 21.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 22.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 22.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 23.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 23.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 24.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 24.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 25.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 25.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 26.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 26.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 27.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 27.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 28.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 28.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 29.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 29.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 3.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 30.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 4.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 5.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 6.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 7.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 8.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled 9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 9.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/img_会议文件/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/pipeline/img_linalg-vector-gpu-llvm/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_linalg-vector-gpu-llvm/Untitled 1.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/pipeline/img_linalg-vector-gpu-llvm/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_linalg-vector-gpu-llvm/Untitled.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/pipeline/img_pipeline/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_pipeline/Untitled 1.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/pipeline/img_pipeline/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_pipeline/Untitled 2.png
--------------------------------------------------------------------------------
/ai_compiler/IREE/pipeline/img_pipeline/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_pipeline/Untitled.png
--------------------------------------------------------------------------------
/ai_compiler/LLVM/LLVM简介.md:
--------------------------------------------------------------------------------
1 | # LLVM
2 |
3 | > 定义:LLVM项目是模块化、可重用的编译器以及工具链技术的集合.
4 |
5 | ## 1. 传统的编译器架构
6 |
7 | 
8 |
9 | - Frontend:前端
10 |
11 | ==词法==分析、==语法==分析、==语义==分析、==生成中间代码==
12 |
13 | - Optimizer:优化器
14 |
15 | 中间代码优化
16 |
17 | - Backend:后端
18 |
19 | 生成机器码
20 |
21 | ## 2. LLVM架构
22 |
23 | 
24 |
25 | - 不同的前端后端使用统一的中间代码LLVM ==Intermediate Representation==(LLVM IR)
26 |
27 | - 如果需要支持一种新的==编程语言==,那么只需要实现一个新的==前端==
28 | - 如果需要支持一种新的==硬件设备==,那么只需要实现一个新的==后端==
29 | - 优化阶段是一个通用的阶段,它针对的是统一的 ==LLVM IR==,不论是哪种编程语言或哪种硬件设备,都不需要对优化阶段进行修改
30 | - 相比之下,GCC的前端和后端没有分得太开,前端和后端耦合在了一起,所以GCC为了支持一门新的语言或者新的目标平台,就变得特别困难
31 |
32 | > 相比之下,GCC的前端和后端没有分得太开,前端和后端耦合在了一起,所以GCC为了支持一门新的语言或者新的目标平台,就变得特别困难
33 |
34 | - LLVM现在被作为实现各种静态和运行时编译语言的通用基础结构
35 |
36 | ### Clang
37 |
38 | Clang是LLVM项目的一个子项目,是基于LLVM框架的**C/C++/Objective-C编译器的前端**。
39 |
40 | **相较于GCC,Clang具有以下优点:**
41 |
42 | - 编译速度快
43 | - 占用内存小:Clang生成的AST占用内存只为GCC的五分之一左右
44 | - 模块化设计:Clang采用基于库的模块化设计
45 | - 诊断信息可读性强:在编译过程中,Clang创建并保留了大量详细的元数据,有利于调试和错误报告
46 | - 易于扩展
47 |
48 | ### Clang和LLVM的关系
49 |
50 | 
51 |
52 | clang是llvm整体框架的前端
53 |
54 | 源代码(c/c++) $\rightarrow$ 经过clang $\rightarrow$ 中间代码 $\rightarrow$ 经过一系列的优化(pass) $\rightarrow$ 机器码
55 |
56 | ## 3. OC源文件的编译过程
57 |
58 | Xcode创建一个Test项目,然后cd到main.m的上一路径。
59 | 命令行查看编译的过程:$ clang -ccc-print-phases main.m
60 |
61 | ```cpp
62 | $ clang -ccc-print-phases main.m
63 |
64 | 0: input, "main.m", objective-c
65 | 1: preprocessor, {0}, objective-c-cpp-output
66 | 2: compiler, {1}, ir
67 | 3: backend, {2}, assembler
68 | 4: assembler, {3}, object
69 | 5: linker, {4}, image
70 | 6: bind-arch, "x86_64", {5}, image
71 | ```
72 |
73 | 找到main.m文件 $\rightarrow$ 预处理器,处理include、import、宏定义 $\rightarrow$ 编译器编译,生成ir中间代码 $\rightarrow$ 后端,生成目标代码 $\rightarrow$ 汇编 $\rightarrow$ 链接其他动态库静态库 $\rightarrow$ 编译成适合某个架构的代码
74 |
75 | 查看preprocessor(预处理器)的处理结果:$ clang -E main.m
76 |
77 | 会打印出大量信息
78 |
79 | ```cpp
80 | # 1 "main.m"
81 | # 1 "" 1
82 | # 1 "" 3
83 | # 353 "" 3
84 | # 1 "" 1
85 | # 1 "" 2
86 | # 1 "main.m" 2
87 | .
88 | .
89 | .
90 | int main(int argc, const char * argv[]) {
91 | @autoreleasepool {
92 | NSLog(@"Hello, World!");
93 | }
94 | return 0;
95 | }
96 | ```
97 |
98 | ### 词法分析(生成多个token)
99 |
100 | 词法分析,生成Token: `$ clang -fmodules -E -Xclang -dump-tokens main.m`
101 |
102 | 将代码分成一个个小单元(token)
103 |
104 | ```cpp
105 | void test(int a, int b){
106 | int c = a + b - 3;
107 | }
108 | ```
109 |
110 | 生成
111 |
112 | ```cpp
113 | void 'void' [StartOfLine] Loc=
114 | identifier 'test' [LeadingSpace] Loc=
115 | l_paren '(' Loc=
116 | int 'int' Loc=
117 | identifier 'a' [LeadingSpace] Loc=
118 | comma ',' Loc=
119 | int 'int' [LeadingSpace] Loc=
120 | identifier 'b' [LeadingSpace] Loc=
121 | r_paren ')' Loc=
122 | l_brace '{' Loc=
123 | int 'int' [StartOfLine] [LeadingSpace] Loc=
124 | identifier 'c' [LeadingSpace] Loc=
125 | equal '=' [LeadingSpace] Loc=
126 | identifier 'a' [LeadingSpace] Loc=
127 | plus '+' [LeadingSpace] Loc=
128 | identifier 'b' [LeadingSpace] Loc=
129 | minus '-' [LeadingSpace] Loc=
130 | numeric_constant '3' [LeadingSpace] Loc=
131 | semi ';' Loc=
132 | r_brace '}' [StartOfLine] Loc=
133 | eof '' Loc=
134 | ```
135 |
136 | 可以看出,词法分析时,上面的代码被拆分成一个个token,后面数字表示某一行的第几个字符,例如第一个void,表示18行第一个字符。
137 |
138 | ### 语法分析(生成AST)
139 |
140 | 语法分析后生成语法树(Abstract Syntax Tree):`$ clang -fmodules -fsyntax-only -Xclang -ast-dump main.m`
141 |
142 | ```cpp
143 | |-FunctionDecl 0x7fa1439f5630 line:18:6 test 'void (int, int)'
144 | | |-ParmVarDecl 0x7fa1439f54b0 col:15 used a 'int'
145 | | |-ParmVarDecl 0x7fa1439f5528 col:22 used b 'int'
146 | | `-CompoundStmt 0x7fa142167c88
147 | | `-DeclStmt 0x7fa142167c70
148 | | `-VarDecl 0x7fa1439f5708 col:9 c 'int' cinit
149 | | `-BinaryOperator 0x7fa142167c48 'int' '-'
150 | | |-BinaryOperator 0x7fa142167c00 'int' '+'
151 | | | |-ImplicitCastExpr 0x7fa1439f57b8 'int'
152 | | | | `-DeclRefExpr 0x7fa1439f5768 'int' lvalue ParmVar 0x7fa1439f54b0 'a' 'int'
153 | | | `-ImplicitCastExpr 0x7fa1439f57d0 'int'
154 | | | `-DeclRefExpr 0x7fa1439f5790 'int' lvalue ParmVar 0x7fa1439f5528 'b' 'int'
155 | | `-IntegerLiteral 0x7fa142167c28 'int' 3
156 |
157 | `-
158 | ```
159 |
160 | AST图形化如下显示
161 |
162 | 
163 |
164 | ### 语义分析(生成中间代码 LLVM IR)
165 |
166 | LLVM IR有三种表示形式(本质是等价的)
167 |
168 | - text:便于阅读的文本格式,类似于汇编,扩展名 .ll, $ clang -S -emit-llvm main.m
169 | - memory:内存格式
170 | - bitcode:二进制格式,扩展名 .bc, $ clang -c -emit-llvm main.m
171 |
172 | 以text形式编译查看
173 |
174 | ```cpp
175 | ; Function Attrs: noinline nounwind optnone ssp uwtable
176 | define void @test(i32, i32) #2 {
177 | %3 = alloca i32, align 4
178 | %4 = alloca i32, align 4
179 | %5 = alloca i32, align 4
180 | store i32 %0, i32* %3, align 4
181 | store i32 %1, i32* %4, align 4
182 | %6 = load i32, i32* %3, align 4
183 | %7 = load i32, i32* %4, align 4
184 | %8 = add nsw i32 %6, %7
185 | %9 = sub nsw i32 %8, 3
186 | store i32 %9, i32* %5, align 4
187 | ret void
188 | }
189 | ```
190 |
191 | ## 4. IR基本语法
192 |
193 | 注释以分号 ; 开头
194 | 全局标识符以@开头,局部标识符以%开头
195 | alloca,在当前函数栈帧中分配内存
196 | i32,32bit,4个字节的意思
197 | align,内存对齐
198 | store,写入数据
199 | load,读取数据
200 | 官方语法参考[LLVM Language Reference Manual — LLVM 16.0.0git documentation](https://llvm.org/docs/LangRef.html)
201 |
202 | 
203 |
204 |
205 |
206 |
207 |
208 |
--------------------------------------------------------------------------------
/ai_compiler/LLVM/img_LLVM简介/webp-1664801155586-3.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_LLVM简介/webp-1664801155586-3.webp
--------------------------------------------------------------------------------
/ai_compiler/LLVM/img_LLVM简介/webp-1664801163209-6.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_LLVM简介/webp-1664801163209-6.webp
--------------------------------------------------------------------------------
/ai_compiler/LLVM/img_LLVM简介/webp-1664802251158-9.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_LLVM简介/webp-1664802251158-9.webp
--------------------------------------------------------------------------------
/ai_compiler/LLVM/img_LLVM简介/webp.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_LLVM简介/webp.webp
--------------------------------------------------------------------------------
/ai_compiler/LLVM/img_TableGen/type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_TableGen/type.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/MLIR_Note.md:
--------------------------------------------------------------------------------
1 | # [MLIR] Code Note
2 |
3 | https://github.com/tfruan2000/tfruan2000.github.io/blob/main/_posts/MLIR/2024-08-07-mlir-code-note.md
4 |
5 | https://tfruan2000.github.io/posts/mlir-code-note/
6 |
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_CodeGen_summary/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/0.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_CodeGen_summary/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/1.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_CodeGen_summary/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/2.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_CodeGen_summary/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/3.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_CodeGen_summary/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/4.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_CodeGen_summary/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/5.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_CodeGen_summary/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/6.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_CodeGen_summary/7.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/7.jpeg
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_CodeGen_summary/cover1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/cover1.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Note/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Note/Untitled 1.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Note/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Note/Untitled 2.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Note/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Note/Untitled.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/cover1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/cover1.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/cover2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/cover2.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-111.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-111.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-121.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-121.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-131.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-131.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-211.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-211.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-212.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-212.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-213.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-213.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-221.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-221.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-231.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-231.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-232.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-232.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-251.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-251.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-301.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-301.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-302.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-302.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-321.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-321.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-322.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-322.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-331.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-331.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-332.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-332.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-334.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-334.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-342.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-342.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-431.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-431.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-511.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-511.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-512.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-513.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-513.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-521.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-521.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-522.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-522.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-523.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-523.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-524.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-524.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/img_MLIR_Survey/image-611.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-611.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/meeting pdf/2021-10-07-The-Torch-MLIR-project.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/meeting pdf/2021-10-07-The-Torch-MLIR-project.pdf
--------------------------------------------------------------------------------
/ai_compiler/MLIR/meeting pdf/Structured Ops in MLIR.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/meeting pdf/Structured Ops in MLIR.pdf
--------------------------------------------------------------------------------
/ai_compiler/MLIR/meeting pdf/Tensor Codegen Thoughts - Jan 23, 2020 MLIR- external ODM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/meeting pdf/Tensor Codegen Thoughts - Jan 23, 2020 MLIR- external ODM.pdf
--------------------------------------------------------------------------------
/ai_compiler/MLIR/meeting pdf/Tutorial-AminiVasilacheZinenko-MLIR.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/meeting pdf/Tutorial-AminiVasilacheZinenko-MLIR.pdf
--------------------------------------------------------------------------------
/ai_compiler/MLIR/pipeline/hlo2linalg.md:
--------------------------------------------------------------------------------
1 | # hlo到linalg
2 |
3 | > 在mlir中,hlo是一个广泛使用的高级表示,能够承接不同的前端框架输入;**linalg方言是目前比较重要的一层方言**,包括寒武纪在内的很多公司都使用了这一层的方言,在它的基础上进行**编译优化、调优和后端代码生成**。本文的目标是:熟悉这两种方言并了解从 hlo → linalg 的下降流程,梳理现在的支持情况,以及以后我们如何在上面添加支持。
4 |
5 | 两种方向:
6 |
7 | - TF→xla_hlo→IREE Flow→LinAlg→... (IREE使用的模式)
8 | - TF→xla_mhlo→xla_lhlo→... (TF 代码生成策略)
9 |
10 | > [HLO to LinAlg on buffers - no conversion](https://github.com/iree-org/iree/issues/2011#top)
11 | >
12 | > HLO → LHLO过程会完成buffer的分配,而 IREE 会事先进行缓冲区分配。
13 | >
14 | > HLO+XLA buffer assignment → lhlo
15 | >
16 | > LHLO存在是为了保留XLA的强大机制,用于 HLO 级别的layout/buffer allocation(和其他优化)并重新进入MLIR Codegen
17 | >
18 | > 目前正在努力将IREE的相关代码复制到TF Codegen中,以便去除LHLO
19 |
20 | 使用到的工具链:tf-opt、mlir-hlo-opt,mlir编译完成后只有mlir-opt,[过程中使用到的工具链编译](codegen工具链)
21 |
22 | **之前了解的从hlo到linalg流程**
23 |
24 | `tf-opt train.mhlo.mlir --hlo-legalize-to-linalg -o train.linalg.mlir`
25 |
26 |
27 |
28 |
29 |
30 |
31 | 完整的流程:模型文件 → TF dialect(tf_executor Dialect → tf Dialect)→ xla_hlo→xla_lhlo → linalg Dialect
32 |
33 | 
34 |
35 | 下文参考:https://discourse.llvm.org/t/llvm-ir-segmentation-fault-core-dumped/4302/1
36 |
37 | 1. 从模型 .pbtxt翻译为tf_executor Dialect (得到add.mlir)
38 |
39 | 使用`tf-mlir-translate`工具进行翻译,其中各种选项指定了输入输出的类型以及尺寸
40 |
41 | ```bash
42 | $ tf-mlir-translate -graphdef-to-mlir -tf-enable-shape-inference-on-import=false add.pbtxt -tf-input-arrays=input0,input1 -tf-input-data-types=DT_INT32,DT_INT32 -tf-input-shapes=10:10 -tf-output-arrays=Add -o add.mlir
43 | ```
44 |
45 | 2. tf_executor Dialect → tf Dialect (得到add-func.mlir)
46 |
47 | ```bash
48 | $ tf-opt -tf-executor-to-functional-conversion add.mlir -o add-func.mlir
49 | ```
50 |
51 | 3. tf Dialect → mhlo Dialect (得到add-mhlo.mlir)
52 |
53 | ```bash
54 | $ tf-opt --tf-to-hlo-pipeline add-func.mlir -o add-mhlo.mlir
55 | ```
56 |
57 | 4. mhlo Dialect → lhlo Dialect (得到add-lhlo.mlir)
58 |
59 | ```bash
60 | $ mlir-hlo-opt add-mhlo.mlir -hlo-legalize-to-lhlo -o add-lhlo.mlir
61 | ```
62 |
63 | 5. lhlo Dialect → linalg Dialect (得到add-linalg.mlir)
64 |
65 | ```bash
66 | $ tf-opt add-lhlo.mlir -lhlo-legalize-to-linalg -o add-linalg.mlir
67 | ```
68 |
69 | 到此为止,这一阶段的各种表达式变换以及工具链是由TensorFlow社区维护的。从这一步往后的编译过程以及基础设施,就是由MLIR社区进行维护的了。
70 |
71 | 6. linalg Dialect → Scf Dialect (得到add-scf.mlir)
72 |
73 | ```bash
74 | mlir-opt add-linalg.mlir -convert-linalg-to-loops -o add-scf.mlir
75 | ```
76 |
77 | 7. Scf Dialect → Std Dialect (得到add-std.mlir)
78 |
79 | ```bash
80 | mlir-opt add-scf.mlir -convert-scf-to-std -o add-std.mlir
81 | ```
82 |
83 | 8. Std Dialect → LLVM Dialect (得到add-llvm.mlir)
84 |
85 | ```bash
86 | mlir-opt add-std.mlir -convert-std-to-llvm -o add-llvm.mlir
87 | ```
88 |
89 | 9. LLVM Dialect → LLVM IR file (得到add.ll)
90 |
91 | ```bash
92 | mlir-translate add-llvm.mlir -mlir-to-llvmir -o add.ll
93 | ```
94 |
95 | 10. Use `lli` to run the .ll file
96 |
97 |
98 |
99 | 
100 |
101 |
102 |
103 |
--------------------------------------------------------------------------------
/ai_compiler/MLIR/pipeline/img_hlo2linalg/1251718-20210923060706363-1852351942.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_hlo2linalg/1251718-20210923060706363-1852351942.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/pipeline/img_hlo2linalg/3-7236750.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_hlo2linalg/3-7236750.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/pipeline/img_hlo2linalg/截屏2023-02-13 17.45.49.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_hlo2linalg/截屏2023-02-13 17.45.49.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/pipeline/img_linalg/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_linalg/3.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/pipeline/img_linalg/73613629-c5586580-45c5-11ea-94b7-074aeea94c7b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_linalg/73613629-c5586580-45c5-11ea-94b7-074aeea94c7b.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/pipeline/img_linalg/73613904-2f720a00-45c8-11ea-8265-1c856c02525b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_linalg/73613904-2f720a00-45c8-11ea-8265-1c856c02525b.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/pipeline/img_matmul性能测试/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_matmul性能测试/Untitled.png
--------------------------------------------------------------------------------
/ai_compiler/MLIR/pipeline/linalg.md:
--------------------------------------------------------------------------------
1 | # linalg
2 |
3 | > https://mlir.llvm.org/docs/Dialects/Linalg/
4 | >
5 | >
6 | > OpGraph: Graph of Tensor ops in MLIR
7 | >
8 | > TSOWB(e.g. LHLO): Target Specific Ops With Buffers
9 | >
10 | > CGASel: CodeGen Algorithm Selector,图分治算法(搜索或RL)
11 | >
12 | > HHO(e.g. Linalg):
13 | >
14 | > MHA: Memory Hierarchy Abstraction,循环层级
15 | >
16 | > HLTSIR: High Level Target Specific IR,vector+target intrinsics
17 | >
18 | > TSIR(e.g. LLVM IR): Target Specific IR,寄存器分配、调度、机器码生成
19 | >
20 | > https://mlir.llvm.org/docs/Rationale/RationaleLinalgDialect/
21 |
22 | [Linalg Dialect](https://mlir.llvm.org/docs/Dialects/Linalg/) 对结构化数据上的结构化计算使用了通用的表示形式(a versatile representation of structured computation on structured data)。这种dialect是为了transformations而专门设计出来的,**只需要很少量的分析就可以完成转换**;并且它**同时支持 tensor 和 buffer 作为操作数**(在tensor和memref容器上运行的更高级别的计算原语),bufferization 过程(实现tensor到buffer的转换)也可以在不改变操作本身的情况下完成。
23 |
24 | 此外, Linalg Dialect 提供了具有特定负载的 [“named” operations ](https://mlir.llvm.org/docs/Dialects/Linalg/#named-payload-carrying-opsa-namenamed_opsa)(如:矩阵乘法和卷积),也提供了用于定义 structure 的 [“generic” operations](https://mlir.llvm.org/docs/Dialects/Linalg/#payload-carrying-opsa-namepayload_opsa)。这两种形式之间可以互相转换。Linalg Dialect 的迭代结构允许它们转换为向量(vector)操作,以及基于向量或标量操作的(仿射,Affine Dialect)循环。
25 |
26 | > 结构化的代码具有高度可组合性和可重用性:tiling和fusion转换在各个数据结构阶段都是完全通用的
27 | >
28 | > 结构化Ops为模式匹配和重写提供了自然锚点。
29 |
30 |
31 |
32 |
33 |
34 | 1. 只需要很少量的分析就可以完成转换:
35 |
36 | **Linalg ==generic op== 本质是多层完美嵌套循环(perfect loop nest)的 op 化表示。**
37 |
38 | > Linalg generic op 里面用 indexing map 来隐性表示每层循环与输入输出的 access 关系,用附加的 region 表示针对这些输入输出进行的计算。
39 |
40 | - linalg op 通过其 indexing map 来指定循环变量 (loop induction variable) 如何访问 (access) 操作数 (operand) 以及结果 (result)。
41 | - linalg op region 内的负载操作则指定了循环内部所进行的计算。
42 |
43 | (1)linalg op背后**统一的结构** `-->` 有助于简化转换的逻辑。 因为转换只需要针对 indexing map 以及 region 进行操作,而无需考虑这具体是哪一个 linalg op。(匹配 indexing map和region的情况,而不是匹配linalg op)
44 |
45 | (2)Linalg generic op本质是perfect loop nest的op化表示 `-->` 针对 loop 做各种 transformation 的时候不可能存在非完美的情况,这样可以取消用来检测和维持 loop 完美性的逻辑,避免复杂的分析。For example, loop tiling can be applied to the loop nest computing a matrix multiplication without additionally relying on affine dependence analysis to check its legality.
46 |
47 |
48 |
49 | ==named ops== 基本就是 generic ops 上面提供的 sugar:每个 named op 都有明确的隐性的 indexing map 和 compute region,它们定义了一个 named op。**named op 是可以和 generic op 相互转换的。**
50 |
51 | named ops 存在的作用是让与上层的对接变得简单。**算子层到 Linalg 层可以直接产生这些 named ops。但是在 Linalg 以及以下的层次上,transformation 主要操作的是 generic ops**,确切地说是 generic ops 背后的 [op interface](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td)。这两种形式之间可以互相转换,编译器 transformation 不需要修改,因为这些 named ops 都有同样的 op interface,既有 transformation 可以直接操作。
52 |
53 |
54 |
55 | 2. linalg中同时存在tensor和buffer的表示
56 |
57 | | Tensor | Buffer |
58 | | --------------------------------------------------------- | ------------------------------------------------------------ |
59 | | 不一定与内存相关联的不可变值 (immutable values),重写简单 | 可变的,可能会受到混叠的影响(多个对象可能指向同样的底层存储位置) |
60 | | 高层级运算(TF、torch、HLO) | 框架(组合、卷积) --> 结构(loop、vector) --> 编程模式(SIMD) |
61 |
62 | tensor到buffer的转换是通过bufferization完成
63 |
64 |
65 |
66 |
67 |
68 | 3. 下图是TF到LLVM IR的一种codegen
69 |
70 | 从 [MHLO ](https://github.com/tensorflow/mlir-hlo#meta-hlo-dialect-mhlo)去生成 `Linalg-on-tensors`(转换局限在tensor层面,其目的并非递降,而是为接下来的转换做准备),**并在 Linalg 上调用 bufferization 之前,在该级别上执行融合(IREE也是专注于tensor级别的fusion)。**进一步的循环转换(loop transformations)(如tiling)发生在 SCF Dialect 级别,然后转换为 target-specific GPU dialect;而有效负载操作(payload operations)则先转换为 Standard Dialect 再转换为 LLVM Dialect。
71 |
72 |
73 |
74 |
75 |
76 | 4. https://mlir.llvm.org/docs/Rationale/RationaleLinalgDialect/
77 |
78 | 利用线性代数语义定义ops和转换:Linalg **defines ops and transformations declaratively** and was originally restricted to ops with *linear-algebra like* semantics (`pointwise`, `matmul`, `conv`…).
79 |
80 | 针对 dense tensors 使用较多
81 |
82 | 尽可能地保持信息:This information captures the legality and applicability of transformations and is **not lost by lowering prematurely to loop or CFG form**.
83 |
84 | 使用混合抽象:reflect on and integrate the key elements of the prior art success as well as avoid the common pitfalls in the area of code generation
85 |
86 | 本地改写机制:uses local rewrite rules implemented with the MLIR [Declarative Rewrite Rules](https://mlir.llvm.org/docs/DeclarativeRewrites/)
87 |
88 |
89 |
90 | 5.
91 |
92 | 
93 |
94 | 
95 |
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-30_12.08.14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-30_12.08.14.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_10.33.29.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_10.33.29.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_15.15.02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_15.15.02.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_19.55.57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_19.55.57.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_21.08.19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_21.08.19.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_21.34.01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_21.34.01.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-06-01_11.56.31.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-06-01_11.56.31.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_MLC课程/image-20230329133901335.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/image-20230329133901335.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_vectorize 和 tensorize Pass/image-20230329133901335.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_vectorize 和 tensorize Pass/image-20230329133901335.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_vectorize 和 tensorize Pass/v2-ee6ca5e08aee17b8f9998dd3a3da75c1_r.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_vectorize 和 tensorize Pass/v2-ee6ca5e08aee17b8f9998dd3a3da75c1_r.jpg
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/4type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/4type.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/4type2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/4type2.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/DNN1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/DNN1.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/DNN2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/DNN2.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/TVMSoftwareStack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/TVMSoftwareStack.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/TVMflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/TVMflow.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/all_unity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/all_unity.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/automation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/automation.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/cooperate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/cooperate.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/horizontal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/horizontal.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/now_q1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/now_q1.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/unify.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/unify.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_简介/unity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/unity.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/201907192304343.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/201907192304343.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220925220059169.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220059169.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220925220149543.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220149543.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220925220653571.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220653571.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220925220718150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220718150.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220925220751150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220751150.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220925222016904.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925222016904.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220925234257698.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925234257698.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926002042777.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926002042777.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926002050138.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926002050138.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926002056920.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926002056920.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926085918902.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926085918902.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926090051685.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926090051685.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926090558359.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926090558359.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926090943230.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926090943230.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926095351317.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926095351317.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926100008289.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926100008289.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/image-20220926100817561.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926100817561.png
--------------------------------------------------------------------------------
/ai_compiler/TVM/img_编译安装/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MjA4MTM4OQ==,size_16,color_FFFFFF,t_70.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MjA4MTM4OQ==,size_16,color_FFFFFF,t_70.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_base/cta_wrap_thread.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/cta_wrap_thread.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_base/cuda_triton.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/cuda_triton.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_base/cuda_vs_triton.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/cuda_vs_triton.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_base/distribute_layout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/distribute_layout.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_base/gpu_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/gpu_arch.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_base/layout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/layout.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_base/swizzled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/swizzled.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_base/triton_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/triton_arch.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_base/triton_arch_now.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/triton_arch_now.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_language/load.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/load.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_language/loadpid0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/loadpid0.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_language/loadpid1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/loadpid1.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_language/loadpid2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/loadpid2.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_language/store.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/store.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_linalg/bqb1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/bqb1.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_linalg/dialect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/dialect.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_linalg/diff_with_triton_shared.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/diff_with_triton_shared.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_linalg/mlir_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/mlir_pipeline.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_linalg/opt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/opt.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/img_Triton_linalg/success.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/success.png
--------------------------------------------------------------------------------
/ai_compiler/Triton/编译安装.md:
--------------------------------------------------------------------------------
1 | # Triton编译安装
2 |
3 | ## 配置python环境
4 |
5 | 建议使用conda配置,选择python3.10会稳定些
6 |
7 | `conda create -n triton_env python=3.10`
8 |
9 | - 根据cuda版本安装pytorch(gpu版)
10 |
11 | 例如我用的是cuda11.8,那么
12 |
13 | ```bash
14 | conda install pytorch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 pytorch-cuda=11.8 -c pytorch -c nvidia
15 | ```
16 |
17 | 详细见官网:https://pytorch.org/get-started/previous-versions/
18 |
19 | - 安装常见的包
20 |
21 | numpy matplotlib pybind11 lit pytest isort pandas tabulate scipy flake8 autopep8
22 |
23 | pybind11安装后需要配置环境变量,否则会找不到头文件
24 |
25 | ```bash
26 | export PYBIND_INCLUDE_PATH=/xxxx/miniconda/envs/triton_env/lib/python3.10/site-packages/pybind11/include
27 | ```
28 |
29 | 下面的源挺好用的 `vim ~/.condarc`
30 |
31 | ```bash
32 | show_channel_urls: true
33 | channels:
34 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
35 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
36 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
37 | - defaults
38 | auto_activate_base: false
39 | ```
40 |
41 | ## 捷径
42 |
43 | ```bash
44 | git clone https://github.com/triton-lang/triton.git
45 | ```
46 |
47 | clone llvm 很难搞,如果不用修改源码,就直接安装吧
48 |
49 |
50 | ```bash
51 | pip install git+https://github.com/LLNL/hatchet
52 |
53 | pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly --use-deprecated legacy-resolver
54 | ```
55 |
56 | 运行一下
57 |
58 | ```bash
59 | Python 3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0] on linux
60 | Type "help", "copyright", "credits" or "license" for more information.
61 | >>> import triton
62 | >>> triton.__version__
63 | '3.0.0'
64 |
65 |
66 | cd triton/python/tutorials/
67 | python 03-matrix-multiplication.py
68 | ```
69 |
70 |
71 | ## 编译llvm
72 |
73 | ```bash
74 | git clone https://github.com/triton-lang/triton.git
75 | git clone https://github.com/llvm/llvm-project.git
76 | ```
77 |
78 | 如果拉取出现下面报错,在repo内输入 `git config --global http.postBuffer 1024288000`
79 |
80 | ```bash
81 | remote: Compressing objects: 100% (1151/1151), done.
82 | error: RPC failed; result=18, HTTP code = 200| 592.00 KiB/s
83 | fatal: The remote end hung up unexpectedly
84 | fatal: 过早的文件结束符(EOF)
85 | fatal: index-pack failed
86 | ```
87 |
88 | - 切换llvm commit
89 |
90 | git checkout xxx,其中xxx是triton对应的llvm版本号,可以使用 `cat triton/cmake/llvm-hash.txt` 找到
91 |
92 | - build
93 |
94 | cmake 版本要求3.20以上,记得安装ninja。如果没有root权限就下编译好的二进制,解压后加PATH即可。
95 |
96 | ```bash
97 | cd xxxpath/llvm-project
98 | mkdir build && cd build
99 |
100 | cmake -G Ninja ../llvm \
101 | -DLLVM_ENABLE_PROJECTS="mlir;llvm" \
102 | -DLLVM_BUILD_EXAMPLES=ON \
103 | -DLLVM_TARGETS_TO_BUILD="X86;NVPTX;AMDGPU" \
104 | -DMLIR_ENABLE_CUDA_RUNNER=ON \
105 | -DCMAKE_BUILD_TYPE=Release \
106 | -DLLVM_ENABLE_ASSERTIONS=ON \
107 | -DLLVM_ENABLE_RTTI=ON \
108 | -DLLVM_INSTALL_UTILS=ON \
109 | -DMLIR_INCLUDE_INTEGRATION_TESTS=ON \
110 | -DCMAKE_INSTALL_PREFIX="xxxpath/tools_build/llvm"
111 |
112 | ninja -j32
113 | ninja install
114 |
115 | cmake --build . --target check-mlir
116 | ```
117 |
118 | 编译时target只能是"X86;NVPTX;AMDGPU",如果多了riscv,后续编译出的libtriton.so是有问题的,会报错
119 |
120 | ```bash
121 | ImportError: /lustre/S/ruantingfeng/triton/triton_repo/python/triton/_C/libtriton.so: undefined symbol: LLVMInitializeRISCVAsmParser
122 | ```
123 |
124 | - 增加环境变量在.bashrc
125 |
126 | ```bash
127 | export PATH=xxxpath/tools_build/llvm/bin:$PATH
128 | export LLVM_BUILD_DIR=xxxpath/tools_build/llvm
129 | export LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include
130 | export LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib
131 | export LLVM_SYSPATH=$LLVM_BUILD_DIR
132 | ```
133 |
134 | ## 编译triton
135 |
136 | ```bash
137 | cd xxxpath/triton
138 | conda activate triton_env
139 | LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include \
140 | LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib \
141 | LLVM_SYSPATH=$LLVM_BUILD_DIR \
142 | pip install -e python
143 | ```
144 |
145 | 编译好的内容在 `xxxpath/triton/python/build` 中
146 |
147 | 而且 `libtriton.so` 已经加到 `_C` 中了
148 |
149 | ```bash
150 | $ ll python/triton/_C/
151 | include/ libtriton.so
152 | ```
153 |
154 | 再加个环境变量
155 |
156 | ```bash
157 | export TRITON_HOME=/lustre/S/ruantingfeng/triton/triton_repo
158 | export PYTHONPATH=$TRITON_HOME/python:${PYTHONPATH}
159 | ```
160 |
161 | 测试一下,没啥问题就可以运行 `python/tutorials` 中的测试(跑了一下03-matrix-multiplication.py,看起来暂时干不过cuBLAS)
162 |
163 | ```bash
164 | $ python
165 | Python 3.10.14 (main, Mar 21 2024, 16:24:04) [GCC 11.2.0] on linux
166 | Type "help", "copyright", "credits" or "license" for more information.
167 | >>> import triton
168 | >>> triton.__version__
169 | '3.0.0'
170 | >>>
171 | ```
--------------------------------------------------------------------------------
/ai_compiler/XLA/img_xla2hlo/Screen%2BShot%2B2017-02-27%2Bat%2B9.54.12%2BAM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/Screen%2BShot%2B2017-02-27%2Bat%2B9.54.12%2BAM.png
--------------------------------------------------------------------------------
/ai_compiler/XLA/img_xla2hlo/how-does-xla-work.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/how-does-xla-work.png
--------------------------------------------------------------------------------
/ai_compiler/XLA/img_xla2hlo/v2-16e964ead53e7c71c0cc4dff6ed11851_b.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/v2-16e964ead53e7c71c0cc4dff6ed11851_b.jpg
--------------------------------------------------------------------------------
/ai_compiler/XLA/img_xla2hlo/v2-84cd6a3244ebcd2f210626887a09c33f_b.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/v2-84cd6a3244ebcd2f210626887a09c33f_b.jpg
--------------------------------------------------------------------------------
/ai_compiler/XLA/img_xla2hlo/v2-ae9fc9f5aeb969d0c25940cd9f8f24c3_b.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/v2-ae9fc9f5aeb969d0c25940cd9f8f24c3_b.jpg
--------------------------------------------------------------------------------
/ai_compiler/XLA/img_xla2hlo/截屏2023-02-14 17.54.11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/截屏2023-02-14 17.54.11.png
--------------------------------------------------------------------------------
/ai_compiler/XLA/xla2hlo.md:
--------------------------------------------------------------------------------
1 | ## xla
2 |
3 | [XLA 源码深入解读](https://zhuanlan.zhihu.com/p/427444916)
4 |
5 | XLA(加速线性代数)是一种针对特定领域的线性代数编译器,能够加快 TensorFlow 模型的运行速度。
6 |
7 | > XLA good at
8 | >
9 | > (1) transforming code back and forth between the scalar and the vector worlds
10 | >
11 | > (2) passing function boundaries for handling both host and device code
12 | >
13 | > (3) complying to stringent requirements imposed by energy-efficient xPUs
14 |
15 | 更一般来说,XLA 可以获取 TensorFlow 操作的整个子图,并将它们融合到需要最少内核启动次数的高效循环中。获得的融合内核可以利用模型专属信息进行优化。(算子融合、中间值传输)
16 |
17 |
18 |
19 |
20 |
21 | ```python
22 | def model_fn(x, y, z):
23 | return tf.reduce_sum(x + y * z)
24 | ```
25 |
26 | 例如上述代码中,原始的tf会启动三个内核:分别对应于乘法、加法和归约(reduce_sum)运算。但XLA能将乘法、加法和归约“融合”到一个 GPU 内核中,只使用一个内核就可以完成计算。融合操作不会将由 `y*z` 和 `x+y*z` 生成的中间值写出到内存中;而是直接将这些中间计算的结果“流式传输”给使用它们的后续运算,同时将它们完全保留在 GPU 寄存器中。
27 |
28 |
29 |
30 | XLA 接受在 HLO 中定义的计算图(“计算”)并将其编译为适用于各种架构的机器指令。
31 |
32 | XLA对输入的HLO计算图进行**与目标设备无关的优化,如CSE(公共子表达式消除),算子融合,运行时内存分配分析**。输出为优化后的HLO计算图 HLO IR。
33 | 然后,将HLO计算图发送到后端(Backend),后端结合特定的硬件属性对HLO计算图进行进一步的HLO级优化,例如将某些操作或其组合进行模式匹配从而优化计算库调用。最后,后端将HLO IR转化为LLVM IR,LLVM再进行低级优化并生成机器码。
34 |
35 | > XLA IR在优化中,会将一些具名算子节点(BatchNormalization)直接替换为包含计算细节(+-*/),同时插入一些相关的add、multiply和maximum等节点;或者将另外的具名算子(Conv)替换为cuDNN API,并且插入相应的call、reshape等节点。接下来,会做一些fusion和dse等优化操作。
36 |
37 | 
38 |
39 |
40 |
41 | tf2xla:graph compile -> hlo graph build -> hlo pass pipeline -> hlo dataflow analysis -> codegen
42 |
43 |
44 |
45 | ## HLO
46 |
47 |
48 |
49 | ### 层次划分
50 |
51 | HLO IR可以分成三个层次,HloModule, HloComputation和HloInstruction。
52 |
53 | 
54 |
55 | - HloModule
56 |
57 | 一个编译单元,相当于一个完整的**可执行程序**,所以有入口函数,也就是 entry_computation, 有且仅有一个。输入可以是多个参数,但输出只有一个(root instruction的值),**如果要返回多个值,需要把多个值构造成一个元组(tuple)返回**。一个module可以包含多个computation,除了entry_computation,其他的都是"nested",也就是被调用。
58 |
59 | ENTRY ----------------> 程序入口,ROOT--------------> 程序输出
60 |
61 | 一个HloModule可以包含很多个HloComputation
62 |
63 | - HloComputation
64 |
65 | 是HLO IR中间层的表示,相当于程序中的一个**函数**。一个HloModule只能有一个entry_computation,其他的computation是被entry_computation调用的。我们可以把entry_computation类比作main函数。每个HloComputation可以包含多个HloInstruction,但只能有一个root_instruction,root_instruction的output就是该computation的output。
66 |
67 | - HloInstruction
68 |
69 | HLO IR最底层的表示,相当于程序中的一条指令,一个HloInstruction可包含多个算子。computation的input用parameter表示。HloInstruction也可以调用HloComputation。一个HloInstruction只有一个输出,如果需要多个output,就打包成一个Tuple。
70 |
71 | data dependency:如果一个instruction的output是另一个instruction的input,我们就说两个instruction之间存在data dependency。HLO中使用 `operands_` 和 `users_` 两个fields来表示data dependency。
72 |
73 | ```cpp
74 | class HloInstruction {
75 | ...
76 | InstructionVector operands_;
77 | std::vector<HloInstruction*> users_;
78 | absl::flat_hash_map<const HloInstruction*, int64_t> user_map_;
79 | ...
80 | };
81 | ```
82 |
83 | 
84 |
85 | control dependency:有些instruction之间并没有数据依赖,但是我们仍然可能对这些instruction的执行顺序有额外的要求。在HloInstruction中有 `control_successors_` 和 `control_predecessors_` 两个fields来表示control dependency。
86 |
87 |
88 |
89 | ### 多种HLO
90 |
91 | https://github.com/tensorflow/mlir-hlo
92 |
93 | - `DHLO`:Dynamic HLO
94 |
95 | 在XLA的HLO IR基础上,扩展了一套具有完备动态shape表达能力的IR。静态场景下,HLO IR中的shape表达会被静态化,所有的shape计算会被固化为编译时常量保留在编译结果中;而在动态shape场景下,IR本身需要有足够的能力表达shape计算和动态shape信息的传递。
96 |
97 | - `CHLO`:"Client" HLO
98 |
99 | 最初设计用于映射XLA的client API,更接近前端。其中的ops可以来自XlaBuilder或者XLA helper functions,支持隐式广播、支持动态形状。设计初衷是为了贴近client级别,方便渐进细化下降。
100 |
101 | > 隐式广播指的是:当操作数形状不一致时,算子按照(类似 NumPy 的)广播规则**自动对齐形状**,而不需要显式插入 broadcast 操作
102 |
103 | | CHLO | |
104 | | :--- | ------------------------------------------------------------ |
105 | | 入口 | 通过XlaBuilder API,TF2XLA kernels、JAX、PyTorch bridge直接使用这些API;<br>legalization from TensorFlow ops in the TF Graph Compiler |
106 | | 出口 | MHLO;<br>调用XlaBuilder API导出到 xla::HloInstructionProto |
107 |
108 | - `MHLO`: "Meta"-HLO
109 |
110 | https://tensorflow.google.cn/mlir/hlo_ops
111 |
112 | 和xla_hlo相近,没有隐式广播,但支持动态形状
113 |
114 | 隐式捕获控制流ops,便于优化;输出结果多个,不用组合成一个元组;拥有不能加在client dialect或server dialect的ops;verification发生在boundary;更彻底地支持动态形状,无需更新所有users/backends
115 |
116 | 大量地映射到 linalg named ops
117 |
118 | | MHLO | |
119 | | :--- | :----------------------------------------------------------- |
120 | | 入口 | Legalization from CHLO dialect or conversion from XLA HLO;<br>直接从TF Graph Compiler获得 |
121 | | 出口 | LMHLO;<br>Linalg IREE |
122 |
123 | - `LMHLO`: "Late"-"Meta"-HLO
124 |
125 | 与 MHLO 相同,但在 buffers (e.g., memref) 而不是 tensors 上,在缓冲区分配后作用
126 |
127 | **LMHLO是为了帮助迁移 XLA 后端而临时引入的**,在这个级别上建模的大部分内容已经可以用 `linalg` 表示出来了。在 LHLO 上执行转换的 pass 理想情况下应该遵守 Linalg 上已有的严格接口,并且足够通用。
128 |
129 | > tensor values (immutable) 和in-memory buffers (side-effecting)
130 |
131 | | LMHLO | |
132 | | :---- | :-------------------- |
133 | | 入口 | 缓冲区分配后从XLA转入 |
134 | | 出口 | Codegen(LLVM IR) |
135 |
136 | > [HLO to LHLO conversion and fusion #41424](https://github.com/tensorflow/tensorflow/issues/41424)
137 |
138 |
139 |
140 | 
141 |
--------------------------------------------------------------------------------
/ai_compiler/ai_compiler_commom/Graph_Partition.md:
--------------------------------------------------------------------------------
1 | # 切图算法
2 |
3 | 在 [Auto-Parallelism](../../paper_read/Auto-parallelism%20summary/Auto_Parallelism.md) 文件中讲到了ai compiler可能涉及的两种切图算法:
4 |
5 | (1)为了设备分布式执行切图,将计算图(静态图)切为子图, 为了在内存受限的独立设备上执行
6 |
7 | 例如:FlexFlow、Alpa
8 |
9 | (2)为了launch kernel切图(算子融合),子图对于硬件kernel可能过大,无法融合为1个kernel,为了便于在auto-scheduler时直接融(tile)为一个kernel,故将子图切为适应硬件kernel大小的片段
10 |
11 | 当时在 [Auto-Parallelism](../../paper_read/Auto-parallelism%20summary/Auto_Parallelism.md) 中分析了第一类切图算法,本文就学习一下第二类——为了launch kernel切图(算子融合)
12 |
13 | ## 相关工作
14 |
15 | 为了launch kernel切图也经常以算子融合的形式实现,很多ai compiler都实现了各自的切图算法
16 |
17 | ### 手写pattern
18 |
19 | 手写一些融合的pattern,例如:`conv2d + relu`,`conv2d + bn + relu`,`conv2d + bn + relu + pool`等,常见让计算密集型和访存密集型算子融合,减小整体执行开销。针对特定场景有较好的收益,但泛化性较差(总不能手动枚举所有优化可能吧,总得编译时间和性能trade-off)
20 |
21 | > 推荐两篇讲访存和计算的知乎文章:https://zhuanlan.zhihu.com/p/600489819, https://zhuanlan.zhihu.com/p/688613416
22 |
23 | ### XLA
24 |
25 | XLA的op fusion是在HLO IR层做的,相关代码见[fusion_pipeline.cc](https://github.com/openxla/xla/blob/main/xla/service/gpu/fusion_pipeline.cc)。
26 |
27 | - PriorityFusion: 实现了FusionQueue,queue中都是producer。根据cost model来fuse instructions,并且通过动态地更新优先级(benefit最大)来选择下一个fusion对象。
28 |
29 | - InstructionFusion: 纵向fusion,将从producer到consumer的所有指令融合为一个kernel。
30 |
31 | - FusionMerger: 当合并结果不会增加字节传输,将融合后的指令进行合并。
32 |
33 | - MultiOutputFusion: 横向融合的一种,将多个output的指令融合为一个kernel。
34 |
35 | - HorizontalLoopFusion: 将多个fusion op横向融合在一起launch
36 |
37 | ### TVM
38 |
39 | TVM的算子融合是在relay层做的,相关代码见[graph_partitioner.cc](https://github.com/apache/tvm/blob/main/src/relay/analysis/graph_partitioner.cc)和[fuse_ops.cc](https://github.com/apache/tvm/blob/main/src/relay/transforms/fuse_ops.cc)
40 |
41 | 其将算子分为7类:ElemWise,Broadcast,Injective,CommReduce,OutEWiseFusable,Tuple,Opaque,不同的算子间有不同的融合rule
42 |
43 | 主体实现代码:
44 |
45 | ```cpp
46 | std::vector<GraphPartitioner::Group*> GraphPartitioner::Partition(
47 | const IndexedForwardGraph& graph) {
48 | // 每个节点初始化为一个group
49 | this->InitGroups(graph);
50 | if (opt_level_ == 0) return std::move(groups_);
51 | // get post dominator tree
52 | auto post_dom_tree = DominatorTree::PostDom(arena_, graph);
53 | // run fusion algorithm.
54 | for (int phase = 0; phase < 3; ++phase) {
55 | this->RunFuse(graph, post_dom_tree, phase);
56 | }
57 | return std::move(groups_);
58 | }
59 | ```
60 |
61 | 个人想法:ansor生成Sketch已经是在图划分后,而划图就已经考虑了op的融合行为,所以给定的rule中会有融合的rule,会尽量尝试融合
62 |
63 | 
64 |
65 |
--------------------------------------------------------------------------------
/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/MS框架.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/MS框架.png
--------------------------------------------------------------------------------
/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/hlo优化.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/hlo优化.png
--------------------------------------------------------------------------------
/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/发展.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/发展.png
--------------------------------------------------------------------------------
/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/后端优化.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/后端优化.png
--------------------------------------------------------------------------------
/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/技术框架.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/技术框架.png
--------------------------------------------------------------------------------
/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/结构.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/结构.png
--------------------------------------------------------------------------------
/ai_compiler/ai_compiler_commom/img_Graph_Partition/ansor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_Graph_Partition/ansor.png
--------------------------------------------------------------------------------
/basic/Architecture/Architecture.md:
--------------------------------------------------------------------------------
1 | # Architecture
2 |
3 | 传统nn网络对硬件的需求:weight以及中间结果,通过存算一体搞一个weight station,再分析数据流以减少层之间的中间结果,就可以靠dsa叠算力将吞吐拉到极大
4 |
5 | 但是llm推理本质上是多次inference,通过一次input给出多个token,每次inference都将当前获得的token和input拼接再输入来预测下一个token。多次inference的过程是串行的,所以需要保存大量上下文信息。新一轮的inference的某些计算结果都可以复用上一轮的某些结果,这便是kv-cache技术。 但kv-cache对芯片显存的要求可大可小,和请求数、上下文长度相关。
6 |
7 | llm专用芯片的瓶颈在于显存容量和带宽,kv-cache是热数据,每次inference生成token都要读一遍,如果kv-cache高达几百GB,那么想实现100token/s那就是几十TB/s的带宽需求。提高并发度一定程度可以加大对权重读取带宽的复用,但是并发度越高,kv-cache容量需求越大,容量又会bound。
--------------------------------------------------------------------------------
/basic/CMake/cmake.md:
--------------------------------------------------------------------------------
1 | # cmake 简介
2 |
3 | 存在多种make工具(不同平台、不同应用),cmake被设计出来实现“write once, run everywhere”
4 |
5 | 基础流程:
6 |
7 | 1. 写 CMake 的配置文件 `CMakeLists.txt`
8 |
9 | 2. 执行命令
10 |
11 | ```
12 | cmake PATH
13 | ```
14 |
15 | 或
16 |
17 | ```
18 | ccmake PATH
19 | ```
20 |
21 | 生成
22 |
23 | ```
24 | Makefile
25 | ```
26 |
27 | 1. PATH 是 CMakeLists.txt 所在的目录
28 | 2. ccmake 比 cmake 多交互式页面
29 |
30 | 3. 使用 `make` 命令进行编译
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 1.jpeg
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 1.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 10.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 11.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 12.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 13.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 14.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 15.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 2.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 3.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 4.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 5.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 6.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 7.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 8.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled 9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 9.png
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled.jpeg
--------------------------------------------------------------------------------
/basic/DataReuse/img_data_reuse/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.04.41.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.04.41.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.08.11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.08.11.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.09.48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.09.48.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.10.47.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.10.47.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-04_12.39.32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-04_12.39.32.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-04_15.15.51.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-04_15.15.51.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/IMG_8232.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/IMG_8232.jpg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 1.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 1.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 10.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 10.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 11.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 11.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 12.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 12.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 13.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 13.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 14.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 14.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 15.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 15.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 16.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 16.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 17.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 17.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 18.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 18.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 19.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 19.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 2.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 2.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 20.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 20.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 3.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 3.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 3.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 4.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 4.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 4.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 5.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 5.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 5.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 6.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 6.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 6.png
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 7.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 7.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 8.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 8.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled 9.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 9.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled.jpeg
--------------------------------------------------------------------------------
/basic/GPU/img_GPU架构/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 1.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 10.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 11.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 12.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 13.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 14.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 15.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 16.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 17.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 18.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 19.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 2.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 20.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 21.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 22.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 22.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 23.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 23.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 24.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 24.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 25.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 25.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 26.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 26.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 27.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 27.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 28.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 28.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 29.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 29.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 3.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 30.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 31.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 31.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 4.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 5.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 6.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 7.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 8.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 9.png
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled.jpeg
--------------------------------------------------------------------------------
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled.png
--------------------------------------------------------------------------------
/basic/PyTorch/PyTorch2.0.md:
--------------------------------------------------------------------------------
1 | # PyTorch2.0
2 |
3 | DL framework三种运行加速
4 |
5 | 
6 |
7 | Torch Compiler分3步
8 |
9 | 1. Graph Acquisition: Dynamo (forward) + AOTAutograd (backward)
10 | 2. Graph Lowering: ATen / Prim IR
11 | 3. Graph Compilation: TorchInductor
12 |
13 | 
14 |
15 | TorchInductor分为四层,三大核心技术
16 |
17 | 1. 核心 IR 是 loop level IR,是 python callable。做 codegen 或者 analysis,只需要直接 execute。
18 | 2. 用 SymPy(一个符号计算库)支持动态 shape。
19 | 3. 在 CPU 上用的 OpenMP,跟 Intel 一起搞的。GPU 选了 OpenAI Triton。
20 |
21 | AOTInductor = torch.export (whole graph capture) + Inductor (AOT compilation)
22 |
23 | 
24 |
25 | Dynamo 的 2 个精髓:
26 |
27 | 1. partial graph capture。遇到不支持的,就保留并拆分出前后的子图,分别编译。
28 | 2. guard。解决了 trace 的经典难题 -- control flow 等导致 capture 不能用。guard 会自动报警,不会出错。
29 |
30 | 三种模式
31 |
32 | 
--------------------------------------------------------------------------------
/basic/PyTorch/img_PyTorch2.0/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PyTorch/img_PyTorch2.0/Untitled 1.png
--------------------------------------------------------------------------------
/basic/PyTorch/img_PyTorch2.0/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PyTorch/img_PyTorch2.0/Untitled 2.png
--------------------------------------------------------------------------------
/basic/PyTorch/img_PyTorch2.0/Untitled 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PyTorch/img_PyTorch2.0/Untitled 3.png
--------------------------------------------------------------------------------
/basic/PyTorch/img_PyTorch2.0/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PyTorch/img_PyTorch2.0/Untitled.png
--------------------------------------------------------------------------------
/coding/img_coding_note/computational_complexity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/coding/img_coding_note/computational_complexity.png
--------------------------------------------------------------------------------
/paper_read/Astitch/AStitch.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/AStitch.pptx
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128113128495.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128113128495.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128113426967.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128113426967.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128115126772.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128115126772.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128125650543.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128125650543.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128131909434.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128131909434.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128135909877.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128135909877.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128141107379.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128141107379.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128143319632.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128143319632.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128145444483.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128145444483.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128153319148.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128153319148.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128185111129.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128185111129.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221128224708418.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128224708418.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221129103644172.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221129103644172.png
--------------------------------------------------------------------------------
/paper_read/Astitch/img_Astitch/image-20221129104739007.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221129104739007.png
--------------------------------------------------------------------------------
/paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled 1.png
--------------------------------------------------------------------------------
/paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled 2.png
--------------------------------------------------------------------------------
/paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled.png
--------------------------------------------------------------------------------
/paper_read/Astra、Rammer、Roller/短记_Astra_Rammer_Roller.md:
--------------------------------------------------------------------------------
1 | # Astra、Rammer、Roller
2 |
3 | ## Astra
4 |
5 | DNN任务的特点:重复性(数据重复、指令重复)和可预测性(一般不会有复杂的控制流)
6 |
7 | Astra利用运行时对优化结果进行评估,其优化对象为整个模型程序(需要优化的就是全局程序的运行情况,并不单独优化某些参数或者特征)
8 |
9 | 在运行时运行多个版本编译的程序,通过运行时间筛选运行最快的编译版本
10 |
11 | 三个用于管理优化空间的技术:
12 |
13 | - 配置粗粒度的静态信息(*在某些维度上使用先验知识约束枚举器的可选项,排除无效选项*)
14 | - 在细粒度层面,并行的评估多个版本的编译。Astra框架同时对多个独立的参数进行评估。
15 | - 以初始的测验结果作为信号来智能的修剪动态状态空间。
16 |
17 | 可以进行的优化空间:
18 |
19 | - 内核内参数优化,如线程块大小、tiling数据大小和共享内存大小等参数。
20 | - 将多个内核函数融合,并指出内核融合需要测验,举例融合可能会带来性能下降。
21 | - 使用多个流
22 | - 其他的优化:
23 | - 全图优化,如内存规划
24 | - 分布式或GPU训练
25 |
26 | 优化器 = 枚举器 + custom-wirer(捕获器)
27 |
28 | 编译器用来枚举相关优化的状态空间,并使用静态信息对状态空间进行修剪
29 |
30 | 运行时对可被捕获的优化选项进行排序,从而选出最优的优化集合
31 |
32 | 自适应变量被组成一棵更新树,更新树有几种被探索的模式:
33 |
34 | - 并行模式:所有子节点都相互独立时
35 | - 详尽模式:所有子树需要被暴力的遍历,时间成本是指数级的
36 | - 前缀模式:需要按顺序分层级遍历子代。当上一个子代遍历完其最佳结果确定,开始下一子代的遍历
37 |
38 | 修剪探索状态空间的方式:
39 |
40 | - 并行探索:选择维度独立,并行每个组进行探索
41 | - 层次探索:探索空间引入一个内存分配策略,在探索完分配之后,再为每个分配构建最佳配置。
42 | - 屏障探索:不同流中调度内核的性能受到之前内核调度历史的影响。Astra引入屏障探索的概念,设立超时期,在超时期的边界对所有流进行强制屏障同步,并行的同时对多个超时期进行探索。可以分摊跨流屏障同步的成本。
43 | - 前缀探索:为了在超时期进一步控制状态空间,将超时期基于依赖关系进一步划分成不同的时期。时期中的操作可以进行跨流的调度。
44 | - 等价探索
45 |
46 | ## **Rammer**
47 |
48 | 通过算子**之间和内部**的协同,为DNN生成有效的**静态时空调度**
49 |
50 | 算子间并行 && 算子内并行:通过降低算子内部并行,从而提高算子间的并行
51 |
52 | 单个算子派发将贪婪的使用硬件资源,阻止了其他可并发的算子并行使用硬件资源
53 |
54 | 一个算子(rOperator)=多个rTask(调度的最小单元,加速器单个执行单元的运行对象)
55 |
56 | 以rOperator构成的数据流图依然保持着数据间的依赖关系,但是**以rTask为粒度的调度**将算子内部的并行信息暴露给编译器
57 |
58 | 每个rOperator有多个版本的rKernel的具体实现,每个rKernel有不同的tiling策略
59 |
60 | 虚拟化的并行设备(vDevice) 包含多个虚拟化的执行单元(vEU)
61 |
62 | rTask被分派到vEU上进行执行,每个vEU可以独立的执行rTask
63 |
64 | 为了保证具有依赖的算子的顺序执行,定义了屏障rTask
65 |
66 | Rammer在vDevice上将DFG组织成rProgram,rProgram被表示成一个prog[EU_id][order]的二维数组
67 |
68 | EU_id表示执行任务的单元,order表示EU设备上执行的顺序
69 |
70 | Rammer将调度决策从运行时迁移到编译期间(AOT)
71 |
72 | 编译过程中,Rammer会提供信息:rTask在vEU上独立执行时间、rTask的资源占用情况,如本地内存或寄存器、rProgram的总体执行时间
73 |
74 | 调度策略如下:
75 |
76 |
77 |
78 | 算子间调度的开销:内核启动、上下文初始化、主机和设备之间的通信
79 |
80 | 使用operator fusion技术减少算子间调度开销
81 |
82 | Rammer将输入格式统一转换成由rOperator表达的DFG图,在DFG图上做一些常见的图优化转换:常量折叠、公共子表达式消除、基于模式的算子融合
83 |
84 | 对于每个优化后的rOperator,Rammer从不同渠道加载一个或多个版本的rKernel实现:其他框架的转换器、手调内核函数、核函数代码生成器
85 |
86 | 后续,DFG划分成多个子图,并为每个子图编译出对应的rProgram,每个rProgram进一步转换为加速器上运行的设备代码
87 |
88 | 在NVIDIA GPU中,rTask会被分配到SM上,将其作为一个线程块实现。为了绕过CUDA内置的硬件调度器,Rammer采用持久线程块(PTB)来在硬件中实现vEU的映射
89 |
90 | - 抽象rTask,暴露算子内部的并行性
91 | - 抽象vEU,暴露硬件内部的调度能力
92 | - 利用DNN计算的可预测性,将运行时调度问题转换为编译期的rTask执行优化
93 |
94 | ## Roller
95 |
96 | 高效生成内核,基于构造的方法
97 |
98 | rTile是Roller的核心,其**封装了与硬件加速器关键特性对齐的张量形状**
99 |
100 | 对tile进行抽象,从而通过对形状选择进行限制,以此实现高效的探索
101 |
102 | Roller基于rTile生成最终的rProgram
103 |
104 | rProgram的性能可以通过微性能模型进行评估
105 |
106 | Roller使用一种*scale-up-then-scale-out* 的方法:先执行放大过程,采用基于递归的rTile构造算法,逐步增大rTile的大小,构造硬件加速器的单个执行单元的饱和运行
107 |
108 | Roller系统结构
109 |
110 |
111 |
112 | 从张量表达式中提取张量的形状,并利用硬件的相关规范来构造rTiles
113 |
114 | 在rTiles的基础上使用*scale-up-then-scale-out*的递归构造算法生成rProgram
115 |
116 | 构造算法通过评估rProgram的性能来识别rTile合适的配置
117 |
118 | rTile必须和给定张量表达式中的张量形状和底层硬件特性对齐
119 |
120 | 
121 |
122 | 给定一个数据处理管道,其rProgram的优化目标是最大化管道的吞吐量,该目标可以转换为:
123 |
124 | - 计算和数据移动需要充分利用硬件特性
125 | - 吞吐量需要达到性能瓶颈
126 | - 需要足够的并行以充分利用并行执行单元
127 |
128 | Roller首先使用scale up以满足前两点,之后通过scale out利用多核并行性
129 |
130 | - scale up:Roller从内存结构由顶至下构建不同层次的rTile尺寸,Roller首先进行rTile的初始化,之后迭代扩大rTile的大小,最大程度提升数据重用得分。
131 | - scale out:Roller在最底层的内存尺寸上构造单一单元执行的rProgram,并通过将每层的rTile平均分配来将rProgram复制到其他单元。一般会在reduce的轴上分配单元
132 |
133 | Roller尝试沿着具有最小数据重用得分的轴上收缩rTiles以实现足够的并行度
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-01-21_19.09.47.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-01-21_19.09.47.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-01-21_19.10.11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-01-21_19.10.11.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.04.41.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.04.41.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.13.41.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.13.41.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.14.05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.14.05.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.20.42.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.20.42.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_20.17.01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_20.17.01.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_22.57.54.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_22.57.54.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_23.31.10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_23.31.10.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 1.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 10.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 11.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 13.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 14.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 2.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 3.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 4.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 5.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 6.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 7.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 8.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled 9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 9.png
--------------------------------------------------------------------------------
/paper_read/Attention/img_Attention/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled.png
--------------------------------------------------------------------------------
/paper_read/Attention/test_attention/attention.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from torch import nn
class ScaledDotProductAttension(nn.Module):
    """Scaled dot-product attention: ``softmax(q @ k^T / scale) @ v``.

    Args:
        scale: Divisor applied to the raw scores before the softmax.
    """

    def __init__(self, scale):
        super().__init__()  # run nn.Module's own initialisation
        self.scale = scale
        # Normalise over the key axis (the last one). For 3-D inputs this is
        # the same axis as the previous hard-coded ``dim=2``.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, q, k, v, mask=None):
        """Compute attention weights and the weighted sum of the values.

        Args:
            q: Queries shaped ``(..., n_q, d)``.
            k: Keys shaped ``(..., n_k, d)``.
            v: Values shaped ``(..., n_k, d_v)``.
            mask: Optional tensor broadcastable to ``(..., n_q, n_k)``.
                Non-zero / ``True`` entries are excluded from attention.
                Non-boolean masks are cast to ``bool``.

        Returns:
            Tuple ``(attn, output)`` with ``attn`` shaped ``(..., n_q, n_k)``
            and ``output`` shaped ``(..., n_q, d_v)``.

        Note:
            A query row whose keys are all masked yields NaN weights, because
            the softmax then sees only ``-inf`` scores.
        """
        # 1. Similarity scores: dot product of every query with every key.
        #    matmul batches over any number of leading dimensions and is
        #    equivalent to bmm for the 3-D case.
        u = torch.matmul(q, k.transpose(-2, -1))
        # 2. Scale the scores.
        u = u / self.scale
        # 3. Optional masking: masked positions get -inf so that their softmax
        #    weight becomes exactly zero. masked_fill needs a boolean mask.
        if mask is not None:
            u = u.masked_fill(mask.to(torch.bool), float("-inf"))
        # 4. Softmax over the keys gives the attention weights.
        attn = self.softmax(u)
        # 5. Each output row is the attention-weighted sum of the value rows.
        output = torch.matmul(attn, v)
        return attn, output
25 |
class MultiHeadAttention(nn.Module):
    """Multi-head attention built on top of ScaledDotProductAttension.

    Args:
        n_head: Number of attention heads.
        d_k_: Feature size of the incoming queries and keys.
        d_v_: Feature size of the incoming values.
        d_k: Per-head feature size of the projected queries/keys.
        d_v: Per-head feature size of the projected values.
        d_o: Feature size of the final output.
    """

    def __init__(self, n_head, d_k_, d_v_, d_k, d_v, d_o):
        super().__init__()
        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        # Linear projections that widen each input to n_head * per-head size.
        self.fc_q = nn.Linear(d_k_, n_head * d_k)
        self.fc_k = nn.Linear(d_k_, n_head * d_k)
        self.fc_v = nn.Linear(d_v_, n_head * d_v)

        # Scores are divided by sqrt(d_k) inside the attention core.
        self.attention = ScaledDotProductAttension(scale=np.power(d_k, 0.5))
        # Concatenated heads -> linear layer -> output.
        self.fc_concatOutput = nn.Linear(n_head * d_v, d_o)

    def forward(self, q, k, v, mask=None):
        """Project the inputs, attend per head, then merge the heads.

        Args:
            q: Tensor of size (batch, n_q, d_k_).
            k: Tensor of size (batch, n_k, d_k_).
            v: Tensor of size (batch, n_v, d_v_).
            mask: Optional 3-D mask; it is tiled n_head times along dim 0
                before being handed to the attention core.

        Returns:
            Tuple ``(attn, output)``: ``attn`` is whatever the attention core
            returns for the head-folded batch, ``output`` has size
            (batch, n_q, d_o).
        """
        heads = self.n_head
        qk_width = self.d_k
        v_width = self.d_v

        # All three inputs must be 3-D; the batch size used below is the one
        # unpacked last (from v), exactly as in the sequential unpacking.
        batch, n_q, _ = q.size()
        batch, n_k, _ = k.size()
        batch, n_v, _ = v.size()

        def fold_heads(t, length, width):
            # (batch, length, heads * width) -> (batch, length, heads, width)
            t = t.view(batch, length, heads, width)
            # -> (heads, batch, length, width), made contiguous so that the
            # following view is legal
            t = t.permute(2, 0, 1, 3).contiguous()
            # -> (heads * batch, length, width)
            return t.view(-1, length, width)

        # Projection: single head -> multiple heads.
        q = fold_heads(self.fc_q(q), n_q, qk_width)
        k = fold_heads(self.fc_k(k), n_k, qk_width)
        v = fold_heads(self.fc_v(v), n_v, v_width)

        if mask is not None:
            # Tile the mask n_head times along dim 0; other dims unchanged.
            mask = mask.repeat(heads, 1, 1)

        # With heads folded into the batch axis this is plain single-head
        # attention.
        attn, output = self.attention(q, k, v, mask=mask)

        # Concat: (heads * batch, n_q, d_v) -> (batch, n_q, heads * d_v)
        output = output.view(heads, batch, n_q, v_width)
        output = output.permute(1, 2, 0, 3).contiguous()
        output = output.view(batch, n_q, -1)

        # Final projection to the output width.
        output = self.fc_concatOutput(output)
        return attn, output
68 |
class SelfAttention(nn.Module):
    """Self-attention: derive q, k and v from one input, then run multi-head attention.

    Args:
        n_head: Number of heads passed to MultiHeadAttention.
        d_k: Width of the derived queries/keys (columns of ``wq`` / ``wk``).
        d_v: Width of the derived values (columns of ``wv``).
        d_x: Feature width of the input ``x`` (rows of the three matrices).
        d_o: Output width passed to MultiHeadAttention.
    """

    def __init__(self, n_head, d_k, d_v, d_x, d_o):
        super().__init__()
        # torch.empty replaces the legacy torch.Tensor(d0, d1) constructor;
        # both only allocate, the values are filled in by init_parameters().
        self.wq = nn.Parameter(torch.empty(d_x, d_k))
        self.wk = nn.Parameter(torch.empty(d_x, d_k))
        self.wv = nn.Parameter(torch.empty(d_x, d_v))

        self.mha = MultiHeadAttention(n_head=n_head, d_k_=d_k, d_v_=d_v, d_k=d_k, d_v=d_v, d_o=d_o)
        self.init_parameters()

    def init_parameters(self):
        """Re-draw every parameter uniformly from ``[-s, s)``, ``s = 1 / sqrt(size of last dim)``.

        This walks ``self.parameters()``, so the parameters of ``self.mha``
        are re-initialised as well.
        """
        # In-place initialisation must not be recorded by autograd; no_grad()
        # is the supported way to do that instead of going through ``.data``.
        with torch.no_grad():
            for param in self.parameters():
                stdv = 1. / np.power(param.size(-1), 0.5)
                param.uniform_(-stdv, stdv)

    def forward(self, x, mask=None):
        """Apply self-attention to ``x``.

        Args:
            x: Input whose last dimension is ``d_x``.
            mask: Optional mask forwarded unchanged to the multi-head module.

        Returns:
            The ``(attn, output)`` tuple produced by ``self.mha``.
        """
        q = torch.matmul(x, self.wq)
        k = torch.matmul(x, self.wk)
        v = torch.matmul(x, self.wv)

        attn, output = self.mha(q, k, v, mask=mask)
        return attn, output
92 |
if __name__ == "__main__":
    # Small demo of the plain scaled dot-product attention on random data.
    batch = 32
    # Number of queries may differ from the number of keys; keys and values
    # come in equal numbers.
    n_q, n_k, n_v = 2, 4, 4
    # Queries and keys share a feature width; values may use a different one.
    d_q_, d_k_, d_v_ = 128, 128, 64

    # Each tensor is (batch, number of items, feature width).
    q = torch.randn(batch, n_q, d_q_)
    k = torch.randn(batch, n_k, d_k_)
    v = torch.randn(batch, n_v, d_v_)

    # All-False mask: no position is masked out.
    mask = torch.zeros(batch, n_q, n_k).bool()
    print(mask)

    # Build the module, then call it like a function.
    sdpa = ScaledDotProductAttension(scale=np.power(d_k_, 0.5))
    attn, output = sdpa(q, k, v, mask=mask)

    # Multi-head variant (disabled):
    # mha = MultiHeadAttention(n_head=8, d_k_=128, d_v_=64, d_k=256, d_v=128, d_o=128)
    # attn, output = mha(q, k, v, mask=mask)

    # Self-attention variant (disabled). n_x / d_x are the number of items and
    # their feature width; x is multiplied by three matrices to obtain q, k, v,
    # which then go through multi-head attention.
    # n_x = 4
    # d_x = 80
    # x = torch.randn(batch, n_x, d_x)
    # mask = torch.zeros(batch, n_x, n_x).bool()
    # selfattn = SelfAttention(n_head=8, d_k=128, d_v=64, d_x=80, d_o=80)
    # attn, output = selfattn(x, mask=mask)

    # print(attn); print(output)
    # print(attn.size()); print(output.size())
123 |
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/auto-parallelism.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/auto-parallelism.pptx
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_11.35.50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_11.35.50.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_11.36.20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_11.36.20.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_14.21.53.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_14.21.53.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_15.06.46.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_15.06.46.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_21.22.37.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_21.22.37.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_21.23.07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_21.23.07.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-02_22.59.26.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-02_22.59.26.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-02_23.01.41.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-02_23.01.41.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_15.04.36.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_15.04.36.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_16.29.54.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_16.29.54.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_17.39.56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_17.39.56.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.18.39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.18.39.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.24.00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.24.00.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.27.19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.27.19.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.42.48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.42.48.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.46.39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.46.39.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.10.38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.10.38.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.21.48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.21.48.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.52.54.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.52.54.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.15.44.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.15.44.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.29.39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.29.39.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.35.43.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.35.43.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.38.09.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.38.09.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.39.49.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.39.49.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 1.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 10.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 11.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 12.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 13.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 14.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 15.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 16.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 17.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 18.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 19.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 2.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 20.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 21.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 3.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 4.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 5.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 6.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 7.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 8.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 9.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled.png
--------------------------------------------------------------------------------
/paper_read/Auto-parallelism summary/img_auto_parallelism/mlsysdistribute_system.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/mlsysdistribute_system.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/Composable and Modular Code Generation in MLIR.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/Composable and Modular Code Generation in MLIR.pptx
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/MLIRtoLLVM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/MLIRtoLLVM.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/MLIRtoLLVMandIntrinsics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/MLIRtoLLVMandIntrinsics.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/classical优化.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/classical优化.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/image-213.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/image-213.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/inplace-op.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/inplace-op.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/relevant dialects1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/relevant dialects1.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/relevant dialects2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/relevant dialects2.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/silos.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/silos.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/special优化.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/special优化.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step1.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step2.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step3.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation1.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation2.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation3.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation4.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/v2-5b69d56e33512deeb65eda364c343859_1440w.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/v2-5b69d56e33512deeb65eda364c343859_1440w.webp
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/x1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/x1.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-07 21.42.13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-07 21.42.13.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-10 16.26.12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-10 16.26.12.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-13 14.54.32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-13 14.54.32.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-13 22.23.23.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-13 22.23.23.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-14 16.52.45.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-14 16.52.45.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-15 11.47.39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-15 11.47.39.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-16 14.16.10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-16 14.16.10.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-21 18.42.21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-21 18.42.21.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-21 18.54.30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-21 18.54.30.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.47.49.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.47.49.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.56.53.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.56.53.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.57.09.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.57.09.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.57.25.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.57.25.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.59.03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.59.03.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 18.00.04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 18.00.04.png
--------------------------------------------------------------------------------
/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 18.00.43.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 18.00.43.png
--------------------------------------------------------------------------------
/paper_read/DISC/DISC.md:
--------------------------------------------------------------------------------
1 | # DISC
2 |
3 | > DISC:A Dynamic Shape Compiler for Machine Learning Workloads
4 | >
5 | >
6 | > 看hlo相关内容
7 | >
8 |
9 | 端到端的动态形状编译器项目BladeDISC
10 |
11 | ## 1、背景
12 |
13 | - 动态shape特性
14 |
15 | 输入tensor shape变化(批次)、运行过程中(某些算子导致)shape变化
16 |
17 | - 当前框架缺乏对动态shape的支持
18 |
19 | 静态shape在编译时完全已知shape信息,方便优化
20 |
21 | 动态shape需要为每个shape编译,从而会增加编译开销、内存使用、优化和部署的复杂性
22 |
23 | 但 某些任务需要根据结果进行动态调度 或者 某任务的输入过大导致编排时间过长
24 |
25 | - 当前对动态shape的优化
26 |
27 | 区分动态shape和静态shape的算子后,仅对静态shape算子进行优化
28 |
29 | 通过padding、slicing的方式对动态shape张量处理至固定已知shape
30 |
31 | ## 2、问题与解决
32 |
33 | - 问题一:现有IR缺少对动态shape计算的完整表示
34 |
35 | DISC基于MLIR,对XLA编译器中的HLO的IR进行复用扩充为DHLO,以支持动态shape表示
36 |
37 | - 问题二:在运行时利用VM解释生成运行时流,带来解释开销且损失主机-设备协同优化机会
38 |
39 | DISC在编译时生成运行时流的代码,将主机端逻辑和设备计算一起编译
40 |
41 | - 问题三:由于动态shape而无法进行内核融合等优化
42 |
43 | DISC在基于算子间的shape传播特性和编译时捕获的shape约束信息,进行内核融合优化
44 |
45 | - 问题四:深度学习编译器的灵活性
46 |
47 | DISC支持TensorFlow和PyTorch,且同时支持动态shape和静态shape混合情况
48 |
49 | ## 3、架构
50 |
51 |
52 |
53 | - **Computation Graph Bridging**:AI框架的计算图Lower到DHLO、收集shape约束信息
54 | - **DHLO(IR Supplementation)**:DHLO支持动态shape的表示
55 | - **Shape Calculation & Placer**:编译同时生成shape计算逻辑、主机-设备的计算逻辑放置
56 | - **Buffer Management & Optimization**:缓冲区管理与优化
57 | - **Host-side Control Flow**:外部库Lower、内核启动管理、设备管理以及与框架交互
58 | - **Fusion Decision**:根据算子调度兼容性分析执行融合决策
59 | - **Codegen**:主机-设备端代码生成
60 |
61 | 整个流程:在未知完整shape的情况下进行编译,编译时同时生成**关于shape计算逻辑**以及kernel计算的代码,期间也会收集**shape约束信息**,并使用约束分析指导编译优化过程。
62 |
63 | 
64 |
65 | > BladeDISC架构图,出自:https://zhuanlan.zhihu.com/p/552484413
66 | >
67 |
68 | ## 4、DHLO
69 |
70 | `Dynamic HLO`: 在XLA的HLO IR基础上,扩展了一套具有完备动态shape表达能力的IR。
71 |
72 | 静态场景下,HLO IR中的shape表达会被静态化,所有的shape计算会被固化为编译时常量保留在编译结果中;而在动态shape场景下,IR本身需要有足够的能力表达shape计算和动态shape信息的传递。
73 |
74 | 将原有中间表示HLO中的start_indices、limit_indices、strides等属性值使用张量的形式进行保存,使得可以在运行时将计算得到的shape值传入。
75 |
76 | 
77 |
78 | ## HLO相关补充
79 |
80 | ### **层次划分**
81 |
82 | HLO IR可以分成三个层次,HloModule, HloComputation和HloInstruction。
83 |
84 | 
85 |
86 | - HloModule
87 |
88 | 一个编译单元,相当于一个完整的**可执行程序**,所以有入口函数,也就是 entry_computation, 有且仅有一个。输入可以是多个参数,但输出只有一个(root instruction的值),**如果要返回多个值,需要把多个值构造成一个元组(tuple)返回**。一个module可以包含多个computation,除了entry_computation,其他的都是"nested",也就是被调用。
89 |
90 | ENTRY ----------------> 程序入口,ROOT--------------> 程序输出
91 |
92 | 一个HloModule可以包含很多个HloComputation
93 |
94 | - HloComputation
95 |
96 | 是HLO IR中间层的表示,相当于程序中的一个**函数**。一个HloModule只能有一个entry_computation,其他的computation是被entry_computation调用的。我们可以把entry_computation类比作main函数。每个HloComputation可以包含多个HloInstruction,但只能有一个root_instruction,root_instruction的output就是该computation的output。
97 |
98 | - HloInstruction
99 |
100 | HLO IR最底层的表示,相当于程序中的一条指令,一个HloInstruction可包含多个算子。computation的input用parameter表示。HloInstruction也可以调用HloComputation。一个HloInstruction只有一个输出,如果需要多个output,就打包成一个Tuple。
101 |
102 | data dependency:如果一个instruction的output是另一个instruction的input,我们就说两个instruction之间存在data dependency。HLO中使用operands_和users_两个fields来表示data dependency。
103 |
104 | ```cpp
105 | class HloInstruction {
106 | ...
107 | InstructionVector operands_;
108 | std::vector<HloInstruction*> users_;
109 | absl::flat_hash_map<const HloInstruction*, int64_t> user_map_;
110 | ...
111 | };
112 | ```
113 |
114 | 
115 |
116 | control dependency:有些instruction之间并没有数据依赖,但是我们仍然可能对这些instruction的执行顺序有额外的要求。在HloInstruction中有control_successors_和control_predecessors_两个fields来表示control dependency。
117 |
118 | ### **多种HLO**
119 |
120 | [https://github.com/tensorflow/mlir-hlo](https://github.com/tensorflow/mlir-hlo)
121 |
122 | - `DHLO`:Dynamic HLO
123 |
124 | 在XLA的HLO IR基础上,扩展了一套具有完备动态shape表达能力的IR。静态场景下,HLO IR中的shape表达会被静态化,所有的shape计算会被固化为编译时常量保留在编译结果中;而在动态shape场景下,IR本身需要有足够的能力表达shape计算和动态shape信息的传递。
125 |
126 | - `CHLO`:"Client" HLO
127 |
128 | 最初设计用于映射XLA的client API,更接近前端。其中的ops可以来自XlaBuilder或者XLA helper functions,支持隐式广播、支持动态形状。设计初衷是为了贴近client级别,方便渐进细化下降。
129 |
130 | > 隐式广播(implicit broadcasting)指的是当算子的各操作数shape不一致时,按照广播规则自动将其扩展到兼容的shape,而无需显式插入broadcast op
131 | >
132 |
133 | | CHLO | |
134 | | --- | --- |
135 | | 入口 | 通过XlaBuilder API,TF2XLA kernels、JAX、PyTorch bridge直接使用这些API;
136 | legalization from TensorFlow ops in the TF Graph Compiler |
137 | | 出口 | MHLO;
138 | 调用XlaBuilder API导出到 xla::HloInstructionProto |
139 | - `MHLO`: "Meta"-HLO
140 |
141 | [https://tensorflow.google.cn/mlir/hlo_ops](https://tensorflow.google.cn/mlir/hlo_ops)
142 |
143 | 和xla_hlo相近,没有隐式广播,但支持动态形状
144 |
145 | 隐式捕获控制流ops,便于优化;输出结果多个,不用组合成一个元组;拥有不能加在client dialect或server dialect的ops;verification发生在boundary;更彻底地支持动态形状,无需更新所有users/backends
146 |
147 | 大量地映射到 linalg named ops
148 |
149 | | MHLO | |
150 | | --- | --- |
151 | | 入口 | Legalization from CHLO dialect or conversion from XLA HLO;
152 | 直接从TF Graph Compiler获得 |
153 | | 出口 | LMHLO;
154 | Linalg IREE |
155 | - `LMHLO`: "Late"-"Meta"-HLO
156 |
157 | 与 MHLO 相同,但在 buffers (e.g., memref) 而不是 tensors 上,在缓冲区分配后作用
158 |
159 | **LMHLO是为了帮助迁移 XLA 后端而临时引入的**,在这个级别上建模的大部分内容已经可以用 `linalg` 表示出来了。在 LHLO 上执行转换的 pass 理想情况下应该遵守 Linalg 上已有的严格接口,并且足够通用。
160 |
161 | > tensor values (immutable) 和in-memory buffers (side-effecting)
162 | >
163 |
164 | | LMHLO | |
165 | | --- | --- |
166 | | 入口 | 缓冲区分配后从XLA转入 |
167 | | 出口 | Codegen(LLVM IR) |
168 |
169 | > HLO to LHLO conversion and fusion #41424
170 | >
171 |
172 |
--------------------------------------------------------------------------------
/paper_read/DISC/img_DISC/%E6%88%AA%E5%B1%8F2023-03-02_23.20.46.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/%E6%88%AA%E5%B1%8F2023-03-02_23.20.46.png
--------------------------------------------------------------------------------
/paper_read/DISC/img_DISC/%E6%88%AA%E5%B1%8F2023-03-02_23.26.10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/%E6%88%AA%E5%B1%8F2023-03-02_23.26.10.png
--------------------------------------------------------------------------------
/paper_read/DISC/img_DISC/Untitled 1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/Untitled 1.jpeg
--------------------------------------------------------------------------------
/paper_read/DISC/img_DISC/Untitled.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/Untitled.jpeg
--------------------------------------------------------------------------------
/paper_read/DISC/img_DISC/v2-e68e87bbfeab11afd84bb40a17e1a179_r.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/v2-e68e87bbfeab11afd84bb40a17e1a179_r.jpg
--------------------------------------------------------------------------------
/paper_read/Graphene/ASPLOS’23 - Graphene An IR for Optimized Tensor Computations.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/ASPLOS’23 - Graphene An IR for Optimized Tensor Computations.pdf
--------------------------------------------------------------------------------
/paper_read/Graphene/ASPLOS’23 - Graphene.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/ASPLOS’23 - Graphene.pptx
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/ASPLOS_23_-_Graphene_An_IR_for_Optimized_Tensor_Computations.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/ASPLOS_23_-_Graphene_An_IR_for_Optimized_Tensor_Computations.pdf
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 1.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 10.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 11.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 12.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 13.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 14.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 15.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 16.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 17.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 18.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 19.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 2.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 20.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 21.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 3.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 4.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 5.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 6.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 7.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 8.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled 9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 9.png
--------------------------------------------------------------------------------
/paper_read/Graphene/img_Graphene/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled.png
--------------------------------------------------------------------------------
/paper_read/Ray/Ray.md:
--------------------------------------------------------------------------------
1 | # Ray
2 |
3 | > Ray: A Distributed Framework for Emerging AI Applications
4 | >
5 |
6 | ## 1、背景介绍
7 |
8 | 分布式开发框架,核心要把调度做好(跑得快、资源利用率高)
9 |
10 | 一般可以分为两个方面:任务的编排和优化 + 任务的调度执行
11 |
12 | - **任务的编排优化**
13 | - 静态:任务执行前,通过编译和优化技术进行全局的编排,可以基于rule(静态图)或代价模型
14 | - 动态:任务边执行边编排(动态图)
15 |
16 | 静态利于优化,但 某些任务需要根据结果进行动态调度 或者 某任务的输入过大导致编排时间过长
17 |
18 | - **任务的调度执行**:中心化调度(集中调度)、去中心化调度、变种(中心化和去中心化调度结合)
19 | - 中心化调度(例如Spark、DP接口):编排好的任务由中心节点决定在哪个节点上执行,中心节点的调度可以依据 计算资源、局部性
20 | - 去中心化调度(例如MPI、DDP接口):没有中心节点统一调度,各个节点单独进行任务调度执行,互相之间通过通信进行交互
21 |
22 | 中心化调度能最大化利用系统资源,并提供一定的动态性和容错性,即如果发现有节点down了,可以进行任务重分配,但是缺点是调度开销大
23 |
24 | ## 2、ray
25 |
26 | Spark:静态的编排和优化+中心化的调度,利于执行优化和资源利用。
27 | Ray:动态编排(表面上是分布式RPC框架,但是多个RPC调用其实是形成了一张动态图)+树状的递归调度(去中心化和中心化结合的调度),利于灵活的编程以及低时延的调度。
28 |
29 | 
30 |
31 | GCS 作为集中的服务端,是 Worker 之间传递消息的纽带。每个 Server 都有一个共用的 Object Store,也就是用 Apache Arrow/Plasma 构建的内存数据。 Local Scheduler 是 Server 内部的调度,同时通过 GCS 来和其他 Server 上的 Worker 通信。Object Store 之间也有通信,作用是传递 Worker 之间的数据。
32 |
33 | 一个典型的远程调用流程:
34 |
35 | 
36 |
37 | 可以看到,GCS 储存了代码、输入参数、返回值。Worker 通过 Local Scheduler 来和 GCS 通信。Local Scheduler 就是 Raylet, 是单机上的基础调度服务
38 |
39 | > 更多见:[https://zhuanlan.zhihu.com/p/111340572](https://zhuanlan.zhihu.com/p/111340572)
--------------------------------------------------------------------------------
/paper_read/Ray/img_Ray/Untitled.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Ray/img_Ray/Untitled.jpeg
--------------------------------------------------------------------------------
/paper_read/Ray/img_Ray/Untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Ray/img_Ray/Untitled.png
--------------------------------------------------------------------------------
/paper_read/Slapo/ASPLOS24_Slapo.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Slapo/ASPLOS24_Slapo.pptx
--------------------------------------------------------------------------------
/paper_read/TensorIR/ASPLOS’23 - TensorIR.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/TensorIR/ASPLOS’23 - TensorIR.pptx
--------------------------------------------------------------------------------
/paper_read/TensorIR/TensorIR.md:
--------------------------------------------------------------------------------
1 | ## TensorIR
2 |
3 | 第六页:相关工作
4 |
5 | Halide 和 TVM 使用一种调度语言,可以用标量体描述循环嵌套的循环优化原语。 Tensor Comprehensions 、MLIR/Affine 使用多面体模型 来分析循环嵌套依赖性。以自下而上的方式使用标量计算优化循环嵌套
6 |
7 | Fireiron和 Stripe使用嵌套多面体结构以自上而下的方式对张量程序建模。
8 |
9 | TensorIR 结合了两种方法的见解,并将表示概括为张量化程序。
10 |
11 | > 自动化是机器学习编译和张量程序优化中的一个重要主题。 AutoTVM [10] 引入了一种基于学习的方法,通过学习成本模型和模板引导搜索来优化张量程序。 Triton [41] 引入了一种基于图块的模板表示,用于有效的程序优化。 FlexTensor [50] 自动生成模板。 Halide 使用 Monte-Carlo 树搜索 [2] 构建了一个自动调度程序。 Ansor [48] 使用分层搜索空间改进了自动调度。 我们的自动调度算法从这些方法中吸取教训,并将它们推广到最适合特定领域硬件加速的张量化计算。 自动矢量化 [25, 36] 是编译器研究中的一个长期课题。 张量化可以被视为矢量化问题的推广,以启用现代加速器中的固有张量 [4、21、30、31]。 关于这个主题有一些现有的作品[6,45,47,49]。 AKG [47] 使用多面体方法探索张量化搜索空间,UNIT [45] 引入了一个通用的张量化流程,而 AMOS [49] 可以通过张量表达式自动映射到张量化内在。 我们的方法通过提出一种新的张量化计算抽象并联合执行张量化和其他优化来概括这些先前的方法。 TensorIR 是进一步开发张量感知自动调度方法的基础。
12 |
13 |
14 |
15 | 第七页
16 |
17 | 一个block可以包含一个或者多个nested loop nests,其中包含与计算内容相对应的子块 -->
18 |
19 | 允许我们将计算划分到相应的子块,并利用块签名来表示依赖(约束)关系
20 |
21 |
22 |
23 | 第11页
24 |
25 | TensorIR以TVM IRModule为载体,从TVMScript中导入,并且可以不断进行Schedule变换,以及Pass变换,最终将优化后的IRModule build成为一个能够在不同硬件上运行的Module。
26 |
27 | 在Schedule变换过程中,对于给定的输入程序,我们需要生成具有等效语义的程序的丰富搜索空间。 我们引入原语将 TensorIR 程序转换为等效的优化程序。
28 |
29 |
30 |
31 | 第15页
32 |
33 | 对于每个输入的tensor指令,本文使用一种TensorIntrin结构,由两个块组成,一个块描述计算语义,另一个块提供张量化计算的底层实现。
34 |
35 | TensorIntrin中的多维缓冲区规范包括数据类型、存储范围、内存布局和邻接约束。这些约束在验证阶段被使用。
36 |
37 |
38 |
39 | 第16页
40 |
41 | (1)给定一对后端目标和一个输入程序,我们首先将程序主体与可能的TensorIntrin进行匹配,生成张量化候选对象。匹配以循序渐进的方式进行。
42 |
43 | (2)系统将缓冲区访问表达式转换为中间迭代器。基于缓冲区访问模式,我们为每个迭代器计算特征函数,并在共享相同特征向量的迭代器之间建立映射。该映射进一步指导块实例空间和重建索引缓冲区的转换。
44 |
45 |
46 |
47 | 第17页
48 |
49 | 对于一组给定的张量候选者,我们需要构建一个包含tensorization的大型程序搜索空间,本文通过生成包含张量化计算的程序草图来构建搜索空间。
50 |
51 | 通过迭代应用预定义的草图生成规则来生成草图,需要检查块签名来构建适用于张量计算的草图生成规则,并在我们的分析过程中利用访问区域信息。
52 |
53 | 数据移动决策通常取决于计算调度决策,如平铺、线程绑定、执行范围和生产者-消费者数据流粒度。
54 |
55 |
56 |
57 | 第18页
58 |
59 | 在张量化程序草图生成阶段之后,我们使用进化搜索来探索空间并找到优化的张量程序。
60 |
61 | 初始化是随机选择一个程序草图,进化搜索由一个learning-based cost model指导
62 |
63 | cost model使用从程序中提取的特征。 特征向量包含与内存访问模式、重用和循环注释相关的信息。(memory access patterns, reuse, and loop annotations)
64 |
65 |
66 |
67 | 第19页
68 |
69 | 以探索内存分配策略为例:
70 | Astra采取测量驱动的方法来选择相互冲突的内存分配策略,在探索每个分配之后,我们为每个分配构建最佳配置,然后比较它们的端到端时间。
71 | Parallel:每个子节点都能被独立地探索并调优
72 | Prefix-based:每次只探索一维,其他参数不变。(第一个epoch搜索,找到ideal stream mapping;freeze the configuration;下一个epoch继续探索)
73 | Exhaustive:详细地探索子树(指数级别复杂度)
74 | Equivalence:如果一组kernel是同样的shape,在DFG中有着相似的inbound、outbound和dependencies,那么它们可以同等看待
75 | (10个GEMM kernel分给2个stream,有5种而不是2^10种)
76 |
--------------------------------------------------------------------------------
/tools/conda.md:
--------------------------------------------------------------------------------
1 | # conda
2 |
3 |
4 |
5 | ## 换源
6 |
7 | vim ~/.condarc
8 |
9 | ```bash
10 | show_channel_urls: true
11 | channels:
12 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
13 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
14 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
15 | - defaults
16 | auto_activate_base: false
17 | ```
18 |
19 | ## 使用
20 |
21 | ```bash
22 | conda create -n B --clone A #克隆环境A来创建名为B的环境
23 | conda create -n B python=3.10
24 | conda activate xxxx #开启xxxx环境
25 | conda deactivate #关闭环境
26 | conda info -e #显示所有的虚拟环境
27 | conda remove -n xxxx --all #删除已创建的xxxx虚拟环境
28 |
29 | conda update --all
30 |
31 | conda clean -p #删除没有用的包
32 | conda clean -t #删除缓存的tar包
33 | conda clean -a
34 |
35 | conda config --show #查看全部配置
36 | ```
--------------------------------------------------------------------------------
/tools/git.md:
--------------------------------------------------------------------------------
1 | # git
2 |
3 |
4 |
5 | ## 显示当前分支
6 |
7 | ```
8 | vim .bashrc
9 | ```
10 |
11 | 将下面的代码加入到文件的最后处
12 |
13 | ``` bash
14 | function git_branch {
15 | branch="`git branch 2>/dev/null | grep "^\*" | sed -e "s/^\*\ //"`"
16 | if [ "${branch}" != "" ];then
17 | if [ "${branch}" = "(no branch)" ];then
18 | branch="(`git rev-parse --short HEAD`...)"
19 | fi
20 | echo " ($branch)"
21 | fi
22 | }
23 |
24 | export PS1='\u@\h \[\033[01;36m\]\W\[\033[01;32m\]$(git_branch)\[\033[00m\] \$ '
25 | ```
26 |
27 | 保存退出,执行加载命令
28 |
29 | ```
30 | source ~/.bashrc
31 | ```
32 |
33 | ## 常用操作
34 |
35 | ```bash
36 | git clone xxx
37 | git submodule update --init --recursive
38 |
39 | # 查看远程分支
40 | git branch -a
41 |
42 | # 查看本地分支
43 | git branch
44 |
45 | # 切换分支 tx-84
46 | git checkout tx-84
47 | # 检查子模块是否版本对齐
48 | git status
49 | git submodule update --init
50 |
51 |
52 | # 获取master最新代码
53 | git checkout master
54 | git pull # 拉取远程主机的最新内容并直接合并到当前分支(git pull = git fetch + git merge)
55 | git fetch # 只将远程主机的最新内容拉到本地,用户在检查了以后决定是否合并到本机分支中,此时master的最新代码在origin/master
56 |
57 | # 基于master创建分支
58 | git checkout -b myfeature
59 |
60 | # 合并commit
61 | # 分支开发完成后,很可能有一堆commit,但是合并到主干的时候只希望有少量commit
62 | git reset HEAD~5
63 | git add xxx
64 | git commit -m "Here's the bug fix that closes #28"
65 |
66 | # 推到远程仓库
67 | git push origin myfeature -f
68 |
69 | # rebase代码
70 | git checkout myfeature
71 | git rebase origin/master
72 | git push origin myfeature -f
73 |
74 | # 查看commit更改
75 | git show
76 | git show --name-only
77 | git show
78 | git log --oneline # 查看最近的提交
79 |
80 | # 例如,把 e57b0e6 合并到 17cb931,不保留注释
81 | pick 17cb931 fix && add batch del
82 | f e57b0e6 fix && add batch del
83 |
84 | # 指定需要合并版本号,处理从该版本后往后的commit,不包含该版本,会进入vi编辑器
85 | git rebase -i 版本号
86 | git commit -n --amend # 或者使用这个命令将其合并进上一个commit
87 |
88 | # 使用别人的patch测试,记得先fetch
89 | git cherry-pick commit_id
90 | ```
91 |
92 | 给开源仓库提 `pr` 的流程
93 |
94 | 首先在 github 上 fork 你的目标仓库,这样你的主页就出现了一个 fork 的仓库。以 [triton-linalg](https://github.com/Cambricon/triton-linalg) 仓库为例。
95 |
96 | 然后进入本地现有仓库所在目录,通过命令行执行以下命令,将 fork 仓库的地址添加为现有仓库的一个远程地址。
97 |
98 | ```bash
99 | git remote add forked git@github.com:tfruan2000/triton-linalg.git
100 | ```
101 |
102 | 接着,执行以下命令,将 fork 远程地址与原仓库的远程地址关联起来。
103 |
104 | ```bash
105 | git remote add upstream git@github.com:tfruan2000/triton-linalg.git
106 | ```
107 |
108 | 至此,我们就成功将 fork 仓库添加到了现有仓库中。使用 `git remote -v ` 查看一下
109 |
110 | ```bash
111 | (triton_env) ➜ triton-linalg git:(correct_some_info) ✗ git remote -v
112 | forked git@github.com:tfruan2000/triton-linalg.git (fetch)
113 | forked git@github.com:tfruan2000/triton-linalg.git (push)
114 | origin https://github.com/Cambricon/triton-linalg.git (fetch)
115 | origin https://github.com/Cambricon/triton-linalg.git (push)
116 | upstream git@github.com:tfruan2000/triton-linalg.git (fetch)
117 | upstream git@github.com:tfruan2000/triton-linalg.git (push)
118 | ```
119 |
120 | 然后就可以把分支推到远端了,例如要推 `correct_some_info` 分支
121 |
122 | ```bash
123 | git push forked correct_some_info:correct_some_info
124 | ```
--------------------------------------------------------------------------------
/tools/linux.md:
--------------------------------------------------------------------------------
1 | # linux 终端基础操作
2 |
3 | - mkdir
4 | - mkdir 文件名 : 在当前目录下新建 “文件名”的文件夹
5 |
6 | - cd
7 | - cd 文件名 : 进入当前目录下 “文件名”的文件夹,
8 |
9 | - pwd
10 | - pwd : 显示当前位置
11 |
12 | - ls / ll
13 | - ls : 显示当前目录下的文件
14 | - ll : 显示当前目录下的文件,包括隐藏文件
15 |
16 | - touch
17 | - touch 文件名 : 在当前目录下新建 “文件名”的文件
18 |
19 | - cp
20 | - cp 源文件 目标文件(夹) : 复制文件或文件夹
21 | - cp -r 源文件 目标文件(夹) : 复制文件夹
22 |
23 | - mv
24 | - mv 源文件 目标文件(夹) : 移动文件或文件夹
25 | - mv 源文件 目标文件(夹) : 重命名文件或文件夹
26 |
27 | - rm
28 | - rm 文件名 : 删除文件
29 | - rm -rf 文件名 : 删除文件夹
30 |
31 | - tar
32 | - tar -zxvf 文件名.tar.gz : 解压tar.gz
33 | - tar -zcvf 文件名.tar.gz 文件名 : 压缩文件
34 |
35 | - unzip
36 | - unzip 文件名.zip : 解压zip
37 |
38 | - du
39 | - du -ah --max-depth=1 : 显示当前目录下各个文件占用的磁盘空间
40 |
41 | - df
42 | - df -h : 显示磁盘使用情况
43 |
44 | - cat
45 | - cat 文件名 : 查看文件内容
46 |
47 | - vim / vi
48 | - vim 文件名 : 编辑文件
49 |
50 | - tree
51 | - tree : 显示目录树
52 |
53 | - grep
54 | - grep -rni "关键词" 文件名 : 在文件中搜索关键词
55 |
56 | - find
57 | - find . -name "文件名" : 在当前目录下查找文件
58 | - find . -name "文件名" -exec rm -rf {} \; : 删除查找到的文件
59 |
60 | - ps
61 | - ps -ef | grep "进程名" : 查看进程
62 | - kill -9 进程号 : 杀死进程
63 |
64 | - jobs
65 | - jobs : 查看后台运行的任务
66 | - fg %n : 将后台任务调到前台运行
67 | - bg %n : 将前台任务调到后台运行
68 |
69 | - scp
70 | - scp -r 文件名 用户名@IP地址:目标路径 : 上传文件
71 | - scp -r 用户名@IP地址:文件名 目标路径 : 下载文件
72 |
73 | - ssh
74 | - ssh 用户名@IP地址 : 远程登录
75 |
76 | - wget / curl
77 | - wget / curl 下载链接 : 下载文件
78 |
79 | - apt
80 | - apt update : 更新软件源
81 | - apt upgrade : 更新软件
82 | - apt install 软件名 : 安装软件
83 | - apt remove 软件名 : 卸载软件
84 |
85 | - sh
86 | - sh 文件名 : 运行脚本
87 |
88 | - chmod
89 | - chmod 777 文件名 : 修改文件权限
90 |
91 | - chown
92 | - chown 用户名 文件名 : 修改文件所有者
93 |
94 | - ln
95 | - ln -s 源文件 目标文件 : 创建软链接
96 |
--------------------------------------------------------------------------------
/tools/macos.md:
--------------------------------------------------------------------------------
1 | # mac 配置
2 |
3 | ## homebrew
4 |
5 | 参考:https://mirrors.tuna.tsinghua.edu.cn/help/homebrew/
6 |
7 | 安装需求:
8 |
9 | - 对于 macOS 用户,系统自带 bash、git 和 curl,在命令行输入 `xcode-select --install` 安装 CLT for Xcode 即可。
10 |
11 | - 对于 Linux 用户,系统自带 bash,仅需额外安装 git 和 curl。
12 |
13 | 安装 Homebrew / Linuxbrew:
14 |
15 | ```bash
16 | # 从本镜像下载安装脚本并安装 Homebrew / Linuxbrew
17 | git clone --depth=1 https://mirrors.tuna.tsinghua.edu.cn/git/homebrew/install.git brew-install
18 | /bin/bash brew-install/install.sh
19 | rm -rf brew-install # 删掉多余的安装包
20 |
21 | # 也可从 GitHub 获取官方安装脚本安装 Homebrew / Linuxbrew
22 | /bin/bash -c "$(curl -fsSL https://github.com/Homebrew/install/raw/master/install.sh)"
23 | ```
24 |
25 | 加入环境变量
26 |
27 | ```bash
28 | #以下针对基于 Apple Silicon CPU 设备上的 macOS 系统(命令行运行 uname -m 应输出 arm64)上的 Homebrew:
29 | test -r ~/.bash_profile && echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> ~/.bash_profile
30 | test -r ~/.zprofile && echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> ~/.zprofile
31 |
32 | #对基于 Intel CPU 设备上的 macOS 系统(命令行运行 uname -m 应输出 x86_64)的用户可跳过本步。
33 |
34 | #以下针对 Linux 系统上的 Linuxbrew:
35 | test -d ~/.linuxbrew && eval "$(~/.linuxbrew/bin/brew shellenv)"
36 | test -d /home/linuxbrew/.linuxbrew && eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)"
37 | test -r ~/.bash_profile && echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.bash_profile
38 | test -r ~/.profile && echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.profile
39 | test -r ~/.zprofile && echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.zprofile
40 | ```
41 |
42 | 换源:
43 |
44 | ```bash
45 | export HOMEBREW_INSTALL_FROM_API=1
46 | export HOMEBREW_API_DOMAIN="https://mirrors.tuna.tsinghua.edu.cn/homebrew-bottles/api"
47 | export HOMEBREW_BOTTLE_DOMAIN="https://mirrors.tuna.tsinghua.edu.cn/homebrew-bottles"
48 | export HOMEBREW_BREW_GIT_REMOTE="https://mirrors.tuna.tsinghua.edu.cn/git/homebrew/brew.git"
49 | export HOMEBREW_CORE_GIT_REMOTE="https://mirrors.tuna.tsinghua.edu.cn/git/homebrew/homebrew-core.git"
50 | brew update
51 | ```
52 |
53 | 配置好后再使用 `brew install` 安装软件
54 |
55 | ```bash
56 | brew install --cask google-chrome
57 | brew install miniconda
58 | ```
59 |
60 | ## oh my zsh
61 |
62 | 安装zsh `brew install zsh zsh-completions`
63 |
64 | 切换到zsh `[sudo] chsh -s $(which zsh)`
65 |
66 | 安装oh-my-zsh
67 |
68 | ```bash
69 | git clone https://github.com/ohmyzsh/ohmyzsh.git ~/.oh-my-zsh
70 | ```
71 |
72 | 修改主题,在 `~/.zshrc` 里的 设置`ZSH_THEME="ys"`
73 |
74 | 安装插件
75 | 常用autojump、zsh-autosuggestions、zsh-syntax-highlighting三个插件
76 | ```bash
77 | cd ~/.oh-my-zsh/plugins
78 | brew install autojump
79 | git clone https://github.com/zsh-users/zsh-syntax-highlighting.git
80 | git clone https://github.com/zsh-users/zsh-autosuggestions.git
81 | ```
82 | 然后在 `~/.zshrc` 找到 `plugins=` 添加下面的,最后保存执行 `source ~/.zshrc`
83 |
84 | ```bash
85 | plugins=(
86 | autojump
87 | git zsh-autosuggestions
88 | git zsh-syntax-highlighting
89 | )
90 | ```
91 |
92 | ## fzf
93 |
94 | 用来增强搜索 `ctrl + r` / `command + r`
95 |
96 | ```bash
97 | git clone --depth 1 https://github.com/junegunn/fzf.git ~/.fzf
98 | ~/.fzf/install
99 | ```
100 |
--------------------------------------------------------------------------------
/tools/tmux.md:
--------------------------------------------------------------------------------
1 | # tmux
2 |
3 |
4 |
5 | ## 配置
6 |
7 | vim ~/.tmux.conf
8 | tmux source ~/.tmux.conf
9 |
10 | ```bash
11 | ## ====================== 将以下内容输入
12 |
13 | #set -g prefix C-z # 修改 默认的ctrl-b 组合键为 ctrl-z
14 |
15 | bind | split-window -h # ctrl-b + | 左右分屏
16 | bind - split-window -v # ctrl-b + - 上下分屏
17 |
18 | # 开启鼠标切换tmux窗口
19 | setw -g mouse-resize-pane on
20 | setw -g mouse-select-pane on
21 | setw -g mouse-select-window on
22 | setw -g mode-mouse on
23 |
24 | set -g base-index 1 # 窗口编号从 1 开始计数
25 | set -g pane-base-index 1 # 窗格编号从 1 开始计数
26 | set -g renumber-windows on # 关掉某个窗口后,编号重排
27 | setw -g allow-rename off # 禁止活动进程修改窗口名
28 | setw -g automatic-rename off # 禁止自动命名新窗口
29 |
30 | set -g status-right '#{prefix_highlight} #H | %a %Y-%m-%d %H:%M'
31 | set -g @prefix_highlight_show_copy_mode 'on'
32 | set -g @prefix_highlight_copy_mode_attr 'fg=white,bg=blue'
33 | ## ====================== :wq! 保存退出
34 | ```
35 |
36 | ## 操作
37 |
38 | ```bash
39 | tmux # 开启一个窗口
40 | exit # 销毁/关闭该窗口
41 | tmux detach # 将当前会话与窗口分离,跑长时间记得使用(快捷键 按下ctrl-b松手 再按 d)
42 | tmux attach -t # 例如 tmux attach -t 0
43 |
44 | tmux ls # 查看当前所有的 Tmux 会话(快捷键ctrl-b + s)
45 | tmux kill-session -t 0 # 命令用于杀死某个会话,数字0是编号
46 |
47 | tmux split-window # 划分上下两个窗格 ctrl-b + -
48 | tmux split-window -h # 划分左右两个窗格 ctrl-b + |
49 |
50 | # 快捷键Ctrl+b :光标切换到其他窗格。ctrl-b + 上下左右
51 | tmux select-pane -U # 光标切换到上方窗格
52 | tmux select-pane -D # 光标切换到下方窗格
53 | tmux select-pane -L # 光标切换到左边窗格
54 | tmux select-pane -R # 光标切换到右边窗格
55 | ```
56 |
57 | 一般连接服务器后,如果某个进程需要长时间运行,就先 `tmux` 开启会话,跑任务,再 `tmux detach`
58 |
59 | 更多见:https://www.ruanyifeng.com/blog/2019/10/tmux.html
60 |
61 |
--------------------------------------------------------------------------------
/tools/vim.md:
--------------------------------------------------------------------------------
1 | # Vim
2 |
3 |
4 |
5 | ## 配置
6 |
7 | vim ~/.vimrc
8 |
9 | source ~/.vimrc
10 |
11 | ```bash
12 |
13 | set wildmenu"按TAB键时命令行自动补齐"
14 | set ignorecase"忽略大小写"
15 | set number "显示行号"
16 | set ruler"显示当前光标位置"
17 | set autoread"文件在Vim之外修改过,自动重新读入"
18 | set autowrite"设置自动保存内容"
19 | set autochdir"当前目录随着被编辑文件的改变而改变"
20 | set cindent "c/c++自动缩进"
21 | set smartindent
22 | set autoindent"参考上一行的缩进方式进行自动缩进"
23 | set softtabstop=4 "4 character as a tab"
24 | set shiftwidth=4
25 | set smarttab
26 | set hlsearch "开启搜索结果的高亮显示"
27 | set incsearch "边输入边搜索(实时搜索)"
28 |
29 | ```
30 |
31 | ## 操作
32 |
33 | - 模式切换
34 | - 正常为命令模式,按 `h` `j` `k` `l` 分别为左下上右
35 | - 按i进入编辑模式,按esc退出编辑模式
36 | - 按v进入可视模式,此时是一个个选择,按V进入行选择
37 |
38 | - 复制
39 | - 在命令模式下,将光标移动到将要复制的行处,按 `yy` 进行复制;
40 | - 按 `nyy` 复制n行;其中n为1、2、3……
41 |
42 | - 粘贴
43 | - 按 `p` 进行粘贴
44 |
45 | - 删除
46 | - 按 `dd` 删除当前行;按 `ndd` 删除n行,其中n表示删除的行数
47 |
48 | - 撤回
49 | - 撤回上一步操作:按 `u`
50 | - 撤回多步操作:按 `U`
51 |
52 | - 查找
53 | - 按 `/` 进入查找模式,输入关键词,按 `n` 查找下一个,按 `N` 查找上一个
54 |
55 | - 替换
56 | - 按 `:%s/old/new/g` 进行替换
57 | - `g` 表示全局替换
58 |
59 | - 在vim中比较两个文件的不同
60 | - `vimdiff a.file b.file` 或者在一个文件中 `:vs b.file`
61 | - 使用 `ctrl + w` + `w` 进行切换左右侧
62 |
63 | ## nvim
64 |
65 | nvim比vim感觉更好看点,而且好用些
66 |
67 | 安装neovim
68 |
69 | - Mac
70 |
71 | ```bash
72 | brew install neovim
73 | ```
74 |
75 | - linux
76 |
77 | ```bash
78 | curl -LO https://github.com/neovim/neovim/releases/latest/download/nvim.appimage
79 | chmod u+x nvim.appimage
80 | ./nvim.appimage
81 |
82 | mv ./nvim.appimage ~/.local/bin/nvim
83 | ```
84 |
85 | 再创建配置文件,并和 `~/.vimrc` 一样配置
86 |
87 | ```bash
88 | mkdir ~/.config/nvim
89 | touch ~/.config/nvim/init.vim
90 |
91 | cp ~/.vimrc ~/.config/nvim/init.vim
92 | ```
93 |
94 | 然后修改 `~/.zshrc` 或 `~/.bashrc`
95 |
96 | ```bash
97 | alias vim="nvim"
98 | alias vi="nvim"
99 | ```
100 |
--------------------------------------------------------------------------------
/tools/vim_file.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/tools/vim_file.zip
--------------------------------------------------------------------------------