├── .gitignore ├── Meta-Scheduler ├── infra.excalidraw.svg └── new_infra.excalidraw.svg ├── README.md ├── ai_compiler ├── IREE │ ├── IREE_Survey.md │ ├── evaluate │ │ ├── auto-scheduler.md │ │ ├── benchmark-module.md │ │ └── img_benchmark-module │ │ │ └── compilation_flow.png │ ├── img_IREE_Survey │ │ ├── v2-5b69d56e33512deeb65eda364c343859_1440w.webp │ │ ├── v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg │ │ ├── 截屏2022-12-07 17.43.39.png │ │ ├── 截屏2022-12-07 21.42.13.png │ │ ├── 截屏2023-02-28 09.31.38.png │ │ └── 截屏2023-02-28 09.31.47.png │ ├── img_会议文件 │ │ ├── %E6%88%AA%E5%B1%8F2023-05-01_18.33.01.png │ │ ├── %E6%88%AA%E5%B1%8F2023-05-02_12.36.57.png │ │ ├── %E6%88%AA%E5%B1%8F2023-05-02_12.42.52.png │ │ ├── 20200130-IREE_Jan_2020_MLIR_ODM-_External.pdf │ │ ├── 20200820-IREE_CodeGen_-_Public.pdf │ │ ├── 20210609_-_IREE_Runtime_Design_Slides.pdf │ │ ├── 20220505-IREE_targeting_Vulkan_Zhang_May22.pdf │ │ ├── HALDialect.png │ │ ├── HALOps.png │ │ ├── Untitled 1.png │ │ ├── Untitled 10.png │ │ ├── Untitled 11.png │ │ ├── Untitled 12.png │ │ ├── Untitled 13.png │ │ ├── Untitled 14.png │ │ ├── Untitled 15.png │ │ ├── Untitled 16.png │ │ ├── Untitled 17.png │ │ ├── Untitled 18.png │ │ ├── Untitled 19.png │ │ ├── Untitled 2.png │ │ ├── Untitled 20.png │ │ ├── Untitled 21.png │ │ ├── Untitled 22.png │ │ ├── Untitled 23.png │ │ ├── Untitled 24.png │ │ ├── Untitled 25.png │ │ ├── Untitled 26.png │ │ ├── Untitled 27.png │ │ ├── Untitled 28.png │ │ ├── Untitled 29.png │ │ ├── Untitled 3.png │ │ ├── Untitled 30.png │ │ ├── Untitled 4.png │ │ ├── Untitled 5.png │ │ ├── Untitled 6.png │ │ ├── Untitled 7.png │ │ ├── Untitled 8.png │ │ ├── Untitled 9.png │ │ └── Untitled.png │ ├── pipeline │ │ ├── img_linalg-vector-gpu-llvm │ │ │ ├── Untitled 1.png │ │ │ ├── Untitled 1.txt │ │ │ ├── Untitled 2.txt │ │ │ ├── Untitled 3.txt │ │ │ ├── Untitled.png │ │ │ └── Untitled.txt │ │ ├── img_pipeline │ │ │ ├── Untitled 1.png │ │ │ ├── Untitled 2.png │ │ │ └── Untitled.png │ │ ├── 
linalg-vector-gpu-llvm.md │ │ └── pipeline.md │ └── 会议文件.md ├── LLVM │ ├── LLVM简介.md │ ├── TableGen.md │ ├── img_LLVM简介 │ │ ├── webp-1664801155586-3.webp │ │ ├── webp-1664801163209-6.webp │ │ ├── webp-1664802251158-9.webp │ │ └── webp.webp │ └── img_TableGen │ │ └── type.png ├── MLIR │ ├── MLIR_CodeGen_summary.md │ ├── MLIR_Note.md │ ├── MLIR_Survey.md │ ├── composition │ │ ├── Analysis.md │ │ ├── Builder.md │ │ ├── Dialect.md │ │ ├── Interface.md │ │ └── PDLL.md │ ├── img_CodeGen_summary │ │ ├── 0.png │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.jpeg │ │ ├── codegen-dialect-hierarchy-20230214213053227.svg │ │ └── cover1.png │ ├── img_MLIR_Note │ │ ├── Untitled 1.png │ │ ├── Untitled 2.png │ │ └── Untitled.png │ ├── img_MLIR_Survey │ │ ├── cover1.png │ │ ├── cover2.png │ │ ├── image-111.png │ │ ├── image-121.png │ │ ├── image-131.png │ │ ├── image-211.png │ │ ├── image-212.png │ │ ├── image-213.png │ │ ├── image-221.png │ │ ├── image-231.png │ │ ├── image-232.png │ │ ├── image-251.png │ │ ├── image-301.png │ │ ├── image-302.png │ │ ├── image-321.png │ │ ├── image-322.png │ │ ├── image-331.png │ │ ├── image-332.png │ │ ├── image-334.png │ │ ├── image-342.png │ │ ├── image-431.png │ │ ├── image-511.png │ │ ├── image-512.png │ │ ├── image-513.png │ │ ├── image-521.png │ │ ├── image-522.png │ │ ├── image-523.png │ │ ├── image-524.png │ │ └── image-611.png │ ├── meeting pdf │ │ ├── 2021-10-07-The-Torch-MLIR-project.pdf │ │ ├── Structured Ops in MLIR.pdf │ │ ├── Tensor Codegen Thoughts - Jan 23, 2020 MLIR- external ODM.pdf │ │ └── Tutorial-AminiVasilacheZinenko-MLIR.pdf │ └── pipeline │ │ ├── MLIR_matmul性能测试.md │ │ ├── hlo2linalg.md │ │ ├── img_hlo2linalg │ │ ├── 1251718-20210923060706363-1852351942.png │ │ ├── 3-7236750.png │ │ └── 截屏2023-02-13 17.45.49.png │ │ ├── img_linalg │ │ ├── 3.png │ │ ├── 73613629-c5586580-45c5-11ea-94b7-074aeea94c7b.png │ │ ├── 73613904-2f720a00-45c8-11ea-8265-1c856c02525b.png │ │ └── 
codegen-dialect-hierarchy.svg │ │ ├── img_matmul性能测试 │ │ └── Untitled.png │ │ └── linalg.md ├── TVM │ ├── [TVM] vectorize 和 tensorize Pass.md │ ├── [TVM]MLC课程.md │ ├── [TVM]简介.md │ ├── [TVM]编译安装.md │ ├── img_MLC课程 │ │ ├── %E6%88%AA%E5%B1%8F2023-05-30_12.08.14.png │ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_10.33.29.png │ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_15.15.02.png │ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_19.55.57.png │ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_21.08.19.png │ │ ├── %E6%88%AA%E5%B1%8F2023-05-31_21.34.01.png │ │ ├── %E6%88%AA%E5%B1%8F2023-06-01_11.56.31.png │ │ └── image-20230329133901335.png │ ├── img_vectorize 和 tensorize Pass │ │ ├── image-20230329133901335.png │ │ └── v2-ee6ca5e08aee17b8f9998dd3a3da75c1_r.jpg │ ├── img_简介 │ │ ├── 4type.png │ │ ├── 4type2.png │ │ ├── DNN1.png │ │ ├── DNN2.png │ │ ├── TVMSoftwareStack.png │ │ ├── TVMflow.png │ │ ├── all_unity.png │ │ ├── automation.png │ │ ├── cooperate.png │ │ ├── horizontal.png │ │ ├── now_q1.png │ │ ├── unify.png │ │ └── unity.png │ └── img_编译安装 │ │ ├── 201907192304343.png │ │ ├── image-20220925220059169.png │ │ ├── image-20220925220149543.png │ │ ├── image-20220925220653571.png │ │ ├── image-20220925220718150.png │ │ ├── image-20220925220751150.png │ │ ├── image-20220925222016904.png │ │ ├── image-20220925234257698.png │ │ ├── image-20220926002042777.png │ │ ├── image-20220926002050138.png │ │ ├── image-20220926002056920.png │ │ ├── image-20220926085918902.png │ │ ├── image-20220926090051685.png │ │ ├── image-20220926090558359.png │ │ ├── image-20220926090943230.png │ │ ├── image-20220926095351317.png │ │ ├── image-20220926100008289.png │ │ ├── image-20220926100817561.png │ │ └── watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MjA4MTM4OQ==,size_16,color_FFFFFF,t_70.png ├── Triton │ ├── Triton_base.md │ ├── Triton_example.md │ ├── Triton_language.md │ ├── Triton_linalg.md │ ├── Triton_optim.md │ ├── img_Triton_base │ │ ├── cta_wrap_thread.png │ │ ├── cuda_triton.png │ 
│ ├── cuda_vs_triton.png │ │ ├── distribute_layout.png │ │ ├── gpu_arch.png │ │ ├── layout.png │ │ ├── swizzled.png │ │ ├── triton_arch.png │ │ └── triton_arch_now.png │ ├── img_Triton_language │ │ ├── load.png │ │ ├── loadpid0.png │ │ ├── loadpid1.png │ │ ├── loadpid2.png │ │ └── store.png │ ├── img_Triton_linalg │ │ ├── bqb1.png │ │ ├── dialect.png │ │ ├── diff_with_triton_shared.png │ │ ├── mlir_pipeline.png │ │ ├── opt.png │ │ └── success.png │ └── 编译安装.md ├── XLA │ ├── img_xla2hlo │ │ ├── Screen%2BShot%2B2017-02-27%2Bat%2B9.54.12%2BAM.png │ │ ├── codegen-dialect-hierarchy.svg │ │ ├── how-does-xla-work.png │ │ ├── v2-16e964ead53e7c71c0cc4dff6ed11851_b.jpg │ │ ├── v2-84cd6a3244ebcd2f210626887a09c33f_b.jpg │ │ ├── v2-ae9fc9f5aeb969d0c25940cd9f8f24c3_b.jpg │ │ └── 截屏2023-02-14 17.54.11.png │ └── xla2hlo.md └── ai_compiler_commom │ ├── AI_Compiler_Survey.md │ ├── Graph_Partition.md │ ├── img_AI_Compiler_Survey │ ├── MS框架.png │ ├── hlo优化.png │ ├── 发展.png │ ├── 后端优化.png │ ├── 技术框架.png │ └── 结构.png │ └── img_Graph_Partition │ └── ansor.png ├── basic ├── Architecture │ └── Architecture.md ├── CMake │ └── cmake.md ├── DataReuse │ ├── DataReuse.md │ └── img_data_reuse │ │ ├── Untitled 1.jpeg │ │ ├── Untitled 1.png │ │ ├── Untitled 10.png │ │ ├── Untitled 11.png │ │ ├── Untitled 12.png │ │ ├── Untitled 13.png │ │ ├── Untitled 14.png │ │ ├── Untitled 15.png │ │ ├── Untitled 2.png │ │ ├── Untitled 3.png │ │ ├── Untitled 4.png │ │ ├── Untitled 5.png │ │ ├── Untitled 6.png │ │ ├── Untitled 7.png │ │ ├── Untitled 8.png │ │ ├── Untitled 9.png │ │ ├── Untitled.jpeg │ │ └── Untitled.png ├── GPU │ ├── GPU架构发展.md │ └── img_GPU架构 │ │ ├── %E6%88%AA%E5%B1%8F2023-06-03_18.04.41.png │ │ ├── %E6%88%AA%E5%B1%8F2023-06-03_18.08.11.png │ │ ├── %E6%88%AA%E5%B1%8F2023-06-03_18.09.48.png │ │ ├── %E6%88%AA%E5%B1%8F2023-06-03_18.10.47.png │ │ ├── %E6%88%AA%E5%B1%8F2023-06-04_12.39.32.png │ │ ├── %E6%88%AA%E5%B1%8F2023-06-04_15.15.51.png │ │ ├── IMG_8232.jpg │ │ ├── Untitled 1.jpeg │ │ ├── 
Untitled 1.png │ │ ├── Untitled 10.jpeg │ │ ├── Untitled 11.jpeg │ │ ├── Untitled 12.jpeg │ │ ├── Untitled 13.jpeg │ │ ├── Untitled 14.jpeg │ │ ├── Untitled 15.jpeg │ │ ├── Untitled 16.jpeg │ │ ├── Untitled 17.jpeg │ │ ├── Untitled 18.jpeg │ │ ├── Untitled 19.jpeg │ │ ├── Untitled 2.jpeg │ │ ├── Untitled 2.png │ │ ├── Untitled 20.jpeg │ │ ├── Untitled 3.jpeg │ │ ├── Untitled 3.png │ │ ├── Untitled 4.jpeg │ │ ├── Untitled 4.png │ │ ├── Untitled 5.jpeg │ │ ├── Untitled 5.png │ │ ├── Untitled 6.jpeg │ │ ├── Untitled 6.png │ │ ├── Untitled 7.jpeg │ │ ├── Untitled 8.jpeg │ │ ├── Untitled 9.jpeg │ │ ├── Untitled.jpeg │ │ └── Untitled.png ├── PolyhedralCompilation │ ├── PolyhedralCompilation.md │ └── img_Polyhedral_Compilation │ │ ├── Untitled 1.png │ │ ├── Untitled 10.png │ │ ├── Untitled 11.png │ │ ├── Untitled 12.png │ │ ├── Untitled 13.png │ │ ├── Untitled 14.png │ │ ├── Untitled 15.png │ │ ├── Untitled 16.png │ │ ├── Untitled 17.png │ │ ├── Untitled 18.png │ │ ├── Untitled 19.png │ │ ├── Untitled 2.png │ │ ├── Untitled 20.png │ │ ├── Untitled 21.png │ │ ├── Untitled 22.png │ │ ├── Untitled 23.png │ │ ├── Untitled 24.png │ │ ├── Untitled 25.png │ │ ├── Untitled 26.png │ │ ├── Untitled 27.png │ │ ├── Untitled 28.png │ │ ├── Untitled 29.png │ │ ├── Untitled 3.png │ │ ├── Untitled 30.png │ │ ├── Untitled 31.png │ │ ├── Untitled 4.png │ │ ├── Untitled 5.png │ │ ├── Untitled 6.png │ │ ├── Untitled 7.png │ │ ├── Untitled 8.png │ │ ├── Untitled 9.png │ │ ├── Untitled.jpeg │ │ └── Untitled.png └── PyTorch │ ├── PyTorch2.0.md │ └── img_PyTorch2.0 │ ├── Untitled 1.png │ ├── Untitled 2.png │ ├── Untitled 3.png │ └── Untitled.png ├── coding ├── CPP.md ├── coding_note.md └── img_coding_note │ └── computational_complexity.png ├── paper_read ├── Astitch │ ├── AStitch.pptx │ ├── Astitch.md │ └── img_Astitch │ │ ├── image-20221128113128495.png │ │ ├── image-20221128113426967.png │ │ ├── image-20221128115126772.png │ │ ├── image-20221128125650543.png │ │ ├── image-20221128131909434.png 
│ │ ├── image-20221128135909877.png │ │ ├── image-20221128141107379.png │ │ ├── image-20221128143319632.png │ │ ├── image-20221128145444483.png │ │ ├── image-20221128153319148.png │ │ ├── image-20221128185111129.png │ │ ├── image-20221128224708418.png │ │ ├── image-20221129103644172.png │ │ └── image-20221129104739007.png ├── Astra、Rammer、Roller │ ├── img_Astra_Rammer_Roller │ │ ├── Untitled 1.png │ │ ├── Untitled 2.png │ │ └── Untitled.png │ └── 短记_Astra_Rammer_Roller.md ├── Attention │ ├── Attention.md │ ├── img_Attention │ │ ├── %E6%88%AA%E5%B1%8F2024-01-21_19.09.47.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-21_19.10.11.png │ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_17.04.41.png │ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_17.13.41.png │ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_17.14.05.png │ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_17.20.42.png │ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_20.17.01.png │ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_22.57.54.png │ │ ├── %E6%88%AA%E5%B1%8F2024-03-10_23.31.10.png │ │ ├── Untitled 1.png │ │ ├── Untitled 10.png │ │ ├── Untitled 11.png │ │ ├── Untitled 13.png │ │ ├── Untitled 14.png │ │ ├── Untitled 2.png │ │ ├── Untitled 3.png │ │ ├── Untitled 4.png │ │ ├── Untitled 5.png │ │ ├── Untitled 6.png │ │ ├── Untitled 7.png │ │ ├── Untitled 8.png │ │ ├── Untitled 9.png │ │ └── Untitled.png │ └── test_attention │ │ ├── attention.ipynb │ │ └── attention.py ├── Auto-parallelism summary │ ├── Auto_Parallelism.md │ ├── auto-parallelism.pptx │ └── img_auto_parallelism │ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_11.35.50.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_11.36.20.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_14.21.53.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_15.06.46.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_21.22.37.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-01_21.23.07.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-02_22.59.26.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-02_23.01.41.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_15.04.36.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_16.29.54.png │ │ ├── 
%E6%88%AA%E5%B1%8F2024-01-04_17.39.56.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.18.39.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.24.00.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.27.19.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.42.48.png │ │ ├── %E6%88%AA%E5%B1%8F2024-01-04_19.46.39.png │ │ ├── %E6%88%AA%E5%B1%8F2024-02-24_23.10.38.png │ │ ├── %E6%88%AA%E5%B1%8F2024-02-24_23.21.48.png │ │ ├── %E6%88%AA%E5%B1%8F2024-02-24_23.52.54.png │ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.15.44.png │ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.29.39.png │ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.35.43.png │ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.38.09.png │ │ ├── %E6%88%AA%E5%B1%8F2024-02-25_00.39.49.png │ │ ├── Untitled 1.png │ │ ├── Untitled 10.png │ │ ├── Untitled 11.png │ │ ├── Untitled 12.png │ │ ├── Untitled 13.png │ │ ├── Untitled 14.png │ │ ├── Untitled 15.png │ │ ├── Untitled 16.png │ │ ├── Untitled 17.png │ │ ├── Untitled 18.png │ │ ├── Untitled 19.png │ │ ├── Untitled 2.png │ │ ├── Untitled 20.png │ │ ├── Untitled 21.png │ │ ├── Untitled 3.png │ │ ├── Untitled 4.png │ │ ├── Untitled 5.png │ │ ├── Untitled 6.png │ │ ├── Untitled 7.png │ │ ├── Untitled 8.png │ │ ├── Untitled 9.png │ │ ├── Untitled.png │ │ └── mlsysdistribute_system.png ├── Composable and Modular Code Generation in MLIR │ ├── Composable and Modular Code Generation in MLIR.md │ ├── Composable and Modular Code Generation in MLIR.pptx │ └── img_CodeGenerationInMLIR │ │ ├── MLIRtoLLVM.png │ │ ├── MLIRtoLLVMandIntrinsics.png │ │ ├── classical优化.png │ │ ├── image-213.png │ │ ├── inplace-op.png │ │ ├── relevant dialects1.png │ │ ├── relevant dialects2.png │ │ ├── silos.png │ │ ├── special优化.png │ │ ├── step1.png │ │ ├── step2.png │ │ ├── step3.png │ │ ├── transformation1.png │ │ ├── transformation2.png │ │ ├── transformation3.png │ │ ├── transformation4.png │ │ ├── v2-5b69d56e33512deeb65eda364c343859_1440w.webp │ │ ├── v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg │ │ ├── x1.png │ │ ├── 截屏2022-12-07 21.42.13.png │ │ ├── 
截屏2022-12-10 16.26.12.png │ │ ├── 截屏2022-12-13 14.54.32.png │ │ ├── 截屏2022-12-13 22.23.23.png │ │ ├── 截屏2022-12-14 16.52.45.png │ │ ├── 截屏2022-12-15 11.47.39.png │ │ ├── 截屏2022-12-16 14.16.10.png │ │ ├── 截屏2022-12-21 18.42.21.png │ │ ├── 截屏2022-12-21 18.54.30.png │ │ ├── 截屏2023-01-09 17.47.49.png │ │ ├── 截屏2023-01-09 17.56.53.png │ │ ├── 截屏2023-01-09 17.57.09.png │ │ ├── 截屏2023-01-09 17.57.25.png │ │ ├── 截屏2023-01-09 17.59.03.png │ │ ├── 截屏2023-01-09 18.00.04.png │ │ └── 截屏2023-01-09 18.00.43.png ├── DISC │ ├── DISC.md │ └── img_DISC │ │ ├── %E6%88%AA%E5%B1%8F2023-03-02_23.20.46.png │ │ ├── %E6%88%AA%E5%B1%8F2023-03-02_23.26.10.png │ │ ├── Untitled 1.jpeg │ │ ├── Untitled.jpeg │ │ ├── Untitled.svg │ │ └── v2-e68e87bbfeab11afd84bb40a17e1a179_r.jpg ├── Graphene │ ├── ASPLOS’23 - Graphene An IR for Optimized Tensor Computations.pdf │ ├── ASPLOS’23 - Graphene.pptx │ ├── Graphene.md │ └── img_Graphene │ │ ├── ASPLOS_23_-_Graphene_An_IR_for_Optimized_Tensor_Computations.pdf │ │ ├── Untitled 1.png │ │ ├── Untitled 10.png │ │ ├── Untitled 11.png │ │ ├── Untitled 12.png │ │ ├── Untitled 13.png │ │ ├── Untitled 14.png │ │ ├── Untitled 15.png │ │ ├── Untitled 16.png │ │ ├── Untitled 17.png │ │ ├── Untitled 18.png │ │ ├── Untitled 19.png │ │ ├── Untitled 2.png │ │ ├── Untitled 20.png │ │ ├── Untitled 21.png │ │ ├── Untitled 3.png │ │ ├── Untitled 4.png │ │ ├── Untitled 5.png │ │ ├── Untitled 6.png │ │ ├── Untitled 7.png │ │ ├── Untitled 8.png │ │ ├── Untitled 9.png │ │ └── Untitled.png ├── Ray │ ├── Ray.md │ └── img_Ray │ │ ├── Untitled.jpeg │ │ └── Untitled.png ├── Slapo │ └── ASPLOS24_Slapo.pptx └── TensorIR │ ├── ASPLOS’23 - TensorIR.pptx │ └── TensorIR.md └── tools ├── conda.md ├── git.md ├── linux.md ├── macos.md ├── tmux.md ├── vim.md └── vim_file.zip /.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore_global 2 | #################################### 3 | ######## OS generated files ######## 4 | 
#################################### 5 | .DS_Store 6 | .DS_Store? 7 | *.swp 8 | ._* 9 | .Spotlight-V100 10 | .Trashes 11 | Icon? 12 | ehthumbs.db 13 | Thumbs.db 14 | #################################### 15 | ############# packages ############# 16 | #################################### 17 | *.7z 18 | *.dmg 19 | *.gz 20 | *.iso 21 | *.jar 22 | *.rar 23 | *.tar 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mlsys-study-note 2 | 3 | 更新博客内容见: [tfruan2000.github.io](https://tfruan2000.github.io/) 4 | 5 | 现在这里只是我同步资料的小仓库啦~ 6 | 7 | ``` 8 | ├── ai_compiler 9 | | ├── TVM 10 | | ├── IREE 11 | | └── MLIR 12 | | 13 | ├── basic 14 | | ├── optimization 15 | | ├── tech: cmake. 16 | | └── lesson 17 | | 18 | ├── coding 19 | | ├── C++ / Rust / Python 20 | | └── ... 21 | | 22 | ├── paper_read 23 | | ├── 笔记咋都是纸质版的😫 24 | | └── ... 25 | | 26 | └── tools 27 | ├── git / vim / tmux 等使用笔记 28 | ├── macOS 配置 29 | └── ... 30 | ``` 31 | -------------------------------------------------------------------------------- /ai_compiler/IREE/IREE_Survey.md: -------------------------------------------------------------------------------- 1 | # 👻 IREE 2 | 3 | ## 1. IREE 简介 4 | 5 | > 官方网站:https://openxla.github.io/iree/ 6 | > 7 | > mlir类比cpp,dialect类比stl,iree类比一个完整的项目 8 | 9 | [IREE](https://github.com/google/iree#iree-intermediate-representation-execution-environment) (Intermediate Representation Execution Environment)是一种基于MLIR的端到端编译器,可以将ML模型lower到统一的IR。具有它自己的高级表示以及一组 dialects,从代码生成的目的来说,**这些 dialects 正在向 Linalg-on-tensors 的方向发展**,严重依赖于tensor层级上的fusion。IREE-specific dialects 主要用于组织计算有效载荷,目前可以表示为MHLO、TOSA、Linalg-on-tensors等。 10 | 11 | > 在tensor级别fusion通常更简单,因为不需要跟踪对buffer的读取和写入 12 | 13 | 讲解下图: https://drive.google.com/drive/u/0/folders/1sRAsgsd8Bvpm_IxREmZf2agsGU2KvrK- 14 | 15 |
<img src="./img_IREE_Survey/截屏2023-02-28 09.31.47.png" alt="截屏2023-02-28 09.31.47" style="zoom:50%;" />
16 | 17 |
<img src="./img_IREE_Survey/截屏2023-02-28 09.31.38.png" alt="截屏2023-02-28 09.31.38" style="zoom:50%;" />
18 | 19 | 主要特征: 20 | 21 | - 提前编译调度和执行逻辑 22 | - 支持dynamic shapes, flow control, streaming和其他高级模型功能 23 | - 针对许多 CPU 和 GPU 架构进行了优化 24 | - 低开销、流水线执行以实现高效的功率和资源使用 25 | - 嵌入式系统上的二进制文件大小低至 30KB 26 | - 调试和分析支持 27 | 28 | ## 2. IREE 结构 29 | 30 | IREE对ML模型编译采用整体方法(holistic approach):生成的IR既包含==调度逻辑==,又包括==执行逻辑==。 31 | 32 | > 调度逻辑:需要将数据依赖性传达给低级并行流水线硬件/API (low-level parallel pipelined hardware/API)(如 [Vulkan](https://www.khronos.org/vulkan/))。 33 | > 34 | > 执行逻辑:将硬件上的密集计算编码为特定于硬件/API 的二进制文件,如[SPIR-V](https://www.khronos.org/spir/)。 35 | 36 | 截屏2022-12-07 21.42.13 37 | 38 | a) **导入您的模型** 39 | 40 | [使用受支持的框架](https://iree-org.github.io/iree/getting-started/#supported-frameworks)之一开发程序,然后使用 IREE 的导入工具之一运行模型。 41 | 42 | b) **选择您的[硬件部署配置](https://iree-org.github.io/iree/deployment-configurations/)** 43 | 44 | 确定目标平台、加速器和其他限制。 45 | 46 | c) **编译你的模型** 47 | 48 | 通过 IREE 编译,根据您的部署配置选择编译目标。 49 | 50 | d) **运行你的模型** 51 | 52 | 使用 IREE 的运行时组件来执行编译后的模型。 53 | 54 | ## 3. IREE Compiler 55 | 56 | - **IREE Compiler (LLVM Target)** 57 | 58 | v2-5b69d56e33512deeb65eda364c343859_1440w 59 | 60 | 大多数转换都发生在 Linalg Dialect 中,在 tensor 或者 buffer 级别,以及 bufferization 过程(tensor向buffer转换)。执行文件的首选路径是**lower到 Vector Dialect**,在这里可以进行额外的转换。当从 Linalg Dialect 往下 lowering 时,SCF 可用于围绕向量操作的控制流(control flow around vector operations),但对这些操作不执行任何转换。去生成 SCF Dialect 本质上意味着不再进行进一步的结构优化。Vector Dialect 可以逐步 lower 到复杂度较低的抽象,直到最终生成 LLVM Dialect。 61 | 62 | - **IREE Compiler (SPIR-V Target)** 63 | 64 | v2-8ce71a71e5c5e83da438c1d5793f76d9_r 65 | 66 | [SPIR-V](https://mlir.llvm.org/docs/Dialects/SPIR-V/)(Standard Portable Intermediate Representation, [Khronos group](https://www.khronos.org/spir/) standard.)是IREE编译器的主要目标。顶层流程类似于生成 LLVM IR 的流程,**大多数转换都发生在 Linalg-on-tensor 和 Vector 级别上**。从这里开始,lowering 倾向于直接转到 SPIR-V ,SPIR-V 具有一组跨越多个抽象级别的丰富操作集,操作集中包含:高级操作、结构化控制流和类指令的原语(high-level operations, structured control flow and instruction-like primitives)。该流程通过 GPU Dialect 进行 device-only operations,如工作项标识符提取,并依赖 IREE 的 runtime 来管理 GPU 内核。 67 | 
68 | > SPIR-V 最初发布于 2015 年。SPIR-V 是多个 Khronos API 共用的中间语言,包括 Vulkan, OpenGL, 以及 OpenCL。 69 | > 70 | > Khronos Group 的标语是“连接软件与硬件”,简明扼要地总结了它的任务。这种连接是通过标准规范 (standard) 和编程接口。**Khronos Group 定义标准规范以及编程接口;硬件厂商提供它们的硬件实现,软件厂商则可以让软件在所有支持的平台与设备上运行。** Khronos Group 定义维护了很多标准规范,比较著名的有 Vulkan, OpenGL, 以及 OpenCL。 71 | > 72 | > SPIR-V 支持通过多种机制来扩展其功能,包括添加新的枚举值,引入新的扩展 (extension),或者通过某个命名空间引入一整套指令 (extended instruction set)。其扩展也分为不同等级——厂商自有扩展 (vendor specific)、多厂商联合支持的扩展 (EXT)、以及 Khronos 级别的扩展 (KHR)。 73 | > 74 | 最近的一些工作使 IREE 可以从 Vector Dialect 转换到 GPU Dialect,将 GPU 线程暴露为向量通道(在 warp 或 block 级别)。类似地,也有一些工作绕过中间阶段,直接从 Linalg 和 Vector 转换到 SPIR-V,但这种做法可能会被渐进式的 lowering 方法取代。 75 | 76 | 77 | 78 | ## 4. IREE opt 79 | 80 | > 在 https://github.com/iree-org/iree/commit/823fe5ace7285e5fda555ef12dbb029a130e73ef 中提到 81 | > 82 | > "iree-hlo-to-linalg-on-tensors" 改成了 "iree-codegen-hlo-to-linalg-on-tensors"。 83 | 84 | iree-opt -h | grep hlo 85 | --iree-codegen-flow-hlo-to-hlo-preprocessing - Apply hlo to hlo transformations for some hlo ops 86 | --iree-codegen-hlo-to-linalg-on-buffers - Convert from XLA-HLO ops to Linalg ops on buffers 87 | --iree-codegen-hlo-to-linalg-on-tensors - Convert from XLA-HLO ops to Linalg ops on tensors 88 | --iree-codegen-shape-convert-hlo - Converts dynamic shape dependent HLO ops to shaped variants. 89 | --lhlo-legalize-to-linalg - Legalize from LHLO dialect to Linalg dialect 90 | --hlo-legalize-to-linalg - Legalize from HLO dialect to Linalg dialect 91 | 92 | 编译参考:https://openxla.github.io/iree/building-from-source/getting-started/#prerequisites 93 | 94 | 编译好的iree-opt在`iree-build/tools` 95 | 96 | ## 5. 
IREE 发展路线 97 | 98 | 待翻译: 99 | 100 | https://github.com/openxla/iree/blob/main/docs/developers/design_roadmap.md 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /ai_compiler/IREE/evaluate/img_benchmark-module/compilation_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/evaluate/img_benchmark-module/compilation_flow.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_IREE_Survey/v2-5b69d56e33512deeb65eda364c343859_1440w.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/v2-5b69d56e33512deeb65eda364c343859_1440w.webp -------------------------------------------------------------------------------- /ai_compiler/IREE/img_IREE_Survey/v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg -------------------------------------------------------------------------------- /ai_compiler/IREE/img_IREE_Survey/截屏2022-12-07 17.43.39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/截屏2022-12-07 17.43.39.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_IREE_Survey/截屏2022-12-07 21.42.13.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/截屏2022-12-07 21.42.13.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_IREE_Survey/截屏2023-02-28 09.31.38.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/截屏2023-02-28 09.31.38.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_IREE_Survey/截屏2023-02-28 09.31.47.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_IREE_Survey/截屏2023-02-28 09.31.47.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-01_18.33.01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-01_18.33.01.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-02_12.36.57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-02_12.36.57.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-02_12.42.52.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/%E6%88%AA%E5%B1%8F2023-05-02_12.42.52.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/20200130-IREE_Jan_2020_MLIR_ODM-_External.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/20200130-IREE_Jan_2020_MLIR_ODM-_External.pdf -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/20200820-IREE_CodeGen_-_Public.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/20200820-IREE_CodeGen_-_Public.pdf -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/20210609_-_IREE_Runtime_Design_Slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/20210609_-_IREE_Runtime_Design_Slides.pdf -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/20220505-IREE_targeting_Vulkan_Zhang_May22.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/20220505-IREE_targeting_Vulkan_Zhang_May22.pdf -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/HALDialect.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/HALDialect.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/HALOps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/HALOps.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 1.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 10.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 11.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 12.png -------------------------------------------------------------------------------- 
/ai_compiler/IREE/img_会议文件/Untitled 13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 13.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 14.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 15.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 16.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 17.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 18.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 18.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 19.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 2.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 20.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 21.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 22.png -------------------------------------------------------------------------------- 
/ai_compiler/IREE/img_会议文件/Untitled 23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 23.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 24.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 25.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 26.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 27.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 28.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 28.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 29.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 29.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 3.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 30.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 4.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 5.png -------------------------------------------------------------------------------- 
/ai_compiler/IREE/img_会议文件/Untitled 6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 6.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 7.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 8.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled 9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled 9.png -------------------------------------------------------------------------------- /ai_compiler/IREE/img_会议文件/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/img_会议文件/Untitled.png -------------------------------------------------------------------------------- /ai_compiler/IREE/pipeline/img_linalg-vector-gpu-llvm/Untitled 1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_linalg-vector-gpu-llvm/Untitled 1.png -------------------------------------------------------------------------------- /ai_compiler/IREE/pipeline/img_linalg-vector-gpu-llvm/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_linalg-vector-gpu-llvm/Untitled.png -------------------------------------------------------------------------------- /ai_compiler/IREE/pipeline/img_pipeline/Untitled 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_pipeline/Untitled 1.png -------------------------------------------------------------------------------- /ai_compiler/IREE/pipeline/img_pipeline/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_pipeline/Untitled 2.png -------------------------------------------------------------------------------- /ai_compiler/IREE/pipeline/img_pipeline/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/IREE/pipeline/img_pipeline/Untitled.png -------------------------------------------------------------------------------- /ai_compiler/LLVM/LLVM简介.md: -------------------------------------------------------------------------------- 1 | # LLVM 2 | 3 | > 定义:LLVM项目是模块化、可重用的编译器以及工具链技术的集合。 4 | 5 | ## 1. 传统的编译器架构 6 | 7 |
img
8 | 9 | - Frontend:前端 10 | 11 | ​ ==词法==分析、==语法==分析、==语义==分析、==生成中间代码== 12 | 13 | - Optimizer:优化器 14 | 15 | ​ 中间代码优化 16 | 17 | - Backend:后端 18 | 19 | ​ 生成机器码 20 | 21 | ## 2. LLVM架构 22 | 23 |
img
24 | 25 | - 不同的前端后端使用统一的中间代码LLVM ==Intermediate Representation==(LLVM IR) 26 | 27 | - 如果需要支持一种新的==编程语言==,那么只需要实现一个新的==前端== 28 | - 如果需要支持一种新的==硬件设备==,那么只需要实现一个新的==后端== 29 | - 优化阶段是一个通用的阶段,它针对的是统一的 ==LLVM IR==,不论是哪种编程语言或哪种硬件设备,都不需要对优化阶段进行修改 30 | - 相比之下,GCC的前端和后端没有分得太开,前端和后端耦合在了一起,所以GCC为了支持一门新的语言或者新的目标平台,就变得特别困难 31 | 32 | > 相比之下,GCC的前端和后端没有分得太开,前端和后端耦合在了一起,所以GCC为了支持一门新的语言或者新的目标平台,就变得特别困难 33 | 34 | - LLVM现在被作为实现各种静态和运行时编译语言的通用基础结构 35 | 36 | ### Clang 37 | 38 | Clang是LLVM项目的一个子项目,是基于LLVM框架的**C/C++/Objective-C编译器的前端**。 39 | 40 | **相较于GCC,Clang具有以下优点:** 41 | 42 | - 编译速度快 43 | - 占用内存小:Clang生成的AST占用内存只为GCC的五分之一左右 44 | - 模块化设计:Clang采用基于库的模块化设计 45 | - 诊断信息可读性强:在编译过程中,Clang创建并保留了大量详细的元数据,有利于调试和错误报告 46 | - 易于扩展 47 | 48 | ### Clang和LLVM的关系 49 | 50 |
img
51 | 52 | clang是llvm整体框架的前端 53 | 54 | 源代码(c/c++) $\rightarrow$ 经过clang $\rightarrow$ 中间代码 $\rightarrow$ 经过一系列的优化(pass) $\rightarrow$ 机器码 55 | 56 | ## 3. OC源文件的编译过程 57 | 58 | Xcode创建一个Test项目,然后cd到main.m的上一路径。 59 | 命令行查看编译的过程:$ clang -ccc-print-phases main.m 60 | 61 | ```cpp 62 | $ clang -ccc-print-phases main.m 63 | 64 | 0: input, "main.m", objective-c 65 | 1: preprocessor, {0}, objective-c-cpp-output 66 | 2: compiler, {1}, ir 67 | 3: backend, {2}, assembler 68 | 4: assembler, {3}, object 69 | 5: linker, {4}, image 70 | 6: bind-arch, "x86_64", {5}, image 71 | ``` 72 | 73 | 找到main.m文件 $\rightarrow$ 预处理器,处理include、import、宏定义 $\rightarrow$ 编译器编译,生成ir中间代码 $\rightarrow$ 后端,生成目标代码 $\rightarrow$ 汇编 $\rightarrow$ 链接其他动态库静态库 $\rightarrow$ 编译成适合某个架构的代码 74 | 75 | 查看preprocessor(预处理器)的处理结果:$ clang -E main.m 76 | 77 | 会打印出大量信息 78 | 79 | ```cpp 80 | # 1 "main.m" 81 | # 1 "" 1 82 | # 1 "" 3 83 | # 353 "" 3 84 | # 1 "" 1 85 | # 1 "" 2 86 | # 1 "main.m" 2 87 | . 88 | . 89 | . 90 | int main(int argc, const char * argv[]) { 91 | @autoreleasepool { 92 | NSLog(@"Hello, World!"); 93 | } 94 | return 0; 95 | } 96 | ``` 97 | 98 | ### 词法分析(生成多个token) 99 | 100 | 词法分析,生成Token: `$ clang -fmodules -E -Xclang -dump-tokens main.m` 101 | 102 | 将代码分成一个个小单元(token) 103 | 104 | ```cpp 105 | void test(int a, int b){ 106 | int c = a + b - 3; 107 | } 108 | ``` 109 | 110 | 生成 111 | 112 | ```cpp 113 | void 'void' [StartOfLine] Loc= 114 | identifier 'test' [LeadingSpace] Loc= 115 | l_paren '(' Loc= 116 | int 'int' Loc= 117 | identifier 'a' [LeadingSpace] Loc= 118 | comma ',' Loc= 119 | int 'int' [LeadingSpace] Loc= 120 | identifier 'b' [LeadingSpace] Loc= 121 | r_paren ')' Loc= 122 | l_brace '{' Loc= 123 | int 'int' [StartOfLine] [LeadingSpace] Loc= 124 | identifier 'c' [LeadingSpace] Loc= 125 | equal '=' [LeadingSpace] Loc= 126 | identifier 'a' [LeadingSpace] Loc= 127 | plus '+' [LeadingSpace] Loc= 128 | identifier 'b' [LeadingSpace] Loc= 129 | minus '-' [LeadingSpace] Loc= 130 | numeric_constant '3' 
[LeadingSpace] Loc= 131 | semi ';' Loc= 132 | r_brace '}' [StartOfLine] Loc= 133 | eof '' Loc= 134 | ``` 135 | 136 | 可以看出,词法分析时,上面的代码被拆分成一个个token,后面数字表示某一行的第几个字符,例如第一个void,表示18行第一个字符。 137 | 138 | ### 语法分析(生成AST) 139 | 140 | 语法分析后生成语法树(Abstract Syntax Tree):`$ clang -fmodules -fsyntax-only -Xclang -ast-dump main.m` 141 | 142 | ```cpp 143 | |-FunctionDecl 0x7fa1439f5630 line:18:6 test 'void (int, int)' 144 | | |-ParmVarDecl 0x7fa1439f54b0 col:15 used a 'int' 145 | | |-ParmVarDecl 0x7fa1439f5528 col:22 used b 'int' 146 | | `-CompoundStmt 0x7fa142167c88 147 | | `-DeclStmt 0x7fa142167c70 148 | | `-VarDecl 0x7fa1439f5708 col:9 c 'int' cinit 149 | | `-BinaryOperator 0x7fa142167c48 'int' '-' 150 | | |-BinaryOperator 0x7fa142167c00 'int' '+' 151 | | | |-ImplicitCastExpr 0x7fa1439f57b8 'int' 152 | | | | `-DeclRefExpr 0x7fa1439f5768 'int' lvalue ParmVar 0x7fa1439f54b0 'a' 'int' 153 | | | `-ImplicitCastExpr 0x7fa1439f57d0 'int' 154 | | | `-DeclRefExpr 0x7fa1439f5790 'int' lvalue ParmVar 0x7fa1439f5528 'b' 'int' 155 | | `-IntegerLiteral 0x7fa142167c28 'int' 3 156 | 157 | `- 158 | ``` 159 | 160 | AST图形化如下显示 161 | 162 |
img
163 | 164 | ### 语义分析(生成中间代码 LLVM IR) 165 | 166 | LLVM IR有三种表示形式(本质是等价的) 167 | 168 | - text:便于阅读的文本格式,类似于汇编,扩展名 .ll, $ clang -S -emit-llvm main.m 169 | - memory:内存格式 170 | - bitcode:二进制格式,扩展名 .bc, $ clang -c -emit-llvm main.m 171 | 172 | 以text形式编译查看 173 | 174 | ```cpp 175 | ; Function Attrs: noinline nounwind optnone ssp uwtable 176 | define void @test(i32, i32) #2 { 177 | %3 = alloca i32, align 4 178 | %4 = alloca i32, align 4 179 | %5 = alloca i32, align 4 180 | store i32 %0, i32* %3, align 4 181 | store i32 %1, i32* %4, align 4 182 | %6 = load i32, i32* %3, align 4 183 | %7 = load i32, i32* %4, align 4 184 | %8 = add nsw i32 %6, %7 185 | %9 = sub nsw i32 %8, 3 186 | store i32 %9, i32* %5, align 4 187 | ret void 188 | } 189 | ``` 190 | 191 | ## 4. IR基本语法 192 | 193 | 注释以分号 ; 开头 194 | 全局标识符以@开头,局部标识符以%开头 195 | alloca,在当前函数栈帧中分配内存 196 | i32,32bit,4个字节的意思 197 | align,内存对齐 198 | store,写入数据 199 | load,读取数据 200 | 官方语法参考[LLVM Language Reference Manual — LLVM 16.0.0git documentation](https://llvm.org/docs/LangRef.html) 201 | 202 |
img
203 | 204 | 205 | 206 | 207 | 208 | -------------------------------------------------------------------------------- /ai_compiler/LLVM/img_LLVM简介/webp-1664801155586-3.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_LLVM简介/webp-1664801155586-3.webp -------------------------------------------------------------------------------- /ai_compiler/LLVM/img_LLVM简介/webp-1664801163209-6.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_LLVM简介/webp-1664801163209-6.webp -------------------------------------------------------------------------------- /ai_compiler/LLVM/img_LLVM简介/webp-1664802251158-9.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_LLVM简介/webp-1664802251158-9.webp -------------------------------------------------------------------------------- /ai_compiler/LLVM/img_LLVM简介/webp.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_LLVM简介/webp.webp -------------------------------------------------------------------------------- /ai_compiler/LLVM/img_TableGen/type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/LLVM/img_TableGen/type.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/MLIR_Note.md: 
-------------------------------------------------------------------------------- 1 | # [MLIR] Code Note 2 | 3 | https://github.com/tfruan2000/tfruan2000.github.io/blob/main/_posts/MLIR/2024-08-07-mlir-code-note.md 4 | 5 | https://tfruan2000.github.io/posts/mlir-code-note/ 6 | -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_CodeGen_summary/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/0.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_CodeGen_summary/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/1.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_CodeGen_summary/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/2.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_CodeGen_summary/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/3.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_CodeGen_summary/4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/4.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_CodeGen_summary/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/5.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_CodeGen_summary/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/6.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_CodeGen_summary/7.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/7.jpeg -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_CodeGen_summary/cover1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_CodeGen_summary/cover1.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Note/Untitled 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Note/Untitled 1.png 
-------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Note/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Note/Untitled 2.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Note/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Note/Untitled.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/cover1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/cover1.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/cover2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/cover2.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-111.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-111.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-121.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-121.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-131.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-131.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-211.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-211.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-212.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-212.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-213.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-213.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-221.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-221.png 
-------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-231.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-231.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-232.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-232.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-251.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-251.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-301.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-301.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-302.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-302.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-321.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-321.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-322.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-322.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-331.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-331.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-332.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-332.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-334.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-334.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-342.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-342.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-431.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-431.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-511.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-511.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-512.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-513.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-513.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-521.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-521.png 
-------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-522.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-522.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-523.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-523.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-524.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-524.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/img_MLIR_Survey/image-611.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/img_MLIR_Survey/image-611.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/meeting pdf/2021-10-07-The-Torch-MLIR-project.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/meeting pdf/2021-10-07-The-Torch-MLIR-project.pdf -------------------------------------------------------------------------------- /ai_compiler/MLIR/meeting pdf/Structured Ops in MLIR.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/meeting pdf/Structured Ops in MLIR.pdf -------------------------------------------------------------------------------- /ai_compiler/MLIR/meeting pdf/Tensor Codegen Thoughts - Jan 23, 2020 MLIR- external ODM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/meeting pdf/Tensor Codegen Thoughts - Jan 23, 2020 MLIR- external ODM.pdf -------------------------------------------------------------------------------- /ai_compiler/MLIR/meeting pdf/Tutorial-AminiVasilacheZinenko-MLIR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/meeting pdf/Tutorial-AminiVasilacheZinenko-MLIR.pdf -------------------------------------------------------------------------------- /ai_compiler/MLIR/pipeline/hlo2linalg.md: -------------------------------------------------------------------------------- 1 | # hlo到linalg 2 | 3 | > 在mlir中,hlo是一个广泛使用的高级表示,能够承接不同的前端框架输入;**linalg方言是目前比较重要的一层方言**,包括寒武纪在内的很多公司都使用了这一层的方言,在他的基础上去干**编译优化、调优和后端代码生成**。熟悉这两种方言并了解从 hlo→ linalg 的下降流程,看现在的支持情况和以后我们如何在上面添加支持。 4 | 5 | 两种方向: 6 | 7 | - TF→xla_hlo→IREE Flow→LinAlg→... (IREE使用的模式) 8 | - TF→xla_mhlo→xla_lhlo→... 
(TF 代码生成策略) 9 | 10 | > [HLO to LinAlg on buffers - no conversion](https://github.com/iree-org/iree/issues/2011#top) 11 | > 12 | > HLO → LHLO过程会完成buffer的分配,而 IREE 会事先进行缓冲区分配。 13 | > 14 | > HLO+XLA buffer assignment → lhlo 15 | > 16 | > LHLO存在是为了保留XLA的强大机制,用于 HLO 级别的layout/buffer allocation(和其他优化)并重新进入MLIR Codegen 17 | > 18 | > 现在努力地将IREE的相关代码复制到TF Codegen中,这样来实现去除LHLO 19 | 20 | 使用到的工具链:tf-opt、mlir-hlo-opt,mlir编译完成后只有mlir-opt,[过程中使用到的工具链编译](codegen工具链) 21 | 22 | **之前了解的从hlo到linalg流程** 23 | 24 | `tf-opt train.mhlo.mlir --hlo-legalize-to-linalg -o train.linalg.mlir` 25 | 26 | 27 | 28 | 29 | 30 | 31 | 完整的流程:模型文件 → TF dialect(tf_executor Dialect → tf Dialect)→ xla_hlo→xla_lhlo → linalg Dialect 32 | 33 |
img
34 | 35 | 下文参考:https://discourse.llvm.org/t/llvm-ir-segmentation-fault-core-dumped/4302/1 36 | 37 | 1. 从模型 .pbtxt翻译为tf_executor Dialect (得到add.mlir) 38 | 39 | 使用`tf-mlir-translate`工具进行翻译,其中各种选项指定了输入输出的类型以及尺寸 40 | 41 | ```cmake 42 | $ tf-mlir-translate -graphdef-to-mlir -tf-enable-shape-inference-on-import=false add.pbtxt -tf-input-arrays=input0,input1 -tf-input-data-types=DT_INT32,DT_INT32 -tf-input-shapes=10:10 -tf-output-arrays=Add -o add.mlir 43 | ``` 44 | 45 | 2. tf_executor Dialect → tf Dialect (得到add-func.mlir) 46 | 47 | ```cmake 48 | $ tf-opt -tf-executor-to-functional-conversion add.mlir -o add-func.mlir 49 | ``` 50 | 51 | 3. tf Dialect → mhlo Dialect (得到add-mhlo.mlir) 52 | 53 | ```cmake 54 | $ tf-opt --tf-to-hlo-pipeline add-func.mlir -o add-mhlo.mlir 55 | ``` 56 | 57 | 4. mhlo Dialect → lhlo Dialect (得到add-lhlo.mlir) 58 | 59 | ```cmake 60 | $ mlir-hlo-opt add-mhlo.mlir -hlo-legalize-to-lhlo -o add-lhlo.mlir 61 | ``` 62 | 63 | 5. lhlo Dialect → linalg Dialect (得到add-linalg.mlir) 64 | 65 | ```cmake 66 | $ tf-opt add-lhlo.mlir -lhlo-legalize-to-linalg -o add-linalg.mlir 67 | ``` 68 | 69 | 到此为止,这一阶段的各种表达式变换以及工具链是由TensorFlow社区维护的。从这一步往后的编译过程以及基础设施,就是由MLIR社区进行维护的了。 70 | 71 | 6. linalg Dialect → Scf Dialect (得到add-scf.mlir) 72 | 73 | ```cmake 74 | mlir-opt add-linalg.mlir -convert-linalg-to-loops -o add-scf.mlir 75 | ``` 76 | 77 | 7. Scf Dialect → Std Dialect (得到add-std.mlir) 78 | 79 | ```cmake 80 | mlir-opt add-scf.mlir -convert-scf-to-std -o add-std.mlir 81 | ``` 82 | 83 | 8. Std Dialect → LLVM Dialect (得到add-llvm.mlir) 84 | 85 | ```cmake 86 | mlir-opt add-std.mlir -convert-std-to-llvm -o add-llvm.mlir 87 | ``` 88 | 89 | 9. LLVM Dialect → LLVM IR file (得到add.ll) 90 | 91 | ```cmake 92 | mlir-translate add-llvm.mlir -mlir-to-llvmir -o add.ll 93 | ``` 94 | 95 | 10. Use `lli` to run the .ll file 96 | 97 | 98 | 99 |
3
100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /ai_compiler/MLIR/pipeline/img_hlo2linalg/1251718-20210923060706363-1852351942.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_hlo2linalg/1251718-20210923060706363-1852351942.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/pipeline/img_hlo2linalg/3-7236750.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_hlo2linalg/3-7236750.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/pipeline/img_hlo2linalg/截屏2023-02-13 17.45.49.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_hlo2linalg/截屏2023-02-13 17.45.49.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/pipeline/img_linalg/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_linalg/3.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/pipeline/img_linalg/73613629-c5586580-45c5-11ea-94b7-074aeea94c7b.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_linalg/73613629-c5586580-45c5-11ea-94b7-074aeea94c7b.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/pipeline/img_linalg/73613904-2f720a00-45c8-11ea-8265-1c856c02525b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_linalg/73613904-2f720a00-45c8-11ea-8265-1c856c02525b.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/pipeline/img_matmul性能测试/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/MLIR/pipeline/img_matmul性能测试/Untitled.png -------------------------------------------------------------------------------- /ai_compiler/MLIR/pipeline/linalg.md: -------------------------------------------------------------------------------- 1 | # linalg 2 | 3 | > https://mlir.llvm.org/docs/Dialects/Linalg/ 4 | > 5 | > MLIR Codegen Flow 6 | > OpGraph: Graph of Tensor ops in MLIR 7 | > 8 | > TSOWB(e.g. LHLO): Target Specific ops with Buffers 9 | > 10 | > CGASel: CodeGen Algorithm Selector,图分治算法(搜索或RL) 11 | > 12 | > HHO(e.g. Linalg): 13 | > 14 | > MHA: Memory Hierarchy Abstraction,循环层级 15 | > 16 | > HLTSIR: High Level Target Specific IR,vector+target intrinsics 17 | > 18 | > TSIR(e.g.
llamas): Target Special IR,寄存器分配、调度、机器码生成 19 | > 20 | > https://mlir.llvm.org/docs/Rationale/RationaleLinalgDialect/ 21 | 22 | [Linalg Dialect](https://mlir.llvm.org/docs/Dialects/Linalg/) 中,基于结构化数据对结构化计算使用了通用的表示形式(a versatile representation of structured computation on structured data)。这种dialect是为了transformations而专门设计出来的,**只需要很少量的分析就可以完成转换**;并且它**同时支持 tensor 和 buffer 作为操作数**(在tensor和memref容器上运行的更高级别的计算原语),bufferization 过程(实现tensor到buffer的转换)也可以在不改变操作本身完成。 23 | 24 | 此外, Linalg Dialect 提供了具有特定负载的 [“named” operations ](https://mlir.llvm.org/docs/Dialects/Linalg/#named-payload-carrying-opsa-namenamed_opsa)(如:矩阵乘法和卷积),也提供了用于定义 structure 的 [“generic” operations](https://mlir.llvm.org/docs/Dialects/Linalg/#payload-carrying-opsa-namepayload_opsa)。这两种形式之间可以互相转换。Linalg Dialect 的迭代结构允许它们转换为向量(vector)操作,以及基于向量或标量操作的(仿射,Affine Dialect)循环。 25 | 26 | > 结构化的代码具有高度可组合性和可重用性:tiling和fusion转换在各个数据结构阶段都是完全通用的 27 | > 28 | > 结构化Ops为模式匹配和重写提供了自然锚点。 29 | 30 | 31 | 32 | 33 | 34 | 1. 只需要很少量的分析就可以完成转换: 35 | 36 | **Linalg ==generic op== 本质是多层完美嵌套循环(perfect loop nest)的 op 化表示。** 37 | 38 | > Linalg generic op 里面用 indexing map 来隐性表示每层循环与输入输出的 access 关系,用附加的 region 表示针对这些输入输出进行的计算。 39 | 40 | - linalg op 通过其 indexing map 来指定循环变量 (loop induction variable) 如何访问 (access) 操作数 (operand) 以及结果 (result)。 41 | - linalg op region 内的负载操作则指定了循环内部所进行的计算。 42 | 43 | (1)linalg op背后**统一的结构** `-->` 有助于简化转换的逻辑。 因为转换只需要针对 indexing map 以及 region 进行操作,而无需考虑这具体是哪一个 linalg op。(匹配 indexing map和region的情况,而不是匹配linalg op) 44 | 45 | (2)Linalg generic op本质是perfect loop nest的op化表示`-->` 针对 loop 做各种 transformation 的时候不可能存在非完美的情况,这样可以取消用来检测和维持 loop 完美性的逻辑,避免复杂的分析。for example, loop tiling can be applied to the loop nest computing a matrix multiplication, no need to additionally rely on affine dependence analysis to check this 46 | 47 | 48 | 49 | ==named ops== 基本就是 generic ops 上面提供的 sugar:每个 named op 都有明确的隐性的 indexing map 和 compute region,它们定义了一个 named op。**named op 是可以和 generic op 相互转换的。** 50 | 51 | named ops 存在的作用是和上层对接变得简单。**算子层到 
Linalg 层可以直接产生这些 named ops。但是在 Linalg 以及以下的层次上,transformation 主要操作的是 generic ops**,确切地说是 generic ops 背后的 [op interface](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td)。这两种形式之间可以互相转换,编译器 transformation 不需要修改,因为这些 named ops 都有同样的 op interface,既有 transformation 可以直接操作。 52 | 53 | 54 | 55 | 2. linalg中同时存在tensor和buffer的表示 56 | 57 | | Tensor | Buffer | 58 | | --------------------------------------------------------- | ------------------------------------------------------------ | 59 | | 不一定与内存相关联的不可变值 (immutable values),重写简单 | 可变的,可能会受到混叠的影响(多个对象可能指向同样的底层存储位置) | 60 | | 高层级运算(TF、torch、HLO) | 框架(组合、卷积) --> 结构(loop、vector) --> 编程模式(SIMD) | 61 | 62 | tensor到buffer的转换是通过bufferization完成 63 | 64 | 65 | 66 | 67 | 68 | 3. 下图是TF到LLVM IR的一种codegen 69 | 70 | 从 [MHLO ](https://github.com/tensorflow/mlir-hlo#meta-hlo-dialect-mhlo)去生成 `Linalg-on-tensors`(转换局限在tensor层面,其目的并非递降,而是为接下来的转换做准备),**并在 Linalg 上调用 bufferization 之前,在该级别上执行融合(IREE也是专注于tensor级别的fusion)。**进一步的循环转换(loop transformations)(如tiling)发生在 SCF Dialect 级别,然后转换为 target-specific GPU dialect;而有效负载操作(payload operations)则先转换为 Standard Dialect 再转换为 LLVM Dialect。 71 | 72 | 3 73 | 74 | 75 | 76 | 4. https://mlir.llvm.org/docs/Rationale/RationaleLinalgDialect/ 77 | 78 | 利用线性代数语义定义ops和转换:Linalg **defines ops and transformations declaratively** and was originally restricted to ops with *linear-algebra like* semantics (`pointwise`, `matmul`, `conv`…). 79 | 80 | 针对 dense tensors 使用较多 81 | 82 | 尽可能地保持信息:This information captures the legality and applicability of transformations and is **not lost by lowering prematurely to loop or CFG form**. 83 | 84 | 使用混合抽象:reflect on and integrate the key elements of the prior art success as well as avoid the common pitfalls in the area of code generation 85 | 86 | 本地改写机制:uses local rewrite rules implemented with the MLIR [Declarative Rewrite Rules](https://mlir.llvm.org/docs/DeclarativeRewrites/) 87 | 88 | 89 | 90 | 5. 91 | 92 |
MLIR代码生成流程
93 | 94 |
MLIR CodeGen Dialect Hierarchy
95 | -------------------------------------------------------------------------------- /ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-30_12.08.14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-30_12.08.14.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_10.33.29.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_10.33.29.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_15.15.02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_15.15.02.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_19.55.57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_19.55.57.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_21.08.19.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_21.08.19.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_21.34.01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-05-31_21.34.01.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-06-01_11.56.31.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/%E6%88%AA%E5%B1%8F2023-06-01_11.56.31.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_MLC课程/image-20230329133901335.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_MLC课程/image-20230329133901335.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_vectorize 和 tensorize Pass/image-20230329133901335.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_vectorize 和 tensorize Pass/image-20230329133901335.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_vectorize 和 tensorize Pass/v2-ee6ca5e08aee17b8f9998dd3a3da75c1_r.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_vectorize 和 tensorize Pass/v2-ee6ca5e08aee17b8f9998dd3a3da75c1_r.jpg -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/4type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/4type.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/4type2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/4type2.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/DNN1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/DNN1.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/DNN2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/DNN2.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/TVMSoftwareStack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/TVMSoftwareStack.png 
-------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/TVMflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/TVMflow.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/all_unity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/all_unity.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/automation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/automation.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/cooperate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/cooperate.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/horizontal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/horizontal.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/now_q1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/now_q1.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/unify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/unify.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_简介/unity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_简介/unity.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/201907192304343.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/201907192304343.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220925220059169.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220059169.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220925220149543.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220149543.png -------------------------------------------------------------------------------- 
/ai_compiler/TVM/img_编译安装/image-20220925220653571.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220653571.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220925220718150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220718150.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220925220751150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925220751150.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220925222016904.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925222016904.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220925234257698.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220925234257698.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926002042777.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926002042777.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926002050138.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926002050138.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926002056920.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926002056920.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926085918902.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926085918902.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926090051685.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926090051685.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926090558359.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926090558359.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926090943230.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926090943230.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926095351317.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926095351317.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926100008289.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926100008289.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/image-20220926100817561.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/image-20220926100817561.png -------------------------------------------------------------------------------- /ai_compiler/TVM/img_编译安装/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MjA4MTM4OQ==,size_16,color_FFFFFF,t_70.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/TVM/img_编译安装/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MjA4MTM4OQ==,size_16,color_FFFFFF,t_70.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_base/cta_wrap_thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/cta_wrap_thread.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_base/cuda_triton.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/cuda_triton.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_base/cuda_vs_triton.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/cuda_vs_triton.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_base/distribute_layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/distribute_layout.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_base/gpu_arch.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/gpu_arch.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_base/layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/layout.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_base/swizzled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/swizzled.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_base/triton_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/triton_arch.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_base/triton_arch_now.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_base/triton_arch_now.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_language/load.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/load.png 
-------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_language/loadpid0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/loadpid0.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_language/loadpid1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/loadpid1.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_language/loadpid2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/loadpid2.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_language/store.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_language/store.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_linalg/bqb1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/bqb1.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_linalg/dialect.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/dialect.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_linalg/diff_with_triton_shared.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/diff_with_triton_shared.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_linalg/mlir_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/mlir_pipeline.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_linalg/opt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/opt.png -------------------------------------------------------------------------------- /ai_compiler/Triton/img_Triton_linalg/success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/Triton/img_Triton_linalg/success.png -------------------------------------------------------------------------------- /ai_compiler/Triton/编译安装.md: -------------------------------------------------------------------------------- 1 | # Triton编译安装 2 | 3 | ## 配置python环境 4 | 5 | 建议使用conda配置,选择python3.10会稳定些 6 | 
7 | `conda create -n triton_env python=3.10` 8 | 9 | - 根据cuda版本安装pytorch(gpu版) 10 | 11 | 例如我用的是cuda11.8,那么 12 | 13 | ```bash 14 | conda install pytorch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 pytorch-cuda=11.8 -c pytorch -c nvidia 15 | ``` 16 | 17 | 详细见官网:https://pytorch.org/get-started/previous-versions/ 18 | 19 | - 安装常见的包 20 | 21 | numpy matplotlib pybind11 lit pytest isort pandas tabulate scipy flake8 autopep8 22 | 23 | pybind11安装后需要配置环境变量,否则会找不到头文件 24 | 25 | ```bash 26 | export PYBIND_INCLUDE_PATH=/xxxx/miniconda/envs/triton_env/lib/python3.10/site-packages/pybind11/include 27 | ``` 28 | 29 | 下面的源挺好用的 `vim ~/.condarc` 30 | 31 | ```bash 32 | show_channel_urls: true 33 | channels: 34 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ 35 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ 36 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ 37 | - defaults 38 | auto_activate_base: false 39 | ``` 40 | 41 | ## 捷径 42 | 43 | ```bash 44 | git clone https://github.com/triton-lang/triton.git 45 | ``` 46 | 47 | clone llvm 很难搞,如果不用修改源码,就直接安装吧 48 | 49 | 50 | ```bash 51 | pip install git+https://github.com/LLNL/hatchet 52 | 53 | pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly --use-deprecated legacy-resolver 54 | ``` 55 | 56 | 运行一下 57 | 58 | ```bash 59 | Python 3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0] on linux 60 | Type "help", "copyright", "credits" or "license" for more information. 
61 | >>> import triton 62 | >>> triton.__version__ 63 | '3.0.0' 64 | 65 | 66 | cd triton/python/tutorials/ 67 | python 03-matrix-multiplication.py 68 | ``` 69 | 70 | 71 | ## 编译llvm 72 | 73 | ```bash 74 | git clone https://github.com/triton-lang/triton.git 75 | git clone https://github.com/llvm/llvm-project.git 76 | ``` 77 | 78 | 如果拉取出现下面报错,在repo内输入 `git config --global http.postBuffer 1024288000` 79 | 80 | ```bash 81 | remote: Compressing objects: 100% (1151/1151), done. 82 | error: RPC failed; result=18, HTTP code = 200| 592.00 KiB/s 83 | fatal: The remote end hung up unexpectedly 84 | fatal: 过早的文件结束符(EOF) 85 | fatal: index-pack failed 86 | ``` 87 | 88 | - 切换llvm commit 89 | 90 | git checkout xxx,其中xxx是triton对应的llvm版本号,可以使用 `cat triton/cmake/llvm-hash.txt` 找到 91 | 92 | - build 93 | 94 | cmake 版本要求3.20以上,记得安装ninja。如果没有root权限就下编译好的二进制,解压后加PATH即可。 95 | 96 | ```bash 97 | cd xxxpath/llvm-project 98 | mkdir build && cd build 99 | 100 | cmake -G Ninja ../llvm \ 101 | -DLLVM_ENABLE_PROJECTS="mlir;llvm" \ 102 | -DLLVM_BUILD_EXAMPLES=ON \ 103 | -DLLVM_TARGETS_TO_BUILD="X86;NVPTX;AMDGPU" \ 104 | -DMLIR_ENABLE_CUDA_RUNNER=ON \ 105 | -DCMAKE_BUILD_TYPE=Release \ 106 | -DLLVM_ENABLE_ASSERTIONS=ON \ 107 | -DLLVM_ENABLE_RTTI=ON \ 108 | -DLLVM_INSTALL_UTILS=ON \ 109 | -DMLIR_INCLUDE_INTEGRATION_TESTS=ON \ 110 | -DCMAKE_INSTALL_PREFIX="xxxpath/tools_build/llvm" 111 | 112 | ninja -j32 113 | ninja install 114 | 115 | cmake --build . 
--target check-mlir 116 | ``` 117 | 118 | 编译时target只能是"X86;NVPTX;AMDGPU",如果多了riscv,后续编译出的libtriton.so是有问题的,会报错 119 | 120 | ```bash 121 | ImportError: /lustre/S/ruantingfeng/triton/triton_repo/python/triton/_C/libtriton.so: undefined symbol: LLVMInitializeRISCVAsmParser 122 | ``` 123 | 124 | - 增加环境变量在.bashrc 125 | 126 | ```bash 127 | export PATH=xxxpath/tools_build/llvm/bin:$PATH 128 | export LLVM_BUILD_DIR=xxxpath/tools_build/llvm 129 | export LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include 130 | export LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib 131 | export LLVM_SYSPATH=$LLVM_BUILD_DIR 132 | ``` 133 | 134 | ## 编译triton 135 | 136 | ```bash 137 | cd xxxpath/triton 138 | conda activate triton_env 139 | LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include \ 140 | LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib \ 141 | LLVM_SYSPATH=$LLVM_BUILD_DIR \ 142 | pip install -e python 143 | ``` 144 | 145 | 编译好的内容在 `xxxpath/triton/python/build` 中 146 | 147 | 而且 `libtriton.so` 已经加到 `_C` 中了 148 | 149 | ```bash 150 | $ ll python/triton/_C/ 151 | include/ libtriton.so 152 | ``` 153 | 154 | 再加个环境变量 155 | 156 | ```bash 157 | export TRITON_HOME=/lustre/S/ruantingfeng/triton/triton_repo 158 | export PYTHONPATH=$TRITON_HOME/python:${PYTHONPATH} 159 | ``` 160 | 161 | 测试一下,没啥问题就可以运行 `python/tutorials` 中的测试(跑了一下03-matrix-multiplication.py,看起来暂时干不过cuBLAS) 162 | 163 | ```bash 164 | $ python 165 | Python 3.10.14 (main, Mar 21 2024, 16:24:04) [GCC 11.2.0] on linux 166 | Type "help", "copyright", "credits" or "license" for more information. 
167 | >>> import triton 168 | >>> triton.__version__ 169 | '3.0.0' 170 | >>> 171 | ``` -------------------------------------------------------------------------------- /ai_compiler/XLA/img_xla2hlo/Screen%2BShot%2B2017-02-27%2Bat%2B9.54.12%2BAM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/Screen%2BShot%2B2017-02-27%2Bat%2B9.54.12%2BAM.png -------------------------------------------------------------------------------- /ai_compiler/XLA/img_xla2hlo/how-does-xla-work.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/how-does-xla-work.png -------------------------------------------------------------------------------- /ai_compiler/XLA/img_xla2hlo/v2-16e964ead53e7c71c0cc4dff6ed11851_b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/v2-16e964ead53e7c71c0cc4dff6ed11851_b.jpg -------------------------------------------------------------------------------- /ai_compiler/XLA/img_xla2hlo/v2-84cd6a3244ebcd2f210626887a09c33f_b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/v2-84cd6a3244ebcd2f210626887a09c33f_b.jpg -------------------------------------------------------------------------------- /ai_compiler/XLA/img_xla2hlo/v2-ae9fc9f5aeb969d0c25940cd9f8f24c3_b.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/v2-ae9fc9f5aeb969d0c25940cd9f8f24c3_b.jpg -------------------------------------------------------------------------------- /ai_compiler/XLA/img_xla2hlo/截屏2023-02-14 17.54.11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/XLA/img_xla2hlo/截屏2023-02-14 17.54.11.png -------------------------------------------------------------------------------- /ai_compiler/XLA/xla2hlo.md: -------------------------------------------------------------------------------- 1 | ## xla 2 | 3 | [XLA 源码深入解读](https://zhuanlan.zhihu.com/p/427444916) 4 | 5 | XLA(加速线性代数)是一种针对特定领域的线性代数编译器,能够加快 TensorFlow 模型的运行速度。 6 | 7 | > XLA good at 8 | > 9 | > (1) transforming code back and forth between the scalar and the vector worlds 10 | > 11 | > (2) passing function boundaries for handling both host and device code 12 | > 13 | > (3) complying to stringent requirements imposed by energy-efficient xPUs 14 | 15 | 更一般来说,XLA 可以获取 TensorFlow 操作的整个子图,并将它们融合到需要最少内核启动次数的高效循环中。获得的融合内核可以利用模型专属信息进行优化。(算子融合、中间值传输) 16 | 17 | 18 | 19 | img 20 | 21 | ```python 22 | def model_fn(x, y, z): 23 | return tf.reduce_sum(x + y * z) 24 | ``` 25 | 26 | 例如上述代码中,原始的tf会启动三个内核:分别对应于乘法、加法和归约(reduce)运算。但XLA能将加法、乘法和归约“融合”到一个 GPU 内核中,以只使用一个内核就可以完成计算。融合操作不会将由 `y*z` 和 `x+y*z` 生成的中间值写出到内存中;而是直接将这些中间计算的结果“流式传输”给使用它们的后续运算,同时将它们完全保留在 GPU 寄存器中。 27 | 28 | 29 | 30 | XLA 接受在 HLO 中定义的计算图(“计算”)并将其编译为适用于各种架构的机器指令。 31 | 32 | XLA对输入的HLO计算图进行**与目标设备无关的优化,如CSE(公共子表达式消除),算子融合,运行时内存分配分析**。输出为优化后的HLO计算图 HLO IR。 33 | 然后,将HLO计算图发送到后端(Backend),后端结合特定的硬件属性对HLO计算图进行进一步的HLO级优化,例如将某些操作或其组合进行模式匹配从而优化计算库调用。最后,后端将HLO IR转化为LLVM IR,LLVM再进行低级优化并生成机器码。 34 | 35 | > XLA IR在优化中,会将一些具名算子节点(BatchNormalization)直接替换为包含计算细节(+-*/),同时插入一些相关的add、multiply和maximum等节点;或者将另外的具名算子(Conv)替换为cuDNN 
API,并且插入相应的call、reshape等节点。接下来,会做一些fusion和dse等优化操作。 36 | 37 |
img
38 | 39 | 40 | 41 | tf2xla:graph compile -> hlo graph build -> hlo pass pipeline -> hlo dataflow analysis -> codegen 42 | 43 | 44 | 45 | ## HLO 46 | 47 | 截屏2023-02-14 17.54.11 48 | 49 | ### 层次划分 50 | 51 | HLO IR可以分成三个层次,HloModule, HloComputation和HloInstruction。 52 | 53 |
img
54 | 55 | - HloModule 56 | 57 | 一个编译单元,相当于一个完整的**可执行程序**,所以有入口函数,也就是 entry_computation, 有且仅有一个。输入可以是多个参数,但输出只有一个(root instruction的值),**如果要返回多个值,需要把多个值构造成一个元组(tuple)返回**。一个module可以包含多个computation,除了entry_computation,其他的都是"nested",也就是被调用。 58 | 59 | ENTRY ----------------> 程序入口,ROOT--------------> 程序输出 60 | 61 | 一个HloModule可以包含很多个HloComputation 62 | 63 | - HloComputation 64 | 65 | 是HLO IR中间层的表示,相当于程序中的一个**函数**。一个HloModule只能有一个entry_computation,其他的computation是被entry_computation调用的。我们可以把entry_computation类比作main函数。每个HloComputation可以包含多个HloInstruction,但只能有一个root_instruction,root_instruction的output就是该computation的output。 66 | 67 | - HloInstruction 68 | 69 | HLO IR最底层的表示,相当于程序中的一条指令,一个HloInstruction可包含多个算子。computation的input用parameter表示。HloInstruction也可以调用HloComputation。一个HloInstruction只有一个输出,如果需要多个output,就打包成一个Tuple。 70 | 71 | data dependency:如果一个instruction的output是另一个instruction的input,我们就说两个instruction之间存在data dependency。HLO中使用operands和users两个fields来表示data dependency。 72 | 73 | ```cpp 74 | class HloInstruction { 75 | ... 76 | InstructionVector operands_; 77 | std::vector users_; 78 | absl::flat_hash_map user_map_; 79 | ... 80 | }; 81 | ``` 82 | 83 |
img
84 | 85 | control dependency:有些instruction之间并没有数据依赖,但是我们仍然可能对这些instruction的执行顺序有额外的要求。在HloInstruction中有control_successors\_和control_predecessors_两个fields来表示control dependency。 86 | 87 | 88 | 89 | ### 多种HLO 90 | 91 | https://github.com/tensorflow/mlir-hlo 92 | 93 | - `DHLO`:Dynamic HLO 94 | 95 | 在XLA的HLO IR基础上,扩展了一套具有完备动态shape表达能力的IR。静态场景下,HLO IR中的shape表达会被静态化,所有的shape计算会被固化为编译时常量保留在编译结果中;而在动态shape场景下,IR本身需要有足够的能力表达shape计算和动态shape信息的传递。 96 | 97 | - `CHLO`:"Client" HLO 98 | 99 | 最初设计用于映射XLA的client API,更接近前端。其中的ops可以来自XlaBuilder或者XLA helper functions,支持隐式广播、支持动态形状。设计初衷是为了贴近client级别,方便渐进细化下降。 100 | 101 | > 隐式广播指的是**当操作数形状不一致时,按照 numpy 风格的规则自动把它们广播到兼容的形状,无需显式插入 broadcast 操作** 102 | 103 | | CHLO | | 104 | | :--- | ------------------------------------------------------------ | 105 | | 入口 | 通过XlaBuilder API,TF2XLA kernels、JAX、PyTorch bridge直接使用这些API;
legalization from TensorFlow ops in the TF Graph Compiler | 106 | | 出口 | MHLO;
调用XlaBuilder API导出到 xla::HloInstructionProto | 107 | 108 | - `MHLO`: "Meta"-HLO 109 | 110 | https://tensorflow.google.cn/mlir/hlo_ops 111 | 112 | 和xla_hlo相近,没有隐式广播,但支持动态形状 113 | 114 | 隐式捕获控制流ops,便于优化;输出结果多个,不用组合成一个元组;拥有不能加在client dialect或server dialect的ops;verification发生在boundary;更彻底地支持动态形状,无需更新所有users/backends 115 | 116 | 大量地映射到 linalg named ops 117 | 118 | | MHLO | | 119 | | :--- | :----------------------------------------------------------- | 120 | | 入口 | Legalization from CHLO dialect or conversion from XLA HLO;
直接从TF Graph Compiler获得 | 121 | | 出口 | LMHLO;
Linalg IREE | 122 | 123 | - `LMHLO`: "Late"-"Meta"-HLO 124 | 125 | 与 MHLO 相同,但在 buffers (e.g., memref) 而不是 tensors 上,在缓冲区分配后作用 126 | 127 | **LMHLO是为了帮助迁移 XLA 后端而临时引入的**,在这个级别上建模的大部分内容已经可以用 `linalg` 表示出来了。在 LHLO 上执行转换的 pass 理想情况下应当遵守 Linalg 上已有的严格接口,并且足够通用。 128 | 129 | > tensor values (immutable) 和in-memory buffers (side-effecting) 130 | 131 | | LMHLO | | 132 | | :---- | :-------------------- | 133 | | 入口 | 缓冲区分配后从XLA转入 | 134 | | 出口 | Codegen(LLVM IR) | 135 | 136 | > [HLO to LHLO conversion and fusion #41424](https://github.com/tensorflow/tensorflow/issues/41424) 137 | 138 | 139 | 140 |
MLIR CodeGen Dialect Hierarchy
141 | -------------------------------------------------------------------------------- /ai_compiler/ai_compiler_commom/Graph_Partition.md: -------------------------------------------------------------------------------- 1 | # 切图算法 2 | 3 | 在 [Auto-Parallelism](../../paper_read/Auto-parallelism%20summary/Auto_Parallelism.md) 文件中讲到了ai compiler可能涉及的两种切图算法: 4 | 5 | (1)为了设备分布式执行切图,将计算图(静态图)切为子图, 为了在内存受限的独立设备上执行 6 | 7 | 例如:FlexFlow、Alpa 8 | 9 | (2)为了launch kernel切图(算子融合),子图对于硬件kernel可能过大,无法融合为1个kernel,为了便于在auto-scheduler时直接融(tile)为一个kernel,故将子图切为适应硬件kernel大小的片段 10 | 11 | 当时在 [Auto-Parallelism](../../paper_read/Auto-parallelism%20summary/Auto_Parallelism.md) 中分析了第一类切图算法,本文就学习一下第二类——为了launch kernel切图(算子融合) 12 | 13 | ## 相关工作 14 | 15 | 为了launch kernel切图也经常以算子融合的形式实现,很多ai compiler都实现了各自的切图算法 16 | 17 | ### 手写pattern 18 | 19 | 手写一些融合的pattern,例如:`conv2d + relu`,`conv2d + bn + relu`,`conv2d + bn + relu + pool`等,常见让计算密集型和访存密集型算子融合,减小整体执行开销。针对特定场景有较好的收益,但泛化性较差(总不能手动枚举所有优化可能吧,总得在编译时间和性能之间做trade-off) 20 | 21 | > 推荐两篇讲访存和计算的知乎文章:https://zhuanlan.zhihu.com/p/600489819, https://zhuanlan.zhihu.com/p/688613416 22 | 23 | ### XLA 24 | 25 | XLA的op fusion是在HLO IR层做的,相关代码见[fusion_pipeline.cc](https://github.com/openxla/xla/blob/main/xla/service/gpu/fusion_pipeline.cc)。 26 | 27 | - PriorityFusion: 实现了FusionQueue,queue中都是producer。根据cost model来fuse instructions,并且通过动态地更新优先级(benefit最大)来选择下一个fusion对象。 28 | 29 | - InstructionFusion: 纵向fusion,将从producer到consumer的所有指令融合为一个kernel。 30 | 31 | - FusionMerger: 当合并结果不会增加字节传输,将融合后的指令进行合并。 32 | 33 | - MultiOutputFusion: 横向融合的一种,将多个output的指令融合为一个kernel。 34 | 35 | - HorizontalLoopFusion: 将多个fusion op横向融合在一起launch 36 | 37 | ### TVM 38 | 39 | TVM的算子融合是在relay层做的,相关代码见[graph_partitioner.cc](https://github.com/apache/tvm/blob/main/src/relay/analysis/graph_partitioner.cc)和[fuse_ops.cc](https://github.com/apache/tvm/blob/main/src/relay/transforms/fuse_ops.cc) 40 | 41 | 其将算子分为7类:ElemWise,Broadcast,Injective,CommReduce,OutEWiseFusable,Tuple,Opaque,不同的算子间有不同的融合rule 42 | 43 | 
主体实现代码: 44 | 45 | ```cpp 46 | std::vector GraphPartitioner::Partition( 47 | const IndexedForwardGraph& graph) { 48 | // 每个节点初始化为一个group 49 | this->InitGroups(graph); 50 | if (opt_level_ == 0) return std::move(groups_); 51 | // get post dominator tree 52 | auto post_dom_tree = DominatorTree::PostDom(arena_, graph); 53 | // run fusion algorithm. 54 | for (int phase = 0; phase < 3; ++phase) { 55 | this->RunFuse(graph, post_dom_tree, phase); 56 | } 57 | return std::move(groups_); 58 | } 59 | ``` 60 | 61 | 个人想法:ansor生成Sketch已经是在图划分后,而划图就已经考虑了op的融合行为,所以给定的rule中会有融合的rule,会尽量尝试融合 62 | 63 |
ansor_pipeline
64 | 65 | -------------------------------------------------------------------------------- /ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/MS框架.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/MS框架.png -------------------------------------------------------------------------------- /ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/hlo优化.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/hlo优化.png -------------------------------------------------------------------------------- /ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/发展.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/发展.png -------------------------------------------------------------------------------- /ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/后端优化.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/后端优化.png -------------------------------------------------------------------------------- /ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/技术框架.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/技术框架.png 
-------------------------------------------------------------------------------- /ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/结构.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_AI_Compiler_Survey/结构.png -------------------------------------------------------------------------------- /ai_compiler/ai_compiler_commom/img_Graph_Partition/ansor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/ai_compiler/ai_compiler_commom/img_Graph_Partition/ansor.png -------------------------------------------------------------------------------- /basic/Architecture/Architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | 3 | 传统nn网络对硬件的需求:weight以及中间结果,通过存算一体搞一个weight station,再分析数据流以减少层之间的中间结果,就可以靠dsa叠算力将吞吐拉到极大 4 | 5 | 但是llm推理本质上是多次inference,通过一次input给出多个token,每次inference都将当前获得的token和input拼接再输入来预测下一个token。多次inference的过程是串行的,所以需要保存大量上下文信息。新一轮的inference的某些计算结果都可以复用上一轮的某些结果,这便是kv-cache技术。 但kv-cache对芯片显存的要求可大可小,和请求数、上下文长度相关。 6 | 7 | llm专用芯片的瓶颈在于显存容量和带宽,kv-cache是热数据,每次inference生成token都要读一遍,如果kv-cache高达几百GB,那么想实现100token/s那就是几十TB/s的带宽需求。提高并发度一定程度可以加大对权重读取带宽的复用,但是并发度越高,kv-cache容量需求越大,容量又会bound。 -------------------------------------------------------------------------------- /basic/CMake/cmake.md: -------------------------------------------------------------------------------- 1 | # cmake 简介 2 | 3 | 存在多种make工具(不同平台、不同应用),cmake被设计出来实现”write once, run everywhere” 4 | 5 | 基础流程: 6 | 7 | 1. 写 CMake 的配置文件 `CMakeLists.txt` 8 | 9 | 2. 执行命令 10 | 11 | ``` 12 | cmake PATH 13 | ``` 14 | 15 | 或 16 | 17 | ``` 18 | ccmake PATH 19 | ``` 20 | 21 | 生成 22 | 23 | ``` 24 | Makefile 25 | ``` 26 | 27 | 1. 
PATH 是 CMakeLists.txt 所在的目录 28 | 2. ccmake 比 cmake 多交互式页面 29 | 30 | 3. 使用 `make` 命令进行编译 -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 1.jpeg -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 1.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 10.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 11.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 12.png -------------------------------------------------------------------------------- 
/basic/DataReuse/img_data_reuse/Untitled 13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 13.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 14.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 15.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 2.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 3.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 4.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 5.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 6.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 7.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 8.png -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled 9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled 9.png 
-------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled.jpeg -------------------------------------------------------------------------------- /basic/DataReuse/img_data_reuse/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/DataReuse/img_data_reuse/Untitled.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.04.41.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.04.41.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.08.11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.08.11.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.09.48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.09.48.png -------------------------------------------------------------------------------- 
/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.10.47.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-03_18.10.47.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-04_12.39.32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-04_12.39.32.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-04_15.15.51.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/%E6%88%AA%E5%B1%8F2023-06-04_15.15.51.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/IMG_8232.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/IMG_8232.jpg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 1.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 1.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 10.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 10.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 11.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 11.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 12.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 12.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 13.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 13.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 14.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 14.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 15.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 15.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 16.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 16.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 17.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 17.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 18.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 18.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 19.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 19.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 2.jpeg 
-------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 2.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 20.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 20.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 3.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 3.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 4.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 4.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 5.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 5.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 5.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 6.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 6.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 6.png -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 7.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 7.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 8.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 8.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled 9.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled 9.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled.jpeg -------------------------------------------------------------------------------- /basic/GPU/img_GPU架构/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/GPU/img_GPU架构/Untitled.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 1.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 10.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 10.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 11.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 12.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 13.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 14.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 15.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 15.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 16.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 17.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 18.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 19.png -------------------------------------------------------------------------------- 
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 2.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 20.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 21.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 22.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 23.png 
-------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 24.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 25.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 26.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 27.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 28.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 28.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 29.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 29.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 3.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 30.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 31.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 31.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 4.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 5.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 6.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 7.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 8.png -------------------------------------------------------------------------------- 
/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled 9.png -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled.jpeg -------------------------------------------------------------------------------- /basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PolyhedralCompilation/img_Polyhedral_Compilation/Untitled.png -------------------------------------------------------------------------------- /basic/PyTorch/PyTorch2.0.md: -------------------------------------------------------------------------------- 1 | # PyTorch2.0 2 | 3 | DL framework三种运行加速 4 | 5 |
Untitled
6 | 7 | Torch Compiler分3步 8 | 9 | 1. Graph Acquisition: Dynamo (forward) + AOTAutograd (backward) 10 | 2. Graph Lowering: ATen / Prim IR 11 | 3. Graph Compilation: TorchInductor 12 | 13 |
Untitled
14 | 15 | TorchInductor分为四层,三大核心技术 16 | 17 | 1. 核心 IR 是 loop level IR,是 Python callable。做 codegen 或者 analysis,只需要直接 execute。 18 | 2. 用 SymPy(一个符号计算库)支持动态 shape。 19 | 3. 在 CPU 上用的 OpenMP,跟 Intel 一起搞的。GPU 选了 OpenAI Triton。 20 | 21 | AOTInductor = torch.export (whole graph capture) + Inductor (AOT compilation) 22 | 23 |
Untitled
24 | 25 | Dynamo 的 2 个精髓: 26 | 27 | 1. partial graph capture。遇到不支持的,就保留并拆分出前后的子图,分别编译。 28 | 2. guard。解决了 trace 的经典难题 -- control flow 等导致 capture 不能用。guard 会自动报警,不会出错。 29 | 30 | 三种模式 31 | 32 |
Untitled
-------------------------------------------------------------------------------- /basic/PyTorch/img_PyTorch2.0/Untitled 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PyTorch/img_PyTorch2.0/Untitled 1.png -------------------------------------------------------------------------------- /basic/PyTorch/img_PyTorch2.0/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PyTorch/img_PyTorch2.0/Untitled 2.png -------------------------------------------------------------------------------- /basic/PyTorch/img_PyTorch2.0/Untitled 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PyTorch/img_PyTorch2.0/Untitled 3.png -------------------------------------------------------------------------------- /basic/PyTorch/img_PyTorch2.0/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/basic/PyTorch/img_PyTorch2.0/Untitled.png -------------------------------------------------------------------------------- /coding/img_coding_note/computational_complexity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/coding/img_coding_note/computational_complexity.png -------------------------------------------------------------------------------- /paper_read/Astitch/AStitch.pptx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/AStitch.pptx -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128113128495.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128113128495.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128113426967.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128113426967.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128115126772.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128115126772.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128125650543.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128125650543.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128131909434.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128131909434.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128135909877.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128135909877.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128141107379.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128141107379.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128143319632.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128143319632.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128145444483.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128145444483.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128153319148.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128153319148.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128185111129.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128185111129.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221128224708418.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221128224708418.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221129103644172.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221129103644172.png -------------------------------------------------------------------------------- /paper_read/Astitch/img_Astitch/image-20221129104739007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astitch/img_Astitch/image-20221129104739007.png -------------------------------------------------------------------------------- /paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled 1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled 1.png -------------------------------------------------------------------------------- /paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled 2.png -------------------------------------------------------------------------------- /paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Astra、Rammer、Roller/img_Astra_Rammer_Roller/Untitled.png -------------------------------------------------------------------------------- /paper_read/Astra、Rammer、Roller/短记_Astra_Rammer_Roller.md: -------------------------------------------------------------------------------- 1 | # Astra、Rammer、Roller 2 | 3 | ## Astra 4 | 5 | DNN任务的特点:重复性(数据重复、指令重复)和可预测性(一般不会有复杂的控制流) 6 | 7 | Astra利用运行时对优化结果进行评估,其优化对象为整个模型程序(需要优化的就是全局程序的运行情况,并不单独优化某些参数或者特征) 8 | 9 | 在运行时运行多个版本编译的程序,通过运行时间筛选运行最快的编译版本 10 | 11 | 三个用于管理优化空间的技术: 12 | 13 | - 配置粗粒度的静态信息(*在某些维度上使用先验知识约束枚举器的可选项,排除无效选项*) 14 | - 在细粒度层面,并行的评估多个版本的编译。Astra框架同时对多个独立的参数进行评估。 15 | - 以初始的测验结果作为信号来智能的修剪动态状态空间。 16 | 17 | 可以进行的优化空间: 18 | 19 | - 内核内参数优化,如线程块大小、tiling数据大小和共享内存大小等参数。 20 | - 将多个内核函数融合,并指出内核融合需要测验,举例融合可能会带来性能下降。 21 | - 使用多个流 22 | - 其他的优化: 23 | - 全图优化,如内存规划 24 | - 分布式或GPU训练 25 | 26 | 优化器 = 枚举器 + custom-wirer(捕获器) 27 | 28 | 编译器用来枚举相关优化的状态空间,并使用静态信息对状态空间进行修剪 29 | 30 | 运行时对被可捕获的优化选项进行排序,从而选出最优的优化集合 31 | 32 | 自适应变量被组成一棵更新树,更新树有几种被探索的模式: 33 | 34 | - 并行模式:所有子节点都相互独立时 35 | - 详尽模式:所有子树需要被暴力的遍历,时间成本是指数级的 36 | - 
前缀模式:需要按顺序分层级遍历子代。当上一个子代遍历完其最佳结果确定,开始下一子代的遍历 37 | 38 | 修剪探索状态空间的方式: 39 | 40 | - 并行探索:选择维度独立,并行每个组进行探索 41 | - 层次探索:探索空间引入一个内存分配策略,在探索完分配之后,再为每个分配构建最佳配置。 42 | - 屏障探索:不同流中调度内核的性能受到之前内核调度历史的影响。Astra引入屏障探索的概念,设立超时期,在超时期的边界对所有流进行强制屏障同步,并行的同时对多个超时期进行探索。可以分摊跨流屏障同步的成本。 43 | - 前缀探索:为了在超时期进一步控制状态空间,将超时期基于依赖关系进一步划分成不同的时期。时期中的操作可以进行跨流的调度。 44 | - 等价探索 45 | 46 | ## **Rammer** 47 | 48 | 通过算子**之间和内部**的协同,为DNN生成有效的**静态时空调度** 49 | 50 | 算子间并行 && 算子内并行:通过降低算子内部并行,从而提高算子间的并行 51 | 52 | 单个算子派发将贪婪的使用硬件资源,阻止了其他可并发的算子并行使用硬件资源 53 | 54 | 一个算子(rOperator)=多个rTask(调度的最小单元,加速器单个执行单元的运行对象) 55 | 56 | 以rOperator构成的数据流图依然保持着数据间的依赖关系,但是**以rTask为粒度的调度**将算子内部的并行信息暴露给编译器 57 | 58 | 每个rOperator有多个版本的rKernel的具体实现,每个rKernel有不同的tiling策略 59 | 60 | 虚拟化的并行设备(vDevice) 包含多个虚拟化的执行单元(vEU) 61 | 62 | rTask被分派到vEU上进行执行,每个vEU可以独立的执行rTask 63 | 64 | 为了保证具有依赖的算子的顺序执行,定义了屏障rTask 65 | 66 | Rammer在vDevice上将DFG组织成rProgram,rProgram被表示成一个prog[EU_id][order]的二维数组 67 | 68 | EU_id表示执行任务的单元,order表示EU设备上执行的顺序 69 | 70 | Rammer将调度决策从运行时迁移到编译期间(AOT) 71 | 72 | 编译过程中,Rammer会提供信息:rTask在vEU上独立执行时间、rTask的资源占用情况,如本地内存或寄存器、rProgram的总体执行时间 73 | 74 | 调度策略如下: 75 | 76 | Untitled 77 | 78 | 算子间调度的开销:内核启动、上下文初始化、主机和设备之间的通信 79 | 80 | 使用operator fusion技术减少算子间调度开销 81 | 82 | Rammer将输入格式统一转换成由rOperator表达的DFG图,在DFG图上做一些常见的图优化转换:常量折叠、公共子表达式消除、基于模式的算子融合 83 | 84 | 对于每个优化后的rOperator,Rammer在不同渠道加载一个或多个版本的rKernel实现:其他框架的转换器、手调内核函数、核函数代码生成器 85 | 86 | 后续,DFG划分成多个子图,并为每个子图编译出对应的rProgram,每个rProgram进一步转换为加速器上运行的设备代码 87 | 88 | 在NVIDIA GPU中,rTask会被分配到SM上,将其作为一个线程块实现。为了绕过CUDA内置的硬件调度器,Rammer采用持久线程块(PTB)来在硬件中实现vEU的映射 89 | 90 | - 抽象rTask,暴露算子内部的并行性 91 | - 抽象vEU,暴露硬件内部的调度能力 92 | - 利用DNN计算的可预测性,将运行时调度问题转换为编译期rTask执行优化 93 | 94 | ## Roller 95 | 96 | 高效生成内核,基于构造的方法 97 | 98 | rTile是Roller的核心,其**封装了与硬件加速器关键特性对齐的张量形状** 99 | 100 | 对tile进行抽象,从而通过对形状选择进行限制,以此实现高效的探索 101 | 102 | Roller基于rTile生成最终的rProgram 103 | 104 | rProgram的性能可以通过微性能模型进行评估 105 | 106 | Roller使用一种*scale-up-then-scale-out* 的方法:先执行放大过程,采用基于递归的rTile构造算法,逐步增大rTile的大小,构造硬件加速器的单个执行单元的饱和运行 107 | 108 | Roller系统结构 109 | 110 | 
Untitled 111 | 112 | 从张量表达式中提取张量的形状,并利用硬件的相关规范来构造rTiles 113 | 114 | 在rTiles的基础上使用*scale-up-then-scale-out*的递归构造算法生成rProgram 115 | 116 | 构造算法通过评估rProgram的性能来识别rTile合适的配置 117 | 118 | rTile必须和给定张量表达式中的张量形状和底层硬件特性对齐 119 | 120 |
Untitled
121 | 122 | 给定一个数据处理管道,其rProgram的优化目标是最大化管道的吞吐量,该目标可以转换为: 123 | 124 | - 计算和数据移动需要充分利用硬件特性 125 | - 吞吐量需要达到性能瓶颈 126 | - 需要足够的并行以充分利用并行执行单元 127 | 128 | Roller首先使用scale up以满足前两点,之后利用scale out利用多核并行性 129 | 130 | - scale up:Roller从内存结构由顶至下构建不同层次的rTile尺寸,Roller首先进行rTile的初始化,之后迭代扩大rTile的大小,最大程度提升数据重用得分。 131 | - scale out:Roller在最底层的内存尺寸上构造单一单元执行的rProgram,并通过将每层的rTile平均分配来将rProgram复制到其他单元。一般会在reduce的轴上分配单元 132 | 133 | Roller尝试沿着具有最小数据重用得分的轴上收缩rTiles以实现足够的并行度 -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-01-21_19.09.47.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-01-21_19.09.47.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-01-21_19.10.11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-01-21_19.10.11.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.04.41.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.04.41.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.13.41.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.13.41.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.14.05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.14.05.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.20.42.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_17.20.42.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_20.17.01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_20.17.01.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_22.57.54.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_22.57.54.png -------------------------------------------------------------------------------- 
/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_23.31.10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/%E6%88%AA%E5%B1%8F2024-03-10_23.31.10.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 1.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 10.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 11.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 13.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 14.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 14.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 2.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 3.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 4.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Attention/img_Attention/Untitled 5.png -------------------------------------------------------------------------------- /paper_read/Attention/img_Attention/Untitled 6.png: -------------------------------------------------------------------------------- 
# paper_read/Attention/test_attention/attention.py
import torch
import numpy as np
from torch import nn


class ScaledDotProductAttension(nn.Module):
    """Scaled dot-product attention over batched 3-D tensors.

    Computes ``softmax(q @ k^T / scale) @ v`` with ``torch.bmm``, so every
    input must be a 3-D tensor whose first dimension is the batch.

    Args:
        scale: Divisor applied to the raw scores before the softmax
            (the callers in this file pass ``sqrt(d_k)``).
    """

    def __init__(self, scale):
        super().__init__()  # run nn.Module's own initialisation
        self.scale = scale
        self.softmax = nn.Softmax(dim=2)  # normalise over the key axis

    def forward(self, q, k, v, mask=None):
        """Return ``(attn, output)`` for one batch of queries.

        Args:
            q: Queries, ``(b, n_q, d)``.
            k: Keys, ``(b, n_k, d)``.
            v: Values, ``(b, n_k, d_v)``.
            mask: Optional tensor broadcastable to ``(b, n_q, n_k)``; truthy
                entries mark key positions that must receive zero weight.
                Any dtype is accepted and is converted to ``bool``.

        Returns:
            attn: Attention weights, ``(b, n_q, n_k)``.
            output: Weighted sum of the values, ``(b, n_q, d_v)``.
        """
        # 1. Similarity scores: bmm(b*n*m, b*m*p) -> (b*n*p).
        u = torch.bmm(q, k.transpose(1, 2))
        # 2. Scale.
        u = u / self.scale

        # 3. Optional masking.
        dead_rows = None
        if mask is not None:
            # masked_fill only accepts boolean masks.
            mask = mask.to(torch.bool)
            # A row with *every* key masked would become all -inf and turn
            # into NaN after softmax; keep such rows finite here and zero
            # their weights afterwards instead.
            dead_rows = mask.all(dim=-1, keepdim=True)
            u = u.masked_fill(mask & ~dead_rows, -np.inf)

        # 4. Softmax over the keys.
        attn = self.softmax(u)
        if dead_rows is not None:
            attn = attn.masked_fill(dead_rows, 0.0)

        # 5. Attention-weighted sum of the values.
        output = torch.bmm(attn, v)
        return attn, output


# Correctly spelled alias; the original (misspelled) name is kept for callers.
ScaledDotProductAttention = ScaledDotProductAttension


class MultiHeadAttention(nn.Module):
    """Multi-head attention built on ``ScaledDotProductAttension``.

    Args:
        n_head: Number of attention heads.
        d_k_: Feature size of the incoming queries and keys.
        d_v_: Feature size of the incoming values.
        d_k: Per-head feature size of the projected queries/keys.
        d_v: Per-head feature size of the projected values.
        d_o: Feature size of the final output.
    """

    def __init__(self, n_head, d_k_, d_v_, d_k, d_v, d_o):
        super().__init__()
        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        # Linear projections that expand one head into n_head heads.
        self.fc_q = nn.Linear(d_k_, n_head * d_k)
        self.fc_k = nn.Linear(d_k_, n_head * d_k)
        self.fc_v = nn.Linear(d_v_, n_head * d_v)

        self.attention = ScaledDotProductAttension(scale=np.power(d_k, 0.5))
        # concat -> linear -> output
        self.fc_concatOutput = nn.Linear(n_head * d_v, d_o)

    def forward(self, q, k, v, mask=None):
        """Return ``(attn, output)``.

        Args:
            q: ``(batch, n_q, d_k_)``.
            k: ``(batch, n_k, d_k_)``.
            v: ``(batch, n_v, d_v_)``.
            mask: Optional 3-D mask ``(batch, n_q, n_k)``; it is tiled
                ``n_head`` times along dim 0 to match the folded heads.

        Returns:
            attn: ``(n_head * batch, n_q, n_k)`` (head-major order).
            output: ``(batch, n_q, d_o)``.
        """
        n_head, d_k, d_v = self.n_head, self.d_k, self.d_v
        d_q = d_k  # queries and keys share the per-head size
        batch, n_q = q.size(0), q.size(1)
        n_k = k.size(1)
        n_v = v.size(1)

        # Project: single head -> n_head heads packed in the last dimension.
        q = self.fc_q(q)
        k = self.fc_k(k)
        v = self.fc_v(v)

        def fold_heads(x, length, width):
            # (batch, length, n_head * width) -> (batch, length, n_head, width)
            # -> (n_head, batch, length, width), made contiguous so it can be
            # viewed as (n_head * batch, length, width).
            x = x.view(batch, length, n_head, width).permute(2, 0, 1, 3)
            return x.contiguous().view(-1, length, width)

        q = fold_heads(q, n_q, d_q)
        k = fold_heads(k, n_k, d_k)
        v = fold_heads(v, n_v, d_v)

        if mask is not None:
            # Copy the mask n_head times along dim 0; other dims unchanged.
            mask = mask.repeat(n_head, 1, 1)

        # Heads now live in the batch dimension: run single-head attention.
        attn, output = self.attention(q, k, v, mask=mask)

        # Undo the folding and concatenate the heads on the last dimension.
        output = output.view(n_head, batch, n_q, d_v).permute(1, 2, 0, 3)
        output = output.contiguous().view(batch, n_q, -1)
        # Final projection.
        output = self.fc_concatOutput(output)
        return attn, output


class SelfAttention(nn.Module):
    """Self-attention: q, k and v are all linear maps of the same input.

    Args:
        n_head: Number of attention heads.
        d_k: Feature size of the generated queries/keys.
        d_v: Feature size of the generated values.
        d_x: Feature size of the input items.
        d_o: Feature size of the output.
    """

    def __init__(self, n_head, d_k, d_v, d_x, d_o):
        super().__init__()
        # Allocated uninitialised; filled in by init_parameters() below.
        self.wq = nn.Parameter(torch.empty(d_x, d_k))
        self.wk = nn.Parameter(torch.empty(d_x, d_k))
        self.wv = nn.Parameter(torch.empty(d_x, d_v))

        self.mha = MultiHeadAttention(n_head=n_head, d_k_=d_k, d_v_=d_v,
                                      d_k=d_k, d_v=d_v, d_o=d_o)
        self.init_parameters()

    def init_parameters(self):
        """Fill every parameter with U(-s, s), s = 1 / sqrt(size of last dim).

        Iterates ``self.parameters()``, so the parameters of ``self.mha``
        (already constructed at this point) are re-initialised as well.
        """
        with torch.no_grad():
            for param in self.parameters():
                stdv = float(1. / np.power(param.size(-1), 0.5))
                param.uniform_(-stdv, stdv)

    def forward(self, x, mask=None):
        """Return ``(attn, output)`` for input ``x`` of shape ``(batch, n_x, d_x)``."""
        q = torch.matmul(x, self.wq)
        k = torch.matmul(x, self.wk)
        v = torch.matmul(x, self.wv)

        attn, output = self.mha(q, k, v, mask=mask)
        return attn, output


if __name__ == "__main__":
    # n_q may differ from n_k, but n_k == n_v
    n_q, n_k, n_v = 2, 4, 4
    # d_q_ == d_k_, d_v_ may differ
    d_q_, d_k_, d_v_ = 128, 128, 64
    batch = 32

    q = torch.randn(batch, n_q, d_q_)  # batch, item count n_q, feature size d_q_
    k = torch.randn(batch, n_k, d_k_)
    v = torch.randn(batch, n_v, d_v_)
    mask = torch.zeros(batch, n_q, n_k).bool()
    print(mask)

    attension = ScaledDotProductAttension(scale=np.power(d_k_, 0.5))  # instantiate
    attn, output = attension(q, k, v, mask=mask)  # call

    # mha = MultiHeadAttention(n_head=8, d_k_=128, d_v_=64, d_k=256, d_v=128, d_o=128)
    # attn, output = mha(q, k, v, mask=mask)

    # n_x, d_x: number of self-attention items and their feature size.
    # Matrix products give the transformed q, k, v; multi-head attention
    # is then applied to them.
    # n_x = 4
    # d_x = 80
    # x = torch.randn(batch, n_x, d_x)
    # mask = torch.zeros(batch, n_x, n_x).bool()
    # selfattn = SelfAttention(n_head=8, d_k=128, d_v=64, d_x=80, d_o=80)
    # attn, output = selfattn(x, mask=mask)

    # print(attn); print(output)
    # print(attn.size()); print(output.size())
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_14.21.53.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_15.06.46.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_15.06.46.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_21.22.37.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_21.22.37.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_21.23.07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-01_21.23.07.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-02_22.59.26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism 
summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-02_22.59.26.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-02_23.01.41.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-02_23.01.41.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_15.04.36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_15.04.36.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_16.29.54.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_16.29.54.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_17.39.56.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_17.39.56.png 
-------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.18.39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.18.39.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.24.00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.24.00.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.27.19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.27.19.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.42.48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.42.48.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism 
summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.46.39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-01-04_19.46.39.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.10.38.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.10.38.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.21.48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.21.48.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.52.54.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-24_23.52.54.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.15.44.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.15.44.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.29.39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.29.39.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.35.43.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.35.43.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.38.09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.38.09.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.39.49.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/%E6%88%AA%E5%B1%8F2024-02-25_00.39.49.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 1.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 10.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 11.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 12.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 
13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 13.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 14.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 15.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 16.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 17.png -------------------------------------------------------------------------------- 
/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 18.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 19.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 2.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 20.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 21.png 
-------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 3.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 4.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 5.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 6.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 7.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 7.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 8.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled 9.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/Untitled.png -------------------------------------------------------------------------------- /paper_read/Auto-parallelism summary/img_auto_parallelism/mlsysdistribute_system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Auto-parallelism summary/img_auto_parallelism/mlsysdistribute_system.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/Composable and 
Modular Code Generation in MLIR.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/Composable and Modular Code Generation in MLIR.pptx -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/MLIRtoLLVM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/MLIRtoLLVM.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/MLIRtoLLVMandIntrinsics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/MLIRtoLLVMandIntrinsics.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/classical优化.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/classical优化.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/image-213.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/image-213.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/inplace-op.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/inplace-op.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/relevant dialects1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/relevant dialects1.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/relevant dialects2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/relevant dialects2.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/silos.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/silos.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/special优化.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/special优化.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step1.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step2.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/step3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in 
MLIR/img_CodeGenerationInMLIR/step3.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation1.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation2.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation3.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/transformation4.png 
-------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/v2-5b69d56e33512deeb65eda364c343859_1440w.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/v2-5b69d56e33512deeb65eda364c343859_1440w.webp -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/v2-8ce71a71e5c5e83da438c1d5793f76d9_r.jpg -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/x1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/x1.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-07 21.42.13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-07 21.42.13.png 
-------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-10 16.26.12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-10 16.26.12.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-13 14.54.32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-13 14.54.32.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-13 22.23.23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-13 22.23.23.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-14 16.52.45.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-14 16.52.45.png 
-------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-15 11.47.39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-15 11.47.39.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-16 14.16.10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-16 14.16.10.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-21 18.42.21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-21 18.42.21.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-21 18.54.30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2022-12-21 18.54.30.png 
-------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.47.49.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.47.49.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.56.53.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.56.53.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.57.09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.57.09.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.57.25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.57.25.png 
-------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.59.03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 17.59.03.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 18.00.04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 18.00.04.png -------------------------------------------------------------------------------- /paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 18.00.43.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Composable and Modular Code Generation in MLIR/img_CodeGenerationInMLIR/截屏2023-01-09 18.00.43.png -------------------------------------------------------------------------------- /paper_read/DISC/DISC.md: -------------------------------------------------------------------------------- 1 | # DISC 2 | 3 | > DISC:A Dynamic Shape Compiler for Machine Learning Workloads 4 | > 5 | > 6 | > 看hlo相关内容 7 | > 8 | 9 | 端到端的动态形状编译器项目BladeDISC 10 | 11 | ## 1、背景 12 | 13 | - 动态shape特性 14 | 15 | 输入tensor shape变化(批次)、运行过程中(某些算子导致)shape变化 16 | 17 | - 当前框架缺乏对动态shape的支持 18 | 19 | 静态shape在编译时完全已知shape信息,方便优化 20 | 21 | 
动态shape需要为每个shape编译,从而会增加编译开销、内存使用、优化和部署的复杂性 22 | 23 | 但 某些任务需要根据结果进行动态调度 或者 某任务的输入过大导致编排时间过长 24 | 25 | - 当前对动态shape的优化 26 | 27 | 区分动态shape和静态shape的算子后,仅对静态shape算子进行优化 28 | 29 | 通过padding、slicing的方式对动态shape张量处理至固定已知shape 30 | 31 | ## 2、问题与解决 32 | 33 | - 问题一:现有IR缺少对动态shape计算的完整表示 34 | 35 | DISC基于MLIR,对XLA编译器中的HLO IR进行复用并扩充为DHLO,以支持动态shape表示 36 | 37 | - 问题二:在运行时利用VM解释生成运行时流,带来解释开销且损失主机-设备协同优化机会 38 | 39 | DISC在编译时生成运行时流的代码,将主机端逻辑和设备计算一起编译 40 | 41 | - 问题三:由于动态shape而无法进行内核融合等优化 42 | 43 | DISC基于算子间的shape传播特性和编译时捕获的shape约束信息,进行内核融合优化 44 | 45 | - 问题四:深度学习编译器的灵活性 46 | 47 | DISC支持TensorFlow和PyTorch,且同时支持动态shape和静态shape混合情况 48 | 49 | ## 3、架构 50 | 51 | 截屏2023-03-02 23.20.46.png 52 | 53 | - **Computation Graph Bridging**:AI框架的计算图Lower到DHLO、收集shape约束信息 54 | - **DHLO(IR Supplementation)**:DHLO支持动态shape的表示 55 | - **Shape Calculation & Placer**:编译同时生成shape计算逻辑、主机-设备的计算逻辑放置 56 | - **Buffer Management & Optimization**:缓冲区管理与优化 57 | - **Host-side Control Flow**:外部库Lower、内核启动管理、设备管理以及与框架交互 58 | - **Fusion Decision**:根据算子调度兼容性分析执行融合决策 59 | - **Codegen**:主机-设备端代码生成 60 | 61 | 整个流程:在未知完整shape的情况下进行编译,编译时同时生成**关于shape计算逻辑**以及kernel计算的代码,期间也会收集**shape约束信息**,并使用约束分析指导编译优化过程。 62 | 63 |
v2-e68e87bbfeab11afd84bb40a17e1a179_r.jpg
64 | 65 | > BladeDISC架构图,出自:https://zhuanlan.zhihu.com/p/552484413 66 | > 67 | 68 | ## 4、DHLO 69 | 70 | `Dynamic HLO`: 在XLA的HLO IR基础上,扩展了一套具有完备动态shape表达能力的IR。 71 | 72 | 静态场景下,HLO IR中的shape表达会被静态化,所有的shape计算会被固化为编译时常量保留在编译结果中;而在动态shape场景下,IR本身需要有足够的能力表达shape计算和动态shape信息的传递。 73 | 74 | 将原有中间表示HLO中的start_indices、limit_indices、strides等属性值使用张量的形式进行保存,使得可以在运行时将计算得到的shape值传入。 75 | 76 |
截屏2023-03-02 23.26.10.png
77 | 78 | ## HLO相关补充 79 | 80 | ### **层次划分** 81 | 82 | HLO IR可以分成三个层次,HloModule, HloComputation和HloInstruction。 83 | 84 |
Untitled
85 | 86 | - HloModule 87 | 88 | 一个编译单元,相当于一个完整的**可执行程序**,所以有入口函数,也就是 entry_computation, 有且仅有一个。输入可以是多个参数,但输出只有一个(root instruction的值),**如果要返回多个值,需要把多个值构造成一个元组(tuple)返回**。一个module可以包含多个computation,除了entry_computation,其他的都是"nested",也就是被调用。 89 | 90 | ENTRY ----------------> 程序入口,ROOT--------------> 程序输出 91 | 92 | 一个HloModule可以包含很多个HloComputation 93 | 94 | - HloComputation 95 | 96 | 是HLO IR中间层的表示,相当于程序中的一个**函数**。一个HloModule只能有一个entry_computation,其他的computation是被entry_computation调用的。我们可以把entry_computation类比作main函数。每个HloComputation可以包含多个HloInstruction,但只能有一个root_instruction,root_instruction的output就是该computation的output。 97 | 98 | - HloInstruction 99 | 100 | HLO IR最底层的表示,相当于程序中的一条指令,一个HloInstruction可包含多个算子。computation的input用parameter表示。HloInstruction也可以调用HloComputation。一个HloInstruction只有一个输出,如果需要多个output,就打包成一个Tuple。 101 | 102 | data dependency:如果一个instruction的output是另一个instruction的input,我们就说两个instruction之间存在data dependency。HLO中使用operands_和users_两个fields来表示data dependency。 103 | 104 | ```cpp 105 |  class HloInstruction { 106 |   ... 107 |    InstructionVector operands_; 108 |    std::vector<HloInstruction*> users_; 109 |    absl::flat_hash_map<const HloInstruction*, int64_t> user_map_; 110 |   ... 111 |  }; 112 | ``` 113 | 114 |
Untitled
115 | 116 | control dependency:有些instruction之间并没有数据依赖,但是我们仍然可能对这些instruction的执行顺序有额外的要求。在HloInstruction中有control_successors_和control_predecessors_两个fields来表示control dependency。 117 | 118 | ### **多种HLO** 119 | 120 | [https://github.com/tensorflow/mlir-hlo](https://github.com/tensorflow/mlir-hlo) 121 | 122 | - `DHLO`:Dynamic HLO 123 | 124 | 在XLA的HLO IR基础上,扩展了一套具有完备动态shape表达能力的IR。静态场景下,HLO IR中的shape表达会被静态化,所有的shape计算会被固化为编译时常量保留在编译结果中;而在动态shape场景下,IR本身需要有足够的能力表达shape计算和动态shape信息的传递。 125 | 126 | - `CHLO`:"Client" HLO 127 | 128 | 最初设计用于映射XLA的client API,更接近前端。其中的ops可以来自XlaBuilder或者XLA helper functions,支持隐式广播、支持动态形状。设计初衷是为了贴近client级别,方便渐进细化下降。 129 | 130 | > 隐式广播指的是当算子各操作数的形状不一致时,按广播规则(类似NumPy)自动扩展形状,而无需显式插入broadcast操作 131 | > 132 | 133 | | CHLO | | 134 | | --- | --- | 135 | | 入口 | 通过XlaBuilder API,TF2XLA kernels、JAX、PyTorch bridge直接使用这些API; 136 | legalization from TensorFlow ops in the TF Graph Compiler | 137 | | 出口 | MHLO; 138 | 调用XlaBuilder API导出到 xla::HloInstructionProto | 139 | - `MHLO`: "Meta"-HLO 140 | 141 | [https://tensorflow.google.cn/mlir/hlo_ops](https://tensorflow.google.cn/mlir/hlo_ops) 142 | 143 | 和xla_hlo相近,没有隐式广播,但支持动态形状 144 | 145 | 隐式捕获控制流ops,便于优化;输出结果多个,不用组合成一个元组;拥有不能加在client dialect或server dialect的ops;verification发生在boundary;更彻底地支持动态形状,无需更新所有users/backends 146 | 147 | 大量地映射到 linalg named ops 148 | 149 | | MHLO | | 150 | | --- | --- | 151 | | 入口 | Legalization from CHLO dialect or conversion from XLA HLO; 152 | 直接从TF Graph Compiler获得 | 153 | | 出口 | LMHLO; 154 | Linalg IREE | 155 | - `LMHLO`: "Late"-"Meta"-HLO 156 | 157 | 与 MHLO 相同,但在 buffers (e.g., memref) 而不是 tensors 上,在缓冲区分配后作用 158 | 159 | **LMHLO是为了帮助迁移 XLA 后端而临时引入的**,在这个级别上建模的大部分内容已经可以用`linalg`表示出来了。在 LHLO 上执行转换的 pass 理想情况下应该遵守 Linalg 上存在的严格接口并且足够通用。 160 | 161 | > tensor values (immutable) 和in-memory buffers (side-effecting) 162 | > 163 | 164 | | LMHLO | | 165 | | --- | --- | 166 | | 入口 | 缓冲区分配后从XLA转入 | 167 | | 出口 | Codegen(LLVM IR) | 168 | 169 | > HLO to LHLO conversion and fusion #41424 170 | > 171 | 172 | Untitled 
-------------------------------------------------------------------------------- /paper_read/DISC/img_DISC/%E6%88%AA%E5%B1%8F2023-03-02_23.20.46.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/%E6%88%AA%E5%B1%8F2023-03-02_23.20.46.png -------------------------------------------------------------------------------- /paper_read/DISC/img_DISC/%E6%88%AA%E5%B1%8F2023-03-02_23.26.10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/%E6%88%AA%E5%B1%8F2023-03-02_23.26.10.png -------------------------------------------------------------------------------- /paper_read/DISC/img_DISC/Untitled 1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/Untitled 1.jpeg -------------------------------------------------------------------------------- /paper_read/DISC/img_DISC/Untitled.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/Untitled.jpeg -------------------------------------------------------------------------------- /paper_read/DISC/img_DISC/v2-e68e87bbfeab11afd84bb40a17e1a179_r.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/DISC/img_DISC/v2-e68e87bbfeab11afd84bb40a17e1a179_r.jpg -------------------------------------------------------------------------------- 
/paper_read/Graphene/ASPLOS’23 - Graphene An IR for Optimized Tensor Computations.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/ASPLOS’23 - Graphene An IR for Optimized Tensor Computations.pdf -------------------------------------------------------------------------------- /paper_read/Graphene/ASPLOS’23 - Graphene.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/ASPLOS’23 - Graphene.pptx -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/ASPLOS_23_-_Graphene_An_IR_for_Optimized_Tensor_Computations.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/ASPLOS_23_-_Graphene_An_IR_for_Optimized_Tensor_Computations.pdf -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 1.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 10.png -------------------------------------------------------------------------------- 
/paper_read/Graphene/img_Graphene/Untitled 11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 11.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 12.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 13.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 14.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 15.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 16.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 16.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 17.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 18.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 19.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 2.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 20.png 
-------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 21.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 3.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 4.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 5.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 6.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 7.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 7.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 8.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled 9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled 9.png -------------------------------------------------------------------------------- /paper_read/Graphene/img_Graphene/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Graphene/img_Graphene/Untitled.png -------------------------------------------------------------------------------- /paper_read/Ray/Ray.md: -------------------------------------------------------------------------------- 1 | # Ray 2 | 3 | > Ray: A Distributed Framework for Emerging AI Applications 4 | > 5 | 6 | ## 1、背景介绍 7 | 8 | 分布式开发框架,核心要把调度做好(跑得快、资源利用率高) 9 | 10 | 一般可以分为两个方面:任务的编排和优化 + 任务的调度执行 11 | 12 | - **任务的编排优化** 13 | - 静态:任务执行前,通过编译和优化技术进行全局的编排,可以给予rule(静态图)或代价模型 14 | - 动态:任务边执行边编排(动态图) 15 | 16 | 静态利于优化,但 某些任务需要根据结果进行动态调度 或者 某任务的输入过大导致编排时间过长 17 | 18 | - **任务的调度执行**:中心化调度(集中调度)、去中心化调度、变种(中心化和去中心化调度结合) 19 | - 中心化调度(例如Spark、DP接口):编排好的任务由中心节点决定在哪个节点上执行,中心节点的调度可以依据 计算资源、局部性 20 | - 
去中心化调度(例如MPI、DDP接口):没有中心节点统一调度,各个节点单独进行任务调度执行,互相之间通过通信进行交互 21 | 22 | 中心化调度能最大化利用系统资源,并提供一定的动态性和容错性,即如果发现有节点down了,可以进行任务重分配,但是缺点是调度开销大 23 | 24 | ## 2、ray 25 | 26 | Spark:静态的编排和优化+中心化的调度,利于执行优化和资源利用。 27 | Ray:动态编排(表面上是分布式RPC框架,但是多个RPC调用其实是形成了一张动态图)+树状的递归调度(去中心化和中心化结合的调度),利于灵活的编程以及低时延的调度。 28 | 29 |
Untitled
30 | 31 | GCS 作为集中的服务端,是 Worker 之间传递消息的纽带。每个 Server 都有一个共用的 Object Store,也就是用 Apache Arrow/Plasma 构建的内存数据。 Local Scheduler 是 Server 内部的调度器,同时通过 GCS 来和其他 Server 上的 Worker 通信。Object Store 之间也有通信,作用是传递 Worker 之间的数据。 32 | 33 | 一个典型的远程调用流程: 34 | 35 |
Untitled
36 | 37 | 可以看到,GCS 储存了代码、输入参数、返回值。Worker 通过 Local Scheduler 来和 GCS 通信。Local Scheduler 就是 Raylet, 是单机上的基础调度服务 38 | 39 | > 更多见:[https://zhuanlan.zhihu.com/p/111340572](https://zhuanlan.zhihu.com/p/111340572) -------------------------------------------------------------------------------- /paper_read/Ray/img_Ray/Untitled.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Ray/img_Ray/Untitled.jpeg -------------------------------------------------------------------------------- /paper_read/Ray/img_Ray/Untitled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Ray/img_Ray/Untitled.png -------------------------------------------------------------------------------- /paper_read/Slapo/ASPLOS24_Slapo.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/Slapo/ASPLOS24_Slapo.pptx -------------------------------------------------------------------------------- /paper_read/TensorIR/ASPLOS’23 - TensorIR.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/paper_read/TensorIR/ASPLOS’23 - TensorIR.pptx -------------------------------------------------------------------------------- /paper_read/TensorIR/TensorIR.md: -------------------------------------------------------------------------------- 1 | ## TensorIR 2 | 3 | 第六页:相关工作 4 | 5 | Halide 和 TVM 使用一种调度语言,可以用标量体描述循环嵌套的循环优化原语。 Tensor Comprehensions 、MLIR/Affine 使用多面体模型 来分析循环嵌套依赖性。以自下而上的方式使用标量计算优化循环嵌套 6 | 7 | Fireiron和 Stripe使用嵌套多面体结构以自上而下的方式对张量程序建模。 8 | 9 | 
TensorIR 结合了两种方法的见解,并将表示概括为张量化程序。 10 | 11 | > 自动化是机器学习编译和张量程序优化中的一个重要主题。 AutoTVM [10] 引入了一种基于学习的方法,通过学习成本模型和模板引导搜索来优化张量程序。 Triton [41] 引入了一种基于图块的模板表示,用于有效的程序优化。 FlexTensor [50] 自动生成模板。 Halide 使用 Monte-Carlo 树搜索 [2] 构建了一个自动调度程序。 Ansor [48] 使用分层搜索空间改进了自动调度。 我们的自动调度算法从这些方法中吸取教训,并将它们推广到最适合特定领域硬件加速的张量化计算。 自动矢量化 [25, 36] 是编译器研究中的一个长期课题。 张量化可以被视为矢量化问题的推广,以启用现代加速器中的固有张量 [4、21、30、31]。 关于这个主题有一些现有的作品[6,45,47,49]。 AKG [47] 使用多面体方法探索张量化搜索空间,UNIT [45] 引入了一个通用的张量化流程,而 AMOS [49] 可以通过张量表达式自动映射到张量化内在。 我们的方法通过提出一种新的张量化计算抽象并联合执行张量化和其他优化来概括这些先前的方法。 TensorIR 是进一步开发张量感知自动调度方法的基础。 12 | 13 | 14 | 15 | 第七页 16 | 17 | 一个block可以包含一个或者多个nested loop nests,其中包含与计算内容相对应的子块 --> 18 | 19 | 允许我们将计算划分到相应的子块,并利用块签名来表示依赖(约束)关系 20 | 21 | 22 | 23 | 第11页 24 | 25 | TensorIR以TVM IRModule为载体,从TVMScript中导入,并且可以不断进行Schedule变换,以及Pass变换,最终将优化后的IRModule build成为一个能够在不同硬件上运行的Module。 26 | 27 | 在Schedule变换过程中,对于给定的输入程序,我们需要生成具有等效语义的程序的丰富搜索空间。 我们引入原语将 TensorIR 程序转换为等效的优化程序。 28 | 29 | 30 | 31 | 第15页 32 | 33 | 对于每个输入的tensor指令,本文使用一种TensorIntrin结构,由两个块组成,一个块描述计算语义,另一个块提供张量化计算的底层实现。 34 | 35 | 通过TensorIntrin中的多维缓冲区规范包括数据类型、存储范围、内存布局和邻接约束。这些约束在验证阶段被使用。 36 | 37 | 38 | 39 | 第16页 40 | 41 | (1)给定一对后端目标和一个输入程序,我们首先将程序主体与可能的TensorIntrin进行匹配,生成张量化候选对象。匹配以循序渐进的方式进行。 42 | 43 | (2)系统将缓冲区访问表达式转换为中间迭代器。基于缓冲区访问模式,我们为每个迭代器计算特征函数,并在共享相同特征向量的迭代器之间建立映射。该映射进一步指导块实例空间和重建索引缓冲区的转换。 44 | 45 | 46 | 47 | 第17页 48 | 49 | 对于一组给定的张量候选者,我们需要构建一个包含tensorization的大型程序搜索空间,本文通过生成包含张量化计算的程序草图来构建搜索空间。 50 | 51 | 通过迭代应用预定义的草图生成规则来生成草图,需要检查块签名来构建适用于张量计算的草图生成规则,并在我们的分析过程中利用访问区域信息。 52 | 53 | 数据移动决策通常取决于计算调度决策,如平铺、线程绑定、执行范围和生产者-消费者数据流粒度。 54 | 55 | 56 | 57 | 第18页 58 | 59 | 在张量化程序草图生成阶段之后,我们使用进化搜索来探索空间并找到优化的张量程序。 60 | 61 | 初始化是随机选择一个程序草图,进化搜索由一个learning-base cost model指导 62 | 63 | cost model使用从程序中提取的特征。 特征向量包含与内存访问模式、重用和循环注释相关的信息。(memory access patterns, reuse, and loop annotations) 64 | 65 | 66 | 67 | 第19页 68 | 69 | 以探索内存分配策略为例: 70 | Astra采取测量驱动的方法来选择相互冲突的内存分配策略,在探索每个分配之后,我们为每个分配构建最佳配置,然后比较它们的端到端时间。 71 | Parallel:每个子节点都能被独立地探索并调优 72 | 
Prefix-based:每次只探索一维,其他参数不变。(第一个epoch搜索,找到ideal stream mapping;freeze the configuration;下一个epoch继续探索) 73 | Exhaustive:详细地探索子树(指数级别复杂度) 74 | Equivalence:如果一组kernel是同样的shape,在DFG中有着相似的inbound、outbound和depencies,那么它们可以同等看待 75 | (10个GEMM kernel分给2个stream,有5中而不是2^10次种) 76 | -------------------------------------------------------------------------------- /tools/conda.md: -------------------------------------------------------------------------------- 1 | # conda 2 | 3 | 4 | 5 | ## 换源 6 | 7 | vim ~/.condarc 8 | 9 | ```bash 10 | show_channel_urls: true 11 | channels: 12 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ 13 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ 14 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ 15 | - defaults 16 | auto_activate_base: false 17 | ``` 18 | 19 | ## 使用 20 | 21 | ```bash 22 |  conda create -n B --clone A       #克隆环境A来创建名为B的环境 23 |  conda create -n B python=3.10 24 |  conda activate xxxx               #开启xxxx环境 25 |  conda deactivate                  #关闭环境 26 |  conda info -e                    #显示所有的虚拟环境 27 |  conda remove -n xxxx --all       #删除已创建的xxxx虚拟环境 28 |   29 |  conda update --all 30 |   31 |  conda clean -p      #删除没有用的包 32 |  conda clean -t      #tar打包 33 |  conda clean -a 34 |   35 |  conda config --show   #查看全部配置 36 | ``` -------------------------------------------------------------------------------- /tools/git.md: -------------------------------------------------------------------------------- 1 | # git 2 | 3 | 4 | 5 | ## 显示当前分支 6 | 7 | ``` 8 | vim .bashrc 9 | ``` 10 | 11 | 将下面的代码加入到文件的最后处 12 | 13 | ``` bash 14 | function git_branch { 15 | branch="`git branch 2>/dev/null | grep "^\*" | sed -e "s/^\*\ //"`" 16 | if [ "${branch}" != "" ];then 17 | if [ "${branch}" = "(no branch)" ];then 18 | branch="(`git rev-parse --short HEAD`...)" 19 | fi 20 | echo " ($branch)" 21 | fi 22 | } 23 | 24 | export PS1='\u@\h \[\033[01;36m\]\W\[\033[01;32m\]$(git_branch)\[\033[00m\] \$ ' 25 | ``` 26 | 27 | 
保存退出,执行加载命令 28 | 29 | ``` 30 | source ~/.bashrc 31 | ``` 32 | 33 | ## 常用操作 34 | 35 | ```bash 36 | git clone xxx 37 | git submodule update --init --recursive 38 | 39 | # 查看远程分支 40 | git branch -a 41 | 42 | # 查看本地分支 43 | git branch 44 | 45 | # 切换分支 tx-84 46 | git checkout tx-84 47 | # 检查子模块是否版本对齐 48 | git status 49 | git submodule update --init 50 | 51 | 52 | # 获取master最新代码 53 | git checkout master 54 | git pull # 拉取远程最新内容并合并到当前分支(git pull = git fetch + git merge) 55 | git fetch # 只将远程主机的最新内容拉到本地(更新 origin/master),用户检查以后再决定是否合并到本地工作分支 56 | 57 | # 基于master创建分支 58 | git checkout -b myfeature 59 | 60 | # 合并commit 61 | # 分支开发完成后,很可能有一堆commit,但是合并到主干的时候只希望有少量commit 62 | git reset HEAD~5 63 | git add xxx 64 | git commit -m "Here's the bug fix that closes #28" 65 | 66 | # 推到远程仓库 67 | git push origin myfeature -f 68 | 69 | # rebase代码 70 | git checkout myfeature 71 | git rebase origin/master 72 | git push origin myfeature -f 73 | 74 | # 查看commit更改 75 | git show 76 | git show --name-only 77 | git show 78 | git log --oneline # 查看最近的提交 79 | 80 | # 例如,把 e57b0e6 合并到 17cb931,不保留注释 81 | pick 17cb931 fix && add batch del 82 | f e57b0e6 fix && add batch del 83 | 84 | # 指定需要合并版本号,处理该版本之后的commit,不包含该版本,会进入vi编辑器 85 | git rebase -i 版本号 86 | git commit -n --amend # 或者使用这个命令将其合并进上一个commit 87 | 88 | # 使用别人的patch测试,记得先fetch 89 | git cherry-pick commit_id 90 | ``` 91 | 92 | 给开源仓库提 `pr` 的流程 93 | 94 | 首先在 github 上 fork 你的目标仓库,这样你的主页就出现了一个 fork 的仓库。以 [triton-linalg](https://github.com/Cambricon/triton-linalg) 仓库为例。 95 | 96 | 然后进入本地现有仓库所在目录,通过命令行执行以下命令,将 fork 仓库的地址添加为现有仓库的一个远程地址。 97 | 98 | ```bash 99 | git remote add forked git@github.com:tfruan2000/triton-linalg.git 100 | ``` 101 | 102 | 接着,执行以下命令,将 fork 远程地址与原仓库的远程地址关联起来。 103 | 104 | ```bash 105 | git remote add upstream git@github.com:tfruan2000/triton-linalg.git 106 | ``` 107 | 108 | 至此,我们就成功将 fork 仓库添加到了现有仓库中。使用 `git remote -v ` 查看一下 109 | 110 | ```bash 111 | (triton_env) ➜ triton-linalg git:(correct_some_info) ✗ git remote -v 112 | forked
git@github.com:tfruan2000/triton-linalg.git (fetch) 113 | forked git@github.com:tfruan2000/triton-linalg.git (push) 114 | origin https://github.com/Cambricon/triton-linalg.git (fetch) 115 | origin https://github.com/Cambricon/triton-linalg.git (push) 116 | upstream git@github.com:tfruan2000/triton-linalg.git (fetch) 117 | upstream git@github.com:tfruan2000/triton-linalg.git (push) 118 | ``` 119 | 120 | 然后就可以把推到远端了,例如要推 `correct_some_info` 分支 121 | 122 | ```bash 123 | git push forked correct_some_info:correct_some_info 124 | ``` -------------------------------------------------------------------------------- /tools/linux.md: -------------------------------------------------------------------------------- 1 | # linux 终端基础操作 2 | 3 | - mkdir 4 | - mkdir 文件名 : 在当前目录下新建 “文件名”的文件夹 5 | 6 | - cd 7 | - cd 文件名 : 进入当前目录下 “文件名”的文件夹, 8 | 9 | - pwd 10 | - pwd : 显示当前位置 11 | 12 | - ls / ll 13 | - ls : 显示当前目录下的文件 14 | - ll : 显示当前目录下的文件,包括隐藏文件 15 | 16 | - touch 17 | - touch 文件名 : 在当前目录下新建 “文件名”的文件 18 | 19 | - cp 20 | - cp 源文件 目标文件(夹) : 复制文件或文件夹 21 | - cp -r 源文件 目标文件(夹) : 复制文件夹 22 | 23 | - mv 24 | - mv 源文件 目标文件(夹) : 移动文件或文件夹 25 | - mv 源文件 目标文件(夹) : 重命名文件或文件夹 26 | 27 | - rm 28 | - rm 文件名 : 删除文件 29 | - rm -rf 文件名 : 删除文件夹 30 | 31 | - tar 32 | - tar -zxvf 文件名.tar.gz : 解压tar.gz 33 | - tar -zcvf 文件名.tar.gz 文件名 : 压缩文件 34 | 35 | - unzip 36 | - unzip 文件名.zip : 解压zip 37 | 38 | - du 39 | - du -ah --max-depth=1 : 显示当前目录下各个文件占据内存 40 | 41 | - df 42 | - df -h : 显示磁盘使用情况 43 | 44 | - cat 45 | - cat 文件名 : 查看文件内容 46 | 47 | - vim / vi 48 | - vim 文件名 : 编辑文件 49 | 50 | - tree 51 | - tree : 显示目录树 52 | 53 | - grep 54 | - grep -rni "关键词" 文件名 : 在文件中搜索关键词 55 | 56 | - find 57 | - find . -name "文件名" : 在当前目录下查找文件 58 | - find . 
-name "文件名" -exec rm -rf {} \; : 删除查找到的文件 59 | 60 | - ps 61 | - ps -ef | grep "进程名" : 查看进程 62 | - kill -9 进程号 : 杀死进程 63 | 64 | - jobs 65 | - jobs : 查看后台运行的任务 66 | - fg %n : 将后台任务调到前台运行 67 | - bg %n : 将前台任务调到后台运行 68 | 69 | - scp 70 | - scp -r 文件名 用户名@IP地址:目标路径 : 上传文件 71 | - scp -r 用户名@IP地址:文件名 目标路径 : 下载文件 72 | 73 | - ssh 74 | - ssh 用户名@IP地址 : 远程登录 75 | 76 | - wget / curl 77 | - wget / curl 下载链接 : 下载文件 78 | 79 | - apt 80 | - apt update : 更新软件源 81 | - apt upgrade : 更新软件 82 | - apt install 软件名 : 安装软件 83 | - apt remove 软件名 : 卸载软件 84 | 85 | - sh 86 | - sh 文件名 : 运行脚本 87 | 88 | - chmod 89 | - chmod 777 文件名 : 修改文件权限 90 | 91 | - chown 92 | - chown 用户名 文件名 : 修改文件所有者 93 | 94 | - ln 95 | - ln -s 源文件 目标文件 : 创建软链接 96 | -------------------------------------------------------------------------------- /tools/macos.md: -------------------------------------------------------------------------------- 1 | # mac 配置 2 | 3 | ## homebrew 4 | 5 | 参考:https://mirrors.tuna.tsinghua.edu.cn/help/homebrew/ 6 | 7 | 安装需求: 8 | 9 | - 对于 macOS 用户,系统自带 bash、git 和 curl,在命令行输入 `xcode-select --install` 安装 CLT for Xcode 即可。 10 | 11 | - 对于 Linux 用户,系统自带 bash,仅需额外安装 git 和 curl。 12 | 13 | 安装 Homebrew / Linuxbrew: 14 | 15 | ```bash 16 | # 从本镜像下载安装脚本并安装 Homebrew / Linuxbrew 17 | git clone --depth=1 https://mirrors.tuna.tsinghua.edu.cn/git/homebrew/install.git brew-install 18 | /bin/bash brew-install/install.sh 19 | rm -rf brew-install # 删掉多余的安装包 20 | 21 | # 也可从 GitHub 获取官方安装脚本安装 Homebrew / Linuxbrew 22 | /bin/bash -c "$(curl -fsSL https://github.com/Homebrew/install/raw/master/install.sh)" 23 | ``` 24 | 25 | 加入环境变量 26 | 27 | ```bash 28 | #以下针对基于 Apple Silicon CPU 设备上的 macOS 系统(命令行运行 uname -m 应输出 arm64)上的 Homebrew: 29 | test -r ~/.bash_profile && echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> ~/.bash_profile 30 | test -r ~/.zprofile && echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> ~/.zprofile 31 | 32 | #对基于 Intel CPU 设备上的 macOS 系统(命令行运行 uname -m 应输出 x86_64)的用户可跳过本步。 33 | 34 | #以下针对 Linux 系统上的 
Linuxbrew: 35 | test -d ~/.linuxbrew && eval "$(~/.linuxbrew/bin/brew shellenv)" 36 | test -d /home/linuxbrew/.linuxbrew && eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)" 37 | test -r ~/.bash_profile && echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.bash_profile 38 | test -r ~/.profile && echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.profile 39 | test -r ~/.zprofile && echo "eval \"\$($(brew --prefix)/bin/brew shellenv)\"" >> ~/.zprofile 40 | ``` 41 | 42 | 换源: 43 | 44 | ```bash 45 | export HOMEBREW_INSTALL_FROM_API=1 46 | export HOMEBREW_API_DOMAIN="https://mirrors.tuna.tsinghua.edu.cn/homebrew-bottles/api" 47 | export HOMEBREW_BOTTLE_DOMAIN="https://mirrors.tuna.tsinghua.edu.cn/homebrew-bottles" 48 | export HOMEBREW_BREW_GIT_REMOTE="https://mirrors.tuna.tsinghua.edu.cn/git/homebrew/brew.git" 49 | export HOMEBREW_CORE_GIT_REMOTE="https://mirrors.tuna.tsinghua.edu.cn/git/homebrew/homebrew-core.git" 50 | brew update 51 | ``` 52 | 53 | 配置好后再使用 `brew install` 安装软件 54 | 55 | ```bash 56 | brew install --cask google-chrome 57 | brew install miniconda 58 | ``` 59 | 60 | ## oh my zsh 61 | 62 | 安装zsh `brew install zsh zsh-completions` 63 | 64 | 切换到zsh `[sudo] chsh -s $(which zsh)` 65 | 66 | 安装oh-my-zsh 67 | 68 | ```bash 69 | git clone https://github.com/ohmyzsh/ohmyzsh.git ~/.oh-my-zsh 70 | ``` 71 | 72 | 修改主题,在 `~/.zshrc` 里设置 `ZSH_THEME="ys"` 73 | 74 | 安装插件 75 | 常用autojump、zsh-autosuggestions、zsh-syntax-highlighting三个插件 76 | ```bash 77 | cd ~/.oh-my-zsh/plugins 78 | brew install autojump 79 | git clone https://github.com/zsh-users/zsh-syntax-highlighting.git 80 | git clone https://github.com/zsh-users/zsh-autosuggestions.git 81 | ``` 82 | 然后在 `~/.zshrc` 找到 `plugins=` 添加下面的,最后保存执行 `source ~/.zshrc` 83 | 84 | ```bash 85 | plugins=( 86 | autojump 87 | git zsh-autosuggestions 88 | git zsh-syntax-highlighting 89 | ) 90 | ``` 91 | 92 | ## fzf 93 | 94 | 用来增强搜索 `ctrl + r` / `command + r` 95 | 96 | ```bash 97 | git clone --depth 1
https://github.com/junegunn/fzf.git ~/.fzf 98 | ~/.fzf/install 99 | ``` 100 | -------------------------------------------------------------------------------- /tools/tmux.md: -------------------------------------------------------------------------------- 1 | # tmux 2 | 3 | 4 | 5 | ## 配置 6 | 7 | vim ~/.tmux.config 8 | tmux source ~/.tmux.config 9 | 10 | ```bash 11 | ## ====================== 将以下内容输入 12 | 13 | #set -g prefix C-z # 修改 默认的ctrl-b 组合键为 ctrl-z 14 | 15 | bind | split-window -h # ctrl-b + | 左右分屏 16 | bind - split-window -v # ctrl-b + - 上下分屏 17 | 18 | # 开启鼠标切换tmux窗口 19 | setw -g mouse-resize-pane on 20 | setw -g mouse-select-pane on 21 | setw -g mouse-select-window on 22 | setw -g mode-mouse on 23 | 24 | set -g base-index 1 # 窗口编号从 1 开始计数 25 | set -g pane-base-index 1 # 窗格编号从 1 开始计数 26 | set -g renumber-windows on # 关掉某个窗口后,编号重排 27 | setw -g allow-rename off # 禁止活动进程修改窗口名 28 | setw -g automatic-rename off # 禁止自动命名新窗口 29 | 30 | set -g status-right '#{prefix_highlight} #H | %a %Y-%m-%d %H:%M' 31 | set -g @prefix_highlight_show_copy_mode 'on' 32 | set -g @prefix_highlight_copy_mode_attr 'fg=white,bg=blue' 33 | ## ====================== :wq! 
保存退出 34 | ``` 35 | 36 | ## 操作 37 | 38 | ```bash 39 | tmux # 开启一个窗口 40 | exit # 销毁/关闭该窗口 41 | tmux detach # 将当前会话与窗口分离,跑长时间记得使用(快捷键 按下ctrl-b松手 再按 d) 42 | tmux attach -t # 例如 tmux attach -t 0 43 | 44 | tmux ls # 查看当前所有的 Tmux 会话(快捷键ctrl-b + s) 45 | tmux kill-session -t 0 # 命令用于杀死某个会话,数字0是编号 46 | 47 | tmux split-window # 划分上下两个窗格 ctrl-b + - 48 | tmux split-window -h # 划分左右两个窗格 ctrl-b + | 49 | 50 | # 快捷键Ctrl+b :光标切换到其他窗格。ctrl-b + 上下左右 51 | tmux select-pane -U # 光标切换到上方窗格 52 | tmux select-pane -D # 光标切换到下方窗格 53 | tmux select-pane -L # 光标切换到左边窗格 54 | tmux select-pane -R # 光标切换到右边窗格 55 | ``` 56 | 57 | 一般连接服务器后,如果某个进程需要长时间运行,就先用 `tmux` 开启会话,跑任务,再 `tmux detach` 58 | 59 | 更多见:https://www.ruanyifeng.com/blog/2019/10/tmux.html 60 | 61 | -------------------------------------------------------------------------------- /tools/vim.md: -------------------------------------------------------------------------------- 1 | # Vim 2 | 3 | 4 | 5 | ## 配置 6 | 7 | vim ~/.vimrc 8 | 9 | source ~/.vimrc 10 | 11 | ```bash 12 | 13 | set wildmenu"按TAB键时命令行自动补齐" 14 | set ignorecase"忽略大小写" 15 | set number "显示行号" 16 | set ruler"显示当前光标位置" 17 | set autoread"文件在Vim之外修改过,自动重新读入" 18 | set autowrite"设置自动保存内容" 19 | set autochdir"当前目录随着被编辑文件的改变而改变" 20 | set cindent "c/c++自动缩进" 21 | set smartindent 22 | set autoindent"参考上一行的缩进方式进行自动缩进" 23 | set softtabstop=4 "4 character as a tab" 24 | set shiftwidth=4 25 | set smarttab 26 | set hlsearch "开启搜索结果的高亮显示" 27 | set incsearch "边输入边搜索(实时搜索)" 28 | 29 | ``` 30 | 31 | ## 操作 32 | 33 | - 模式切换 34 | - 正常为命令模式,按 `h` `j` `k` `l` 分别为左下上右 35 | - 按i进入编辑模式,按esc退出编辑模式 36 | - 按v进入可视模式,此时是一个个选择,按V进入行选择 37 | 38 | - 复制 39 | - 在命令模式下,将光标移动到将要复制的行处,按 `yy` 进行复制; 40 | - 按 `nyy` 复制n行;其中n为1、2、3…… 41 | 42 | - 粘贴 43 | - 按 `p` 进行粘贴 44 | 45 | - 删除 46 | - 按 `dd` 删除当前行;按 `ndd` 删除n行,其中n表示删除的行数 47 | 48 | - 撤回 49 | - 撤回上一步操作:按 `u`(连续按 `u` 可撤回多步,按 `ctrl + r` 重做) 50 | - 撤回对当前行的全部修改:按 `U` 51 | 52 | - 查找 53 | - 按 `/` 进入查找模式,输入关键词,按 `n` 查找下一个,按 `N` 查找上一个 54 | 55 | - 替换 56 | - 按 `:%s/old/new/g` 进行替换 57 | - `g` 表示全局替换 58 | 59 | - 在vim中比较两个文件的不同 60 | -
`vimdiff a.file b.file` 或者在一个文件中 `:vs b.file` 61 | - 使用 `ctrl + w` + `w` 进行切换左右侧 62 | 63 | ## nvim 64 | 65 | nvim比vim感觉更好看点,而且好用些 66 | 67 | 安装neovim 68 | 69 | - Mac 70 | 71 | ```bash 72 | brew install neovim 73 | ``` 74 | 75 | - linux 76 | 77 | ```bash 78 | curl -LO https://github.com/neovim/neovim/releases/latest/download/nvim.appimage 79 | chmod u+x nvim.appimage 80 | ./nvim.appimage 81 | 82 | mv ./nvim.appimage ~/.local/bin/nvim 83 | ``` 84 | 85 | 再创建配置文件,并和 `~/.vimrc` 一样配置 86 | 87 | ```bash 88 | mkdir ~/.config/nvim 89 | touch ~/.config/nvim/init.vim 90 | 91 | cp ~/.vimrc ~/.config/nvim/init.vim 92 | ``` 93 | 94 | 然后修改 `~/.zshrc` 或 `~/.bashrc` 95 | 96 | ```bash 97 | alias vim="nvim" 98 | alias vi="nvim" 99 | ``` 100 | -------------------------------------------------------------------------------- /tools/vim_file.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tfruan2000/mlsys-study-note/19f39c66c737950818d58785fddb98399ee7d3c0/tools/vim_file.zip --------------------------------------------------------------------------------