├── .gitignore
├── .pre-commit-config.yaml
├── .style.yapf
├── Fluid
    ├── developer's_guide_for_Fluid
    │   ├── Developer's_Guide_to_Paddle_Fluid.md
    │   ├── Developer's_Guide_to_Paddle_Fluid.pdf
    │   └── images
    │   │   ├── 1.png
    │   │   ├── 2.png
    │   │   ├── 3.png
    │   │   ├── 4.png
    │   │   ├── LoDTensor.png
    │   │   ├── compile_run_time.png
    │   │   ├── executor.png
    │   │   ├── fluid-compiler.png
    │   │   ├── fluid_examples.png
    │   │   ├── fluid_module_1.png
    │   │   ├── fluid_module_2.png
    │   │   ├── graph_construction_example_all.png
    │   │   ├── layer.png
    │   │   ├── operator1.png
    │   │   ├── operator2.png
    │   │   ├── place.png
    │   │   ├── print_fluid_program.png
    │   │   ├── program_desc1.png
    │   │   ├── program_desc2.png
    │   │   ├── raw_input.png
    │   │   ├── scope_variable_tensor.png
    │   │   ├── sorted_input.png
    │   │   ├── transpiler.png
    │   │   └── user_interface.png
    └── nmt_on_fluid
    │   ├── NMT on fluid.md
    │   └── images
    │       ├── 1.png
    │       ├── 2.png
    │       ├── 3.png
    │       ├── 4.png
    │       ├── attention.png
    │       ├── raw_input.png
    │       ├── sorted_input.png
    │       └── user_interface.png
├── README.md
├── TeXNotes
    ├── .gitignore
    ├── 00_templates
    │   ├── assignment
    │   │   ├── contents.tex
    │   │   ├── figures
    │   │   │   └── figure1.png
    │   │   ├── main.tex
    │   │   ├── references.bib
    │   │   └── structure.tex
    │   └── slides
    │   │   ├── slides.tex
    │   │   └── structure.tex
    ├── Efficient_attention_and_RNNs
    │   ├── contents
    │   │   ├── linear_rnn.tex
    │   │   ├── loop_bounds.tex
    │   │   ├── miscellany.tex
    │   │   ├── parallel_rnn.tex
    │   │   ├── scan.tex
    │   │   ├── ssm.tex
    │   │   └── stacked_rnns.tex
    │   ├── define_language.tex
    │   ├── figures
    │   │   ├── SSM-overview.pdf
    │   │   ├── attention-train.pdf
    │   │   ├── attention.pdf
    │   │   ├── cond_branchs.pdf
    │   │   ├── figures.pptx
    │   │   ├── mamba-mixer.pdf
    │   │   ├── mamba-model.pdf
    │   │   ├── mamba-ssm.pdf
    │   │   ├── rnn_layer1.pdf
    │   │   ├── rnn_layer2.pdf
    │   │   ├── scan_step.pdf
    │   │   ├── signal_flow_structure_of_stacked_rnn.pdf
    │   │   ├── stacked_rnns1.pdf
    │   │   └── stacked_rnns2.pdf
    │   ├── main.pdf
    │   ├── main.tex
    │   ├── references.bib
    │   └── structure.tex
    ├── Flash_Attention
    │   ├── contents
    │   │   ├── CTA_offset.tex
    │   │   ├── IO_complexity.tex
    │   │   ├── flash_attention.tex
    │   │   ├── fuse_reduce.tex
    │   │   └── online_softmax.tex
    │   ├── figures
    │   │   ├── attention_offset-a.pdf
    │   │   ├── attention_offset-b.pdf
    │   │   ├── fuse_reduce.pdf
    │   │   └── logsoftmax_expression_tree.pdf
    │   ├── main.pdf
    │   ├── main.tex
    │   ├── references.bib
    │   └── structure.tex
    ├── Formalize_Flash_Attention
    │   ├── FlashAttention_formalization.pdf
    │   ├── contents
    │   │   ├── Background.tex
    │   │   ├── backward.tex
    │   │   ├── beyond_flash_attention.tex
    │   │   ├── fused_chained_map_and_then_aggregate.tex
    │   │   ├── map_and_then_aggreage.tex
    │   │   └── welford_algorithm.tex
    │   ├── figures
    │   │   ├── Transformer-block.pdf
    │   │   ├── Transformer-block.png
    │   │   ├── attention.pptx
    │   │   ├── attention_expression_tree1.pdf
    │   │   ├── attention_expression_tree2.pdf
    │   │   ├── attention_expression_tree3.pdf
    │   │   ├── fused_mha.pdf
    │   │   ├── fused_transformer_block.pdf
    │   │   ├── map_and_aggregate.pdf
    │   │   ├── multi-scale-attn1.pdf
    │   │   ├── multi-scale-attn2.pdf
    │   │   └── transformer.png
    │   ├── main.pdf
    │   ├── main.tex
    │   ├── references.bib
    │   └── structure.tex
    ├── LLM_inference
    │   ├── contents
    │   │   ├── background.tex
    │   │   ├── bytetransformer.tex
    │   │   ├── deepspeed-inference.tex
    │   │   └── flexgen.tex
    │   ├── images
    │   │   ├── BERT-performance-breakdown.pdf
    │   │   ├── ILP.png
    │   │   ├── LLM-inference-graph.png
    │   │   ├── LLM.xlsx
    │   │   ├── MHA-variable-seq-length.pdf
    │   │   ├── SBI-GEMM.pdf
    │   │   ├── block-schedule-algorithm.png
    │   │   ├── block-schedule.pdf
    │   │   ├── byte-transformer-overview2.pdf
    │   │   ├── bytetransformer-padding-free-input-batch.pdf
    │   │   ├── bytetransformer_overview.pdf
    │   │   ├── different-batch-size-in-pipeline-parallelism.png
    │   │   ├── fused_transformer_block-deepspeed-inference.pdf
    │   │   ├── grouped-gemm.pdf
    │   │   ├── grouped-mha.pdf
    │   │   ├── llm-inference.pdf
    │   │   ├── llm-inference.pptx
    │   │   ├── logo.jpeg
    │   │   ├── memory-system.pdf
    │   │   ├── pipeline-schedule-deep-speed-inference.png
    │   │   ├── two-stages-in-llm-inference.pdf
    │   │   ├── variable-length-mha.pdf
    │   │   └── zero-padding-algorithm.pdf
    │   ├── llm_inference.pdf
    │   ├── llm_inference.tex
    │   └── references.bib
    └── Parallel_Execution_of_DO_Loops
    │   ├── basics.tex
    │   ├── example.tex
    │   ├── hyperplane.tex
    │   ├── hypertheorem.tex
    │   ├── main.pdf
    │   ├── main.tex
    │   ├── optimalpi.tex
    │   ├── references.bib
    │   └── structure.tex
├── code_reading
    ├── README.md
    └── pet
    │   ├── README.md
    │   ├── basics.md
    │   ├── clang_pulgin.md
    │   ├── extract_scop.md
    │   └── tree2scop.md
├── engineering_a_compiler
    └── scanner.md
├── julia_learning_notes
    ├── Basics
    │   ├── 01_JPL_FQ.md
    │   ├── 02_JPL_gpu_computing.md
    │   ├── 03_JPL_metaprogramming.md
    │   ├── Generated_function.ipynb
    │   ├── Good_references.md
    │   ├── Julia_ASTs.ipynb
    │   ├── Macro.ipynb
    │   ├── Tips.ipynb
    │   ├── Type_inference
    │   │   ├── README.md
    │   │   └── Type_inference.pdf
    │   ├── Types.ipynb
    │   ├── WhyJulia
    │   │   ├── README.md
    │   │   └── whyJulia.pdf
    │   ├── broadcast.ipynb
    │   └── images
    │   │   └── gpu_julia_packages.png
    ├── CodeSnippets
    │   ├── UnionAll_types.ipynb
    │   ├── assignment_and_mutation.ipynb
    │   ├── infer_type_parameter.ipynb
    │   ├── modules.ipynb
    │   ├── parameteric_types.ipynb
    │   └── scope.ipynb
    ├── Flux
    │   ├── Flux_implementation.md
    │   ├── Test_user_interface.ipynb
    │   └── char_rnn_example.ipynb
    ├── IRTools
    │   ├── Meta.ipynb
    │   └── _methods_by_ftype.ipynb
    ├── README.md
    ├── Zygote
    │   ├── CodesStudy1.ipynb
    │   ├── CodesStudy2.ipynb
    │   └── code_snippets
    │   │   ├── hello_world_test.jl
    │   │   └── program_representation_in_Julia.ipynb
    └── experiments
    │   ├── README.md
    │   └── lstm_test
    │       ├── README.md
    │       ├── naive_cpu_test
    │           ├── README.md
    │           ├── Recurrent
    │           │   ├── common.jl
    │           │   ├── lstm.jl
    │           │   └── recurrent.jl
    │           └── cpu_test.jl
    │       └── naive_gpu_test
    │           ├── Recurrent
    │               ├── lstm.jl
    │               └── recurrent.jl
    │           └── gpu_test.jl
├── notes_for_tf_pt
    ├── compile-pt.md
    └── compile-tf.md
├── paper_notes
    ├── Diffusion
    │   └── README.md
    ├── README.md
    ├── Template.md
    ├── array-programming-model
    │   └── README.md
    ├── auto-diff
    │   ├── BP_and_implicit_function_theorem
    │   │   └── README.md
    │   ├── Dynamic_Automatic_Differentiation_of_GPU_Broadcast_Kernels.md
    │   ├── SCT_AD
    │   │   ├── Differentiating_SSA_form_program.md
    │   │   └── README.md
    │   ├── brief_introduction_to_AD.md
    │   ├── images
    │   │   ├── expression_graph.png
    │   │   ├── mix_forward_and_reverse_mode_AD.png
    │   │   └── multidimensional_dual_numbers.png
    │   └── tape_based_ad.md
    ├── compiler-stuffs
    │   ├── CFG-optimizations
    │   │   ├── Control-Flow-Optimization.md
    │   │   ├── README.md
    │   │   └── images
    │   │   │   └── discussion-on-cfg.png
    │   ├── Code-optimizations
    │   │   ├── dmxpy-optimizations.ipynb
    │   │   ├── dmxpy.py
    │   │   ├── images
    │   │   │   ├── dmxpy.png
    │   │   │   ├── dmxpy.pptx
    │   │   │   ├── excerpt-from-dmxpy-in-linpack.png
    │   │   │   ├── manual-optimizations-for-dmxpy.png
    │   │   │   ├── simple-version-of-dmxpy.png
    │   │   │   └── strength-reduction.png
    │   │   ├── introduction-to-optimizations.md
    │   │   └── local-optimizatioins.md
    │   ├── Compiler_and_Interpreter.pdf
    │   ├── Dependence_analysis
    │   │   ├── README.md
    │   │   └── dependence_abstraction
    │   │   │   ├── contents.tex
    │   │   │   ├── main.tex
    │   │   │   ├── references.bib
    │   │   │   └── structure.tex
    │   ├── Intermediate-Representations
    │   │   ├── IR.md
    │   │   └── images
    │   │   │   ├── AST-example.png
    │   │   │   ├── DAG-example.png
    │   │   │   ├── IR-level-of-abstraction.png
    │   │   │   ├── an-simple-example-of-dependency-graph.png
    │   │   │   ├── dependence-graph.png
    │   │   │   ├── different-levels-of-abstraction-for-an-array-subscript-reference.png
    │   │   │   ├── naming-leads-to-different-translations.png
    │   │   │   ├── one-address-code.png
    │   │   │   └── three-address-code.png
    │   ├── Loop_analysis
    │   │   ├── Denpendence_analysis
    │   │   │   ├── Basics
    │   │   │   │   ├── README.md
    │   │   │   │   ├── Terminology.md
    │   │   │   │   └── images
    │   │   │   │   │   ├── a_single_rnn.png
    │   │   │   │   │   ├── bi_directional_rnn.png
    │   │   │   │   │   ├── bi_directional_rnn.pptx
    │   │   │   │   │   ├── different_dependences.png
    │   │   │   │   │   ├── dilated_rnn.png
    │   │   │   │   │   ├── example-01.png
    │   │   │   │   │   ├── grid_rnn.png
    │   │   │   │   │   ├── polyhedral_representation.png
    │   │   │   │   │   ├── stack_rnn.png
    │   │   │   │   │   ├── wh.png
    │   │   │   │   │   └── workflow.png
    │   │   │   └── dependence_analysis.pdf
    │   │   ├── Polyhedral_representation_in_Pet
    │   │   │   ├── references.bib
    │   │   │   ├── section1.tex
    │   │   │   ├── slides.tex
    │   │   │   └── structure.tex
    │   │   ├── The_parallel_execution_of_do_loops.pdf
    │   │   └── auto-vectorization
    │   │   │   ├── README.md
    │   │   │   └── auto-vectorizing-TensorFlow-graphs.md
    │   ├── Polyhedral_compilatioin
    │   │   ├── A_Performance_Vocabulary_for_Affine_Loop_Transformations
    │   │   │   ├── contents.tex
    │   │   │   ├── main.tex
    │   │   │   ├── references.bib
    │   │   │   └── structure.tex
    │   │   ├── A_decopled_approach_to_high-level_loop_optimization.md
    │   │   ├── A_decoupled_approach_to_high-level_loop_optimization
    │   │   │   └── mathematical_foundations_of_polyhedra.md
    │   │   ├── Array_regions_analyses_and_applications
    │   │   │   └── Array_regions_analyses_and_applications.md
    │   │   ├── Data_dependence_and_PIP
    │   │   │   ├── contents.tex
    │   │   │   ├── main.tex
    │   │   │   ├── references.bib
    │   │   │   └── structure.tex
    │   │   ├── Mathmatical_foundations
    │   │   │   └── concepts.md
    │   │   ├── More_Legal_Transformations_for_Locality
    │   │   │   ├── contents.tex
    │   │   │   ├── images
    │   │   │   │   ├── SCoP_decomposition.png
    │   │   │   │   ├── a_skewing_transformation.png
    │   │   │   │   └── farkas_lemma.png
    │   │   │   ├── main.pdf
    │   │   │   ├── main.tex
    │   │   │   ├── references.bib
    │   │   │   └── structure.tex
    │   │   ├── Polyhedral_representation
    │   │   │   ├── 2d_puls_1.tex
    │   │   │   ├── main.tex
    │   │   │   ├── references.bib
    │   │   │   ├── schedule_tree.tex
    │   │   │   └── structure.tex
    │   │   ├── README.md
    │   │   ├── Tiramisu
    │   │   │   ├── README.md
    │   │   │   └── tiramisu_overview.png
    │   │   ├── images
    │   │   │   ├── introduction-01.png
    │   │   │   ├── introduction-02.png
    │   │   │   ├── introduction-03.png
    │   │   │   └── introduction-04.png
    │   │   ├── isl
    │   │   │   ├── contents.tex
    │   │   │   ├── main.pdf
    │   │   │   ├── main.tex
    │   │   │   ├── presburger_sets_and_relations.tex
    │   │   │   ├── pw_quasi_affine.tex
    │   │   │   ├── references.bib
    │   │   │   ├── sets_and_maps.tex
    │   │   │   └── structure.tex
    │   │   ├── mathematical_foundations_of_polyhedra.md
    │   │   ├── polyhedral_background_01.pdf
    │   │   ├── polyhedral_background_02.pdf
    │   │   └── polyhedral_background_03.pdf
    │   ├── README.md
    │   └── intermediate-code-generation.md
    ├── data_processing_systems
    │   ├── CIEL.md
    │   ├── README.md
    │   └── figures
    │   │   ├── ciel_cluster_architecture.png
    │   │   └── dynamic-task-graph.png
    ├── dataflow-architectures
    │   ├── Advances_in_dataflow_programming_languages.md
    │   ├── Dataflow_computers_their_history_and_future.md
    │   ├── README.md
    │   ├── Scheduled_dataflow.md
    │   ├── images
    │   │   ├── Manchester-dynamic-dataflow-machine.png
    │   │   ├── dataflow-accumulator.png
    │   │   ├── dataflow-graph-1.png
    │   │   ├── ill-formed_multi-rate-dataflow.png
    │   │   ├── img1.png
    │   │   ├── img2.png
    │   │   ├── paper-screenshot-1.png
    │   │   ├── periodic_admissible_schedule.png
    │   │   ├── solve_G.q.png
    │   │   ├── static-dataflow-architecture.png
    │   │   ├── topology_matrix.png
    │   │   └── two-input_add_actor_and_two-output_duplicate_actor.png
    │   └── synchronous_dataflow.md
    ├── dl-compiler
    │   ├── Glow
    │   │   ├── Glow.md
    │   │   ├── Glow.pdf
    │   │   └── images
    │   │   │   └── low-level-glow-ir.png
    │   ├── MLIR
    │   │   ├── MLIR.md
    │   │   ├── README.md
    │   │   ├── swift_for_tensorflow.md
    │   │   └── swift_for_tensorflow.pdf
    │   ├── README.md
    │   ├── TVM
    │   │   ├── Relay.md
    │   │   ├── TVM.md
    │   │   └── TVM.pdf
    │   ├── XLA
    │   │   └── XLA.md
    │   ├── figures
    │   │   └── sm-and-sub-core-of-volta.png
    │   └── fusion
    │   │   └── README.md
    ├── dl-models
    │   ├── attention_simplification
    │   │   └── README.md
    │   ├── beyond_transformer
    │   │   └── README.md
    │   ├── miscellanea
    │   │   ├── Geometric_deep_learning.md
    │   │   └── README.md
    │   ├── nlp
    │   │   ├── RNN-modeling
    │   │   │   ├── CW-RNN
    │   │   │   │   ├── A_Clockwork_RNN.md
    │   │   │   │   └── A_Clockwork_RNN.pdf
    │   │   │   ├── GridLSTM
    │   │   │   │   ├── GridLSTM.md
    │   │   │   │   └── GridLSTM.pdf
    │   │   │   ├── HM-LSTM
    │   │   │   │   ├── Hierarchical_multiscale_RNN.md
    │   │   │   │   └── Hierarchical_multiscale_RNN.pdf
    │   │   │   ├── How_Much_Attention_Do_You_Need.md
    │   │   │   ├── MD-LSTM
    │   │   │   │   ├── MD-LSTM.md
    │   │   │   │   └── MD-LSTM.pdf
    │   │   │   ├── Mogrifier-LSTM
    │   │   │   │   ├── contents.tex
    │   │   │   │   ├── images
    │   │   │   │   │   └── MogrifierLSTM.png
    │   │   │   │   ├── main.pdf
    │   │   │   │   ├── main.tex
    │   │   │   │   ├── references.bib
    │   │   │   │   └── structure.tex
    │   │   │   ├── Neural_Speed_Reading_via_Skim_RNN
    │   │   │   │   └── Neural_Speed_Reading_via_Skim_RNN.md
    │   │   │   ├── ON-LSTM
    │   │   │   │   ├── ON-LSTM.md
    │   │   │   │   └── ON-LSTM.pdf
    │   │   │   ├── Quasi-Recurrent_neural_network
    │   │   │   │   ├── Quasi-Recurrent_neural_network.md
    │   │   │   │   └── Quasi-Recurrent_neural_network.pdf
    │   │   │   ├── README.md
    │   │   │   ├── RNN_Variants_Slides_190820
    │   │   │   │   ├── RNN_Variants.md
    │   │   │   │   ├── RNN_Variants.pdf
    │   │   │   │   └── images
    │   │   │   │   │   ├── CudnnLSTM.png
    │   │   │   │   │   └── Recurrent_neural_network_unfold.svg.png
    │   │   │   ├── Sliced_Recurrent_Neural_Networks
    │   │   │   │   ├── Sliced_Recurrent_Neural_Networks.md
    │   │   │   │   └── images
    │   │   │   │   │   └── SRNN.png
    │   │   │   ├── The_Unreasonable_Effectiveness_of_the_Forget_Gate
    │   │   │   │   ├── The_Unreasonable_Effectiveness_of_the_Forget_Gate.md
    │   │   │   │   └── The_Unreasonable_Effectiveness_of_the_Forget_Gate.pdf
    │   │   │   ├── Training_RNNs_as_Fast_as_CNNs
    │   │   │   │   ├── Training_RNNs_as_Fast_as_CNNs.md
    │   │   │   │   └── Training_RNNs_as_Fast_as_CNNs.pdf
    │   │   │   ├── Transformer
    │   │   │   │   ├── README.md
    │   │   │   │   ├── README.pdf
    │   │   │   │   └── images
    │   │   │   │   │   └── QK.png
    │   │   │   ├── WaveRNN.md
    │   │   │   └── images
    │   │   │   │   ├── 2d_lstm_1.png
    │   │   │   │   ├── 3D-GridLSTM.png
    │   │   │   │   ├── CWRNN.png
    │   │   │   │   ├── CWRNN.pptx
    │   │   │   │   ├── DilatedRNN.png
    │   │   │   │   ├── DilatedRNN1.png
    │   │   │   │   ├── GridLSTM-NMT.png
    │   │   │   │   ├── HM-LSTM-pre-activation.png
    │   │   │   │   ├── LSTM_equation.png
    │   │   │   │   ├── active_modules.png
    │   │   │   │   ├── active_modules.pptx
    │   │   │   │   ├── boundary_state.png
    │   │   │   │   ├── boundary_state.pptx
    │   │   │   │   ├── hardsigmoid.png
    │   │   │   │   ├── hm-lstm-cell-update.png
    │   │   │   │   ├── hm-lstm-output-hidden.png
    │   │   │   │   ├── multi-dimensioanl-rnn.png
    │   │   │   │   ├── multi-dimensional-multi-directional-context.png
    │   │   │   │   ├── wh.png
    │   │   │   │   └── wh.pptx
    │   │   ├── pre-training
    │   │   │   ├── ALBERT.md
    │   │   │   ├── BERT.md
    │   │   │   ├── ELMo.md
    │   │   │   ├── GPT.md
    │   │   │   ├── README.md
    │   │   │   ├── ULM-FiT.md
    │   │   │   ├── XLNet.md
    │   │   │   ├── images
    │   │   │   │   ├── ELMo.png
    │   │   │   │   ├── GPT-auxiliary-training-object.png
    │   │   │   │   ├── STLR-figure.png
    │   │   │   │   ├── ULM-FiT-STLR.png
    │   │   │   │   ├── biLM-ELMo.png
    │   │   │   │   ├── dataset-ULM-FiT.png
    │   │   │   │   ├── elmo-vectors.png
    │   │   │   │   ├── example-language-inference.jpg
    │   │   │   │   ├── example-of-auxiliary-prediction-taks.png
    │   │   │   │   ├── highlight-bert-LM1.png
    │   │   │   │   ├── highlight-bert-LM2.png
    │   │   │   │   ├── highlight-bert-LM3.png
    │   │   │   │   ├── highlight-bert-input.png
    │   │   │   │   ├── highway.png
    │   │   │   │   ├── highway2.png
    │   │   │   │   ├── how-bert-comes-out.png
    │   │   │   │   ├── input-of-bert.png
    │   │   │   │   ├── intro.png
    │   │   │   │   ├── task-specific-input-transformation.png
    │   │   │   │   └── transformer-block.png
    │   │   │   ├── learning-language-representation-slides.md
    │   │   │   └── learning-language-representation-slides.pdf
    │   │   └── x-former
    │   │   │   └── README.md
    │   ├── structured_state_space_models
    │   │   ├── README.md
    │   │   └── maba
    │   │   │   └── README.md
    │   └── vision
    │   │   ├── README.md
    │   │   ├── ResNeXt
    │   │       └── README.md
    │   │   ├── SSD
    │   │       ├── SSD.md
    │   │       └── images
    │   │       │   ├── SSD.png
    │   │       │   └── SSD2.png
    │   │   ├── SqueezeNet
    │   │       └── README.md
    │   │   └── Xception
    │   │       └── README.md
    ├── dl-systems
    │   ├── A_computational_model_for_TensorFlow.md
    │   ├── AutoGraph.md
    │   ├── Beyond_Data_and_Model_Parallelism_for_Deep_Neural_Networks.md
    │   ├── Cavs_An_Efficient_Runtime_System_for_Dynamic_Neural_Networks.md
    │   ├── JANUS.md
    │   ├── JAX.md
    │   ├── Machine_Learning_Systems_are_Stuck_in_a_Rut.md
    │   ├── Pydron.md
    │   ├── TensorFlow_Eager.md
    │   ├── images
    │   │   ├── Pydron.png
    │   │   ├── SSA_translation.png
    │   │   ├── functioin_call_translation.png
    │   │   ├── tf_eager_01.png
    │   │   ├── tf_eager_02.png
    │   │   └── tf_eager_03.png
    │   └── tf-cfg-design
    │   │   ├── Deep_learning_with_dynamic_computation_graphs.md
    │   │   ├── Dynamic_Control_Flow_in_Large-Scale_Machine_Learning.md
    │   │   └── tf-while-op-impl.md
    ├── dl-workload-optimizations
    │   ├── DeepCPU.md
    │   ├── Optimizing_RNN_performance
    │   │   ├── Optimizing_RNN_performance.md
    │   │   ├── Optimizing_RNN_performance.pdf
    │   │   └── images
    │   │   │   ├── multiple_layer_optimization.png
    │   │   │   ├── pic1.png
    │   │   │   ├── pic2.png
    │   │   │   ├── single_cell_optimization.png
    │   │   │   ├── single_layer_optimization.png
    │   │   │   └── starting_point.png
    │   ├── README.md
    │   └── ShuffleNet_v2.md
    ├── generalization-of-neural-network
    │   ├── README.md
    │   ├── rendered
    │   │   ├── A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.pdf
    │   │   ├── Bayesian_Model_Comparison.pdf
    │   │   ├── Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.pdf
    │   │   ├── Large_Batch_Training_of_Convolutional_Networks.pdf
    │   │   ├── On_Large-Batch_Training_for_Deep_Learning.pdf
    │   │   └── Train_Longer_Generalize_Better.pdf
    │   └── sources
    │   │   ├── A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.md
    │   │   ├── Accurate_Large_Minibatch_SGD.md
    │   │   ├── Bayesian_Model_Comparison.md
    │   │   ├── Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.md
    │   │   ├── Large_Batch_Training_of_Convolutional_Networks.md
    │   │   ├── On_Large-Batch_Training_for_Deep_Learning.md
    │   │   ├── Train_Longer_Generalize_Better.md
    │   │   └── images
    │   │       ├── f1.png
    │   │       ├── f2.png
    │   │       ├── fig1.png
    │   │       ├── figure1.png
    │   │       ├── figure2.png
    │   │       ├── insert_bn_after_pool5.png
    │   │       ├── mixed_precision_with_LARS.png
    │   │       ├── network_configuration.png
    │   │       ├── scalability.png
    │   │       ├── sharpness_metric.png
    │   │       ├── sharpness_of_minimizers_1.png
    │   │       ├── sharpness_of_minimizers_2.png
    │   │       └── warmup_experiments.png
    ├── large-language-models
    │   ├── GPT
    │   │   ├── GPT-models.md
    │   │   ├── README.md
    │   │   └── figures
    │   │   │   ├── Full_GPT_architecture.png
    │   │   │   └── GPT-3-model-size.png
    │   ├── README.md
    │   ├── fast-attention
    │   │   ├── Flash-Attention.pdf
    │   │   └── README.md
    │   ├── llm_inference.pdf
    │   ├── transformer-optimizations
    │   │   ├── FlexGen.md
    │   │   ├── README.md
    │   │   ├── RMS_layernorm.md
    │   │   ├── figures
    │   │   │   ├── block-schedule-with-overlap.png
    │   │   │   ├── computeation-graph-of-llm-inference.png
    │   │   │   ├── pre-post-layer-normalization-in-transformer.png
    │   │   │   └── two-different-schedules.png
    │   │   └── welford_algorithm_and_layer_norm.md
    │   └── whisper
    │   │   ├── README.md
    │   │   └── figures
    │   │       ├── whisper-model-size.png
    │   │       └── whisper_overview.png
    ├── leading-edge-ai
    │   ├── Capsules
    │   │   ├── Dynamic_Routing_between_Capsule.md
    │   │   └── README.md
    │   ├── README.md
    │   └── RIM.md
    ├── miscellanea
    │   ├── Neural_Ordinary_Differential_Equations.md
    │   └── README.md
    ├── ml-with-discrete-variables
    │   ├── README.md
    │   └── Straight-throughEstimator.md
    ├── normalization-in-NN
    │   ├── L2_Regularization_versus_Batch_and_Weight_Normalization
    │   │   └── L2_Regularization_versus_Batch_and_Weight_Normalization.md
    │   ├── Layer_Normalization
    │   │   ├── layer_normalization.md
    │   │   └── layer_normalization.pdf
    │   ├── README.md
    │   ├── Weight_Normalization
    │   │   ├── weight_normalization.md
    │   │   └── weight_normalization.pdf
    │   └── optimization
    │   │   ├── Hessian_and_DeepLearning_Optimizaiton.md
    │   │   └── Hessian_and_DeepLearning_Optimizaiton.pdf
    ├── parallel-computing
    │   ├── IRs
    │   │   ├── lift.md
    │   │   └── nova.md
    │   ├── Nesl
    │   │   ├── VCODE.md
    │   │   ├── images
    │   │   │   └── nesl-1.png
    │   │   └── nesl.md
    │   ├── README.md
    │   ├── data_parallel_language
    │   │   └── README.md
    │   ├── execution_model
    │   │   ├── ActorModel.md
    │   │   ├── CSP.md
    │   │   └── MessagePassing.md
    │   └── programming_model
    │   │   ├── DMLL.md
    │   │   ├── README.md
    │   │   ├── collection_orientated_languages.md
    │   │   └── images
    │   │       ├── DMLL-comparison.png
    │   │       ├── co-ori-lang-1.png
    │   │       ├── mimd.gif
    │   │       ├── simd.gif
    │   │       └── vcode-instruction.png
    ├── partial_aggregation
    │   ├── README.md
    │   └── figures
    │   │   ├── decomposable_function_1.png
    │   │   ├── decomposable_function_2.png
    │   │   ├── decomposable_function_3.png
    │   │   ├── decomposable_function_4.png
    │   │   ├── execution_plan_1.png
    │   │   └── execution_plan_2.png
    ├── programming-language
    │   ├── About_programming_language.md
    │   ├── Glossary
    │   │   ├── README.md
    │   │   ├── basic_concepts.md
    │   │   ├── images
    │   │   │   └── 1920px-Tree_edges.svg.png
    │   │   ├── program_analysis.md
    │   │   ├── programming_paradigm.md
    │   │   └── type.md
    │   ├── Nominative_and_structure_type.md
    │   ├── PL_Design.md
    │   ├── ProgrammingParadigms.md
    │   ├── README.md
    │   ├── SSA
    │   │   ├── README.md
    │   │   ├── SSA.md
    │   │   ├── Simple_and_Efficient_Construction_of_Static_Single_Assignment_Form.md
    │   │   └── images
    │   │   │   ├── SSA_example1.1.png
    │   │   │   ├── SSA_example1.2.png
    │   │   │   └── SSA_example1.3.png
    │   ├── Types.md
    │   └── abstract_binding_tree.md
    ├── tensor_operations
    │   ├── README.md
    │   ├── concepts.tex
    │   ├── constructs.tex
    │   ├── images
    │   │   ├── mm_example.png
    │   │   ├── nested_tensorarray.png
    │   │   ├── tensor.png
    │   │   └── transformer.png
    │   ├── ir.tex
    │   ├── item_access.tex
    │   ├── main.pdf
    │   ├── main.tex
    │   ├── nn.tex
    │   ├── optimization.tex
    │   ├── shape_operation.tex
    │   ├── structure.tex
    │   ├── tensorarray_creation.tex
    │   ├── transformer.tex
    │   └── vectorization.tex
    └── type-systems
    │   ├── README.md
    │   ├── basic-concepts.md
    │   └── notations
    │       ├── README.md
    │       ├── bussproofs.sty
    │       ├── contents
    │           └── kinding.tex
    │       ├── formal-grammar.sty
    │       ├── main.pdf
    │       ├── main.tex
    │       └── structure.tex
├── reinforcement_learning
    ├── README.md
    └── basic_concepts
    │   ├── basic_concepts_about_reinforcement_learning.pdf
    │   └── basic_concepts_about_reinforcement_learning.ppt
├── text_generation_for_gitchat
    ├── README.md
    ├── pic
    │   ├── pic1.one-hot.png
    │   ├── pic10.highway.png
    │   ├── pic11.generate_text_from_language_model.png
    │   ├── pic12.encoder_decoder.png
    │   ├── pic13.neural_turing_machine.png
    │   ├── pic2.word_embedding.png
    │   ├── pic3.rnn.png
    │   ├── pic4.bp_through_all_nodes.png
    │   ├── pic5.bp_through_shortcut.png
    │   ├── pic6.lstm.png
    │   ├── pic7.gru.png
    │   ├── pic8.deep_rnn.png
    │   └── pic9.residual_block.png
    ├── text_generation.pdf
    └── trans_2_html.sh
└── tiled_efficient_attention
    ├── README.md
    ├── README.pdf
    ├── figures
        ├── cal_p.png
        ├── chunk_form_parallelism.png
        ├── chunk_recurrent.png
        ├── first_kv.png
        ├── fused_chunk_gla_fwd_kernel.png
        ├── fwd_decay_cumsum.png
        ├── fwd_inner_chunk.png
        ├── gated_linear_attention.pptx
        ├── gated_linear_attention_layer.png
        ├── gla_data_accessed.png
        ├── gla_equation.png
        ├── last_decay.png
        └── ~$gated_linear_attention.pptx
    ├── main.py
    └── model
        ├── __init__.py
        ├── chunk.py
        ├── chunk_fuse.py
        ├── chunk_util.py
        ├── configuration.py
        ├── gla.py
        ├── naive.py
        ├── recurrent_fuse.py
        └── utils.py


/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | -   repo: https://github.com/pre-commit/mirrors-yapf.git
 2 |     rev: v0.16.0  # 'rev' supersedes the deprecated 'sha' key
 3 |     hooks:
 4 |     -   id: yapf
 5 |         files: \.py$
 6 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 7 |     rev: a11d9314b22d8f8c7556443875b731ef05965464
 8 |     hooks:
 9 |     -   id: check-merge-conflict
10 |     -   id: check-symlinks
11 |     -   id: detect-private-key
12 |         files: (?!.*paddle)^.*$  # negative lookahead: skip paths containing "paddle"
13 |     -   id: end-of-file-fixer
14 |         files: \.md$
15 |     -   id: trailing-whitespace
16 |         files: \.md$
17 | -   repo: https://github.com/Lucas-C/pre-commit-hooks
18 |     rev: v1.0.1
19 |     hooks:
20 |     -   id: forbid-crlf
21 |         files: \.md$
22 |     -   id: remove-crlf
23 |         files: \.md$
24 |     -   id: forbid-tabs
25 |         files: \.md$
26 |     -   id: remove-tabs
27 |         files: \.md$
28 | 


--------------------------------------------------------------------------------
/.style.yapf:
--------------------------------------------------------------------------------
1 | [style]
2 | based_on_style = pep8
3 | column_limit = 80
4 | 


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/Developer's_Guide_to_Paddle_Fluid.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/Developer's_Guide_to_Paddle_Fluid.pdf


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/1.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/2.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/3.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/4.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/LoDTensor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/LoDTensor.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/compile_run_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/compile_run_time.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/executor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/executor.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/fluid-compiler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/fluid-compiler.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/fluid_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/fluid_examples.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/fluid_module_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/fluid_module_1.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/fluid_module_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/fluid_module_2.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/graph_construction_example_all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/graph_construction_example_all.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/layer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/layer.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/operator1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/operator1.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/operator2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/operator2.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/place.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/place.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/print_fluid_program.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/print_fluid_program.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/program_desc1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/program_desc1.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/program_desc2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/program_desc2.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/raw_input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/raw_input.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/scope_variable_tensor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/scope_variable_tensor.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/sorted_input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/sorted_input.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/transpiler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/transpiler.png


--------------------------------------------------------------------------------
/Fluid/developer's_guide_for_Fluid/images/user_interface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/user_interface.png


--------------------------------------------------------------------------------
/Fluid/nmt_on_fluid/images/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/1.png


--------------------------------------------------------------------------------
/Fluid/nmt_on_fluid/images/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/2.png


--------------------------------------------------------------------------------
/Fluid/nmt_on_fluid/images/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/3.png


--------------------------------------------------------------------------------
/Fluid/nmt_on_fluid/images/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/4.png


--------------------------------------------------------------------------------
/Fluid/nmt_on_fluid/images/attention.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/attention.png


--------------------------------------------------------------------------------
/Fluid/nmt_on_fluid/images/raw_input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/raw_input.png


--------------------------------------------------------------------------------
/Fluid/nmt_on_fluid/images/sorted_input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/sorted_input.png


--------------------------------------------------------------------------------
/Fluid/nmt_on_fluid/images/user_interface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/user_interface.png


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | My learning notes.
2 | 


--------------------------------------------------------------------------------
/TeXNotes/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/.gitignore


--------------------------------------------------------------------------------
/TeXNotes/00_templates/assignment/contents.tex:
--------------------------------------------------------------------------------
 1 | 从global向shared memory传输数据和从shared memory向register file传输数据总觉得有某种微妙的不同。
 2 | 
 3 | 我们首先规定\colorbox{hl}{“拷贝”}：将数据$D$从place $A$完全地移动到place $B$，存储为$D'$。$D'$可以是$D$的一个permutation，也就是说$D'$中的元素和$D$中的元素一一对应，元素数目不变，而顺序可以不相同。
 4 | 
 5 | global memory是外存，shared memory，cache和RF是片上存储，后者的capacity总是小于前者。
 6 | 
 7 | 给定一个AccessMap将其完全地翻译成实现，不是一个单纯的copy macro kernel问题。这里面涉及了（1）分数据（根据capacity决定一次空间执行的数据块的大小）；（2）拷贝；（3）时间上重复执行的执行顺序问题；
 8 | 
 9 | 非常朴素地想，计算过程涉及到数据和多线程两种要素。对于数据，我们总是提供逻辑和物理两种视角：
10 | \begin{enumerate}
11 |     \setlength{\itemsep}{-0.1cm}
12 |     \item high-dimensional array-like的\colorbox{hl}{逻辑视角}；能够用\textcolor{blue}{高维逻辑indices寻址}，逻辑视角能够改善可编程性，并且隔离与hardware强相关的实现选择问题
13 |     \item \colorbox{hl}{物理视角}
14 |     \item Layout是logical high-dimensional indices和物理寻址之间的映射函数
15 | \end{enumerate}
16 | 
17 | \begin{figure}[h]
18 |     \centering
19 |     \includegraphics[width=0.8\textwidth]{figures/shared_2_rf_with_ldmatrix.pdf}
20 |     \caption{使用ldmatrix指令从shared memory向register file加载数据。}
21 | \end{figure}
22 | 
23 | 使用ldmatrix从shared memory加载数据到每个线程thread local的寄存器，warp中的每32线程构成一个 $2 \times 2$的线程tile，每个tile内部8线程，
24 | \textcolor{red}{调用ldmatrix的时候每个线程都需要传入一个shared memory指针}，然后单线程读取shared memory中连续128 bits。ldmatrix一次执行最大读取 $16 \times 16$大小的半精度矩阵。
25 | 
26 | 在实现中，每个线程都需要正确地计算出自己要读取的shared memory位置的指针偏移。
27 | 
28 | ldmatrix的一次执行32个线程能一次性读取$16 \times 16$大小的$2D$ tile，一次执行单线程数据tile大小$1 \times 8$，如果将$n$次执行ldmatrix的结果都保留在thread local的寄存器上，单线程数据块的大小是$1 \times \left( n \times 8 \right)$。
29 | 
30 | 所以我们将目标的layout配置成$(1, n*8)$
31 | 
32 | \newpage
33 | 
34 | \begin{enumerate}
35 |     \setlength{\itemsep}{-0.1cm}
36 |     \item 第一个嵌套层级：一个shared memory数据块要转化为$(m, n)$次对copy\_2d\_tile\_s2r在时间上的调用，要用for循环issue出去。每次调用处理一个小分块 $\mathcal{T}$。
37 |     \item 第二个嵌套层级：一个copy\_2d\_tile\_s2r
38 | \end{enumerate}


--------------------------------------------------------------------------------
/TeXNotes/00_templates/assignment/figures/figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/00_templates/assignment/figures/figure1.png


--------------------------------------------------------------------------------
/TeXNotes/00_templates/assignment/references.bib:
--------------------------------------------------------------------------------
 1 | @article{lamport1974parallel,
 2 |   title={The parallel execution of do loops},
 3 |   author={Lamport, Leslie},
 4 |   journal={Communications of the ACM},
 5 |   volume={17},
 6 |   number={2},
 7 |   pages={83--93},
 8 |   year={1974},
 9 |   publisher={ACM New York, NY, USA}
10 | }
11 | 


--------------------------------------------------------------------------------
/TeXNotes/00_templates/slides/slides.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {beamer}
 2 | 
 3 | \input {structure.tex}
 4 | 
 5 | \title[] {Ying's Slides}
 6 | \subtitle{A short story}
 7 | \author {Ying Cao}
 8 | \institute {}
 9 | \date {\today}
10 | 
11 | \begin {document}
12 | 
13 | \AtBeginSection[]
14 | {
15 |   \begin{frame}
16 |     \frametitle{Table of Contents}
17 |     \tableofcontents[currentsection]
18 |   \end{frame}
19 | }
20 | 
21 | \begin {frame}
22 | \titlepage
23 | \end {frame}
24 | 
25 | \section {Section 1}
26 | 
27 | \begin {frame} {Title frame 1}
28 | 
29 | In this slide, some important text will be
30 | \alert{highlighted} because it's important.
31 | Please, don't abuse it.
32 | 
33 | \begin {block} {Block blue}
34 | \begin {itemize}
35 | \item Item 1
36 | \item Item 2
37 | \item Item 3
38 | \end {itemize}
39 | \end {block}
40 | \end {frame}
41 | 
42 | \section {Section 2}
43 | \begin {frame} {Title Frame 2}
44 | \[\ support(X \to Y) = p(X \cup Y) = \frac { {n(X \cup Y)}} {N}\]
45 | \end {frame}
46 | 
47 | \end {document}


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/contents/miscellany.tex:
--------------------------------------------------------------------------------
 1 | 若存在可逆矩阵$P$，使得一个关于矩阵$A$的如下等式成立：
 2 | $$A = PDP^{-1}$$
 3 | 
 4 | 则称符合这样关系的矩阵$A$与$D$是相似矩阵，记作：$A \sim D$，则$A$的幂可以通过求矩阵$D$的幂求得
 5 | 
 6 | $$A^{m} = (PDP^{-1})^{m} = (PDP^{-1})(PDP^{-1})\dots(PDP^{-1})=PD^{m}P^{-1}$$
 7 | 
 8 | \textcolor{red}{如果我们能够得出$D$是一个很简单的矩阵，例如对角矩阵，那么就可以很简单的计算出$A$的幂值}。
 9 | 然而，一般的矩阵在实数域不一定能对角化，但几乎所有矩阵都能在复数域对角化\cite{lru-kexue}。
10 | 于是$A$总能写成：
11 | 
12 | \begin{align*}
13 | A &= P\Lambda P^{-1}, & A^{m} &= P\Lambda^{m} P^{-1}
14 | \end{align*}


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/contents/parallel_rnn.tex:
--------------------------------------------------------------------------------
1 | \subsection{典型代表}
2 | 
3 | \subsubsection{RWKV\cite{peng2023rwkv}}
4 | 
5 | \subsubsection{LRU（Linear Recurrent Unit）\cite{orvieto2023resurrecting}}


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/SSM-overview.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/SSM-overview.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/attention-train.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/attention-train.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/attention.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/attention.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/cond_branchs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/cond_branchs.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/figures.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/figures.pptx


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-mixer.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-mixer.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-model.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-model.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-ssm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-ssm.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/rnn_layer1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/rnn_layer1.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/rnn_layer2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/rnn_layer2.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/scan_step.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/scan_step.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/signal_flow_structure_of_stacked_rnn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/signal_flow_structure_of_stacked_rnn.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/stacked_rnns1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/stacked_rnns1.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/stacked_rnns2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/stacked_rnns2.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/main.pdf


--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/main.tex:
--------------------------------------------------------------------------------
 1 | %!TEX program = xelatex
 2 | \documentclass[UTF8]{ctexart}
 3 | 
 4 | \input{structure.tex}
 5 | \input{define_language.tex}
 6 | 
 7 | \title{RNNs and scan}
 8 | 
 9 | \begin{document}
10 | 
11 | \tableofcontents
12 | \thispagestyle{empty}
13 | \newpage
14 | \setcounter{page}{1}
15 | 
16 | \bibliographystyle{plain}
17 | 
18 | \noindent
19 | \linespread{1.2}
20 | \selectfont
21 | \setlength{\topskip}{0ex}
22 | \setlength{\parskip}{1ex}
23 | \setlength{\lineskip}{1em}
24 | 
25 | \section{Transformer和线性RNN}
26 | \input{contents/linear_rnn.tex}
27 | 
28 | \section{SSM（state-space model）}
29 | \input{contents/ssm.tex}
30 | 
31 | \newpage
32 | \section{并行RNN}
33 | \input{contents/parallel_rnn.tex}
34 | 
35 | \newpage
36 | \section{General non-linear recurrence 的并行计算问题}
37 | \input{contents/stacked_rnns.tex}
38 | \input{contents/loop_bounds.tex}
39 | 
40 | \begin{appendices}
41 | \input{contents/miscellany.tex}
42 | \end{appendices}
43 | 
44 | \newpage
45 | \bibliography{references.bib}
46 | \end{document}
47 | 


--------------------------------------------------------------------------------
/TeXNotes/Flash_Attention/figures/attention_offset-a.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/figures/attention_offset-a.pdf


--------------------------------------------------------------------------------
/TeXNotes/Flash_Attention/figures/attention_offset-b.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/figures/attention_offset-b.pdf


--------------------------------------------------------------------------------
/TeXNotes/Flash_Attention/figures/fuse_reduce.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/figures/fuse_reduce.pdf


--------------------------------------------------------------------------------
/TeXNotes/Flash_Attention/figures/logsoftmax_expression_tree.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/figures/logsoftmax_expression_tree.pdf


--------------------------------------------------------------------------------
/TeXNotes/Flash_Attention/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/main.pdf


--------------------------------------------------------------------------------
/TeXNotes/Flash_Attention/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass{article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{Flash Attention}
 5 | % \author{Ying Cao}
 6 | % \date{\today}
 7 | 
 8 | \begin{document}
 9 | \bibliographystyle{plain}
10 | 
11 | % \maketitle % Print the title
12 | % \tableofcontents
13 | 
14 | \noindent
15 | \linespread{1.2}
16 | \selectfont
17 | \setlength{\topskip}{0ex}
18 | \setlength{\parskip}{1ex}
19 | \setlength{\lineskip}{1em}
20 | 
21 | %---------------------------------------------------------------
22 | % unnumbered section
23 | %---------------------------------------------------------------
24 | 
25 | \section{Flash Attention}
26 | \input{contents/flash_attention.tex}
27 | 
28 | \newpage
29 | \section{Online Normalized Softmax}
30 | \input{contents/online_softmax.tex}
31 | 
32 | \newpage
33 | \section{CTA Offset}
34 | \input{contents/CTA_offset.tex}
35 | 
36 | \newpage
37 | \section{I/O Complexity Analysis}
38 | \input{contents/IO_complexity.tex}
39 | 
40 | \newpage
41 | \section{Fuse Consecutive Aggregations}
42 | \input{contents/fuse_reduce.tex}
43 | 
44 | \bibliography{references.bib}
45 | \end{document}
46 | 


--------------------------------------------------------------------------------
/TeXNotes/Flash_Attention/references.bib:
--------------------------------------------------------------------------------
 1 | @article{DBLP:journals/corr/abs-2112-05682,
 2 |   author    = {Markus N. Rabe and
 3 |                Charles Staats},
 4 |   title     = {\href{https://arxiv.org/pdf/2112.05682.pdf}{Self-attention Does Not Need O(n\({}^{\mbox{2}}\)) Memory}},
 5 |   journal   = {CoRR},
 6 |   volume    = {abs/2112.05682},
 7 |   year      = {2021},
 8 |   url       = {https://arxiv.org/abs/2112.05682},
 9 |   eprinttype = {arXiv},
10 |   eprint    = {2112.05682},
11 |   timestamp = {Tue, 14 Dec 2021 14:21:31 +0100},
12 |   biburl    = {https://dblp.org/rec/journals/corr/abs-2112-05682.bib},
13 |   bibsource = {dblp computer science bibliography, https://dblp.org}
14 | }
15 | 
16 | @misc{lse-trick,
17 |   author = {Gregory Gundersen},
18 |   year = {2020},
19 |   howpublished = "\url{https://gregorygundersen.com/blog/2020/02/09/log-sum-exp/}",
20 |   urldate = {February 9, 2020},
21 |   title = {The Log-Sum-Exp Trick},
22 |   note = "[Online; accessed 17-April-2023]"
23 | }


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/FlashAttention_formalization.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/FlashAttention_formalization.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/Transformer-block.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/Transformer-block.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/Transformer-block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/Transformer-block.png


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/attention.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/attention.pptx


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree1.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree2.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree3.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/fused_mha.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/fused_mha.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/fused_transformer_block.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/fused_transformer_block.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/map_and_aggregate.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/map_and_aggregate.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/multi-scale-attn1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/multi-scale-attn1.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/multi-scale-attn2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/multi-scale-attn2.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/figures/transformer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/transformer.png


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/main.pdf


--------------------------------------------------------------------------------
/TeXNotes/Formalize_Flash_Attention/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass{article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{Flash Attention}
 5 | % \author{Ying Cao}
 6 | % \date{\today}
 7 | 
 8 | \begin{document}
 9 | \bibliographystyle{plain}
10 | 
11 | \maketitle % Print the title
12 | \tableofcontents
13 | 
14 | \clearpage
15 | \noindent
16 | \linespread{1.2}
17 | \selectfont
18 | \setlength{\topskip}{0ex}
19 | \setlength{\parskip}{1ex}
20 | \setlength{\lineskip}{1em}
21 | 
22 | %---------------------------------------------------------------
23 | % unnumbered section
24 | %---------------------------------------------------------------
25 | 
26 | \noindent $::$ is read as ``has type''.
27 | 
28 | \noindent $\rightarrow$ is read as ``maps to''.
29 | 
30 | \section{Background: The Computational Process of Reduce and Map}\label{sec1}
31 | \input{contents/background.tex}
32 | 
33 | \section{A Generalized \textit{Broadcast-and-then-Aggregate} Operation}
34 | \input{contents/map_and_then_aggreage.tex}
35 | 
36 | \section{Block Execution of a Chain of \textit{Broadcast-and-then-Aggregate}}
37 | \input{contents/fused_chained_map_and_then_aggregate.tex}
38 | 
39 | \section{The Transformer Block}
40 | \input{contents/beyond_flash_attention.tex}
41 | \clearpage
42 | \begin{appendices}
43 | \input{contents/welford_algorithm.tex}
44 | \input{contents/backward.tex}
45 | \end{appendices}
46 | 
47 | \clearpage
48 | % \bibliographystyle{abbrv} % disabled: \bibliographystyle{plain} is already issued right after \begin{document}, and BibTeX rejects a second \bibstyle in the .aux file
49 | \bibliography{references.bib}
50 | 
51 | \end{document}


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/BERT-performance-breakdown.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/BERT-performance-breakdown.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/ILP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/ILP.png


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/LLM-inference-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/LLM-inference-graph.png


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/LLM.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/LLM.xlsx


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/MHA-variable-seq-length.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/MHA-variable-seq-length.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/SBI-GEMM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/SBI-GEMM.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/block-schedule-algorithm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/block-schedule-algorithm.png


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/block-schedule.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/block-schedule.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/byte-transformer-overview2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/byte-transformer-overview2.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/bytetransformer-padding-free-input-batch.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/bytetransformer-padding-free-input-batch.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/bytetransformer_overview.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/bytetransformer_overview.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/different-batch-size-in-pipeline-parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/different-batch-size-in-pipeline-parallelism.png


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/fused_transformer_block-deepspeed-inference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/fused_transformer_block-deepspeed-inference.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/grouped-gemm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/grouped-gemm.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/grouped-mha.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/grouped-mha.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/llm-inference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/llm-inference.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/llm-inference.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/llm-inference.pptx


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/logo.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/logo.jpeg


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/memory-system.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/memory-system.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/pipeline-schedule-deep-speed-inference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/pipeline-schedule-deep-speed-inference.png


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/two-stages-in-llm-inference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/two-stages-in-llm-inference.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/variable-length-mha.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/variable-length-mha.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/images/zero-padding-algorithm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/zero-padding-algorithm.pdf


--------------------------------------------------------------------------------
/TeXNotes/LLM_inference/llm_inference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/llm_inference.pdf


--------------------------------------------------------------------------------
/TeXNotes/Parallel_Execution_of_DO_Loops/example.tex:
--------------------------------------------------------------------------------
 1 | This is the implementation of Grid LSTM.
 2 | 
 3 | \begin{lstlisting}[language=Python]
 4 | # data parallelism: iterate over samples in a batch.
 5 | for sample_id in range(0, batch_size, 1):
 6 |     x = src_array_batch[sample_id]
 7 |     y = trg_array_batch[sample_id]
 8 | 
 9 |     src_length = x.size()[0]  # source length = size of x's first axis
10 |     trg_length = y.size()[0]  # target length = size of y's first axis
11 | 
12 |     # dim 1: stack Grid LSTM Cell to form depth.
13 |     for d in range(0, depth, 1):
14 |         # dim 2: iterate over source sequence length.
15 |         for i in range(1, src_length + 1, 1):
16 |             # dim 3: iterate over target sequence length.
17 |             for j in range(1, trg_length + 1, 1):
18 |                 cell_x = cells[d][0]  # cells[d] holds the pair of cells for depth d: slot 0 is used for direction x
19 |                 cell_y = cells[d][1]  # slot 1 is used for direction y
20 | 
21 |                 output_d = outputs[sample_id][d]  # grid of results for this sample at depth d, indexed [i][j][direction]
22 | 
23 |                 if d == 0:  # first layer reads the raw inputs; i and j are 1-based, hence rows i-1 and j-1
24 |                     x_t = x[i - 1, :].view(1, input_dim)
25 |                     y_t = y[j - 1, :].view(1, input_dim)
26 |                 else:  # deeper layers read element 0 (the hidden state, appended first below) of layer d-1 at the same (i, j)
27 |                     x_t = outputs[sample_id][d - 1][i][j][0][0]
28 |                     y_t = outputs[sample_id][d - 1][i][j][1][0]
29 |                 states_x = output_d[i][j - 1][0]  # direction-x state stored at (i, j-1); presumably row/column 0 is pre-filled with initial states -- TODO confirm
30 |                 states_y = output_d[i - 1][j][1]  # direction-y state stored at (i-1, j)
31 | 
32 |                 h_x_prev, c_x_prev = states_x
33 |                 h_y_prev, c_y_prev = states_y
34 | 
35 |                 h = torch.cat((h_x_prev, h_y_prev), dim=1)  # both cells receive the concatenation of the two previous hidden states
36 |                 h_x, c_x = cell_x(x_t, (h, c_x_prev))
37 |                 h_y, c_y = cell_y(y_t, (h, c_y_prev))
38 | 
39 |                 output_d[i][j][0].append(h_x)  # hidden for direction x
40 |                 output_d[i][j][0].append(c_x)  # cell for direction x
41 | 
42 |                 output_d[i][j][1].append(h_y)  # hidden for direction y
43 |                 output_d[i][j][1].append(c_y)  # cell for direction y
44 | 
45 | \end{lstlisting}
46 | 


--------------------------------------------------------------------------------
/TeXNotes/Parallel_Execution_of_DO_Loops/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Parallel_Execution_of_DO_Loops/main.pdf


--------------------------------------------------------------------------------
/TeXNotes/Parallel_Execution_of_DO_Loops/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{Note for
 5 | \textit{\href{https://www.microsoft.com/en-us/research/wp-content/uploads/2016/12/The-Parallel-Execution-of-DO-Loops.pdf}
 6 | {The Parallel Execution of DO Loops}}}
 7 | \author{Ying Cao}
 8 | \date{\today}
 9 | 
10 | \begin{document}
11 | 
12 | \maketitle
13 | \tableofcontents
14 | 
15 | \section{Basics}
16 | \input{basics.tex}
17 | 
18 | \section{The hyperplane method}
19 | \input{hyperplane.tex}
20 | 
21 | {
22 | \small
23 | \raggedright
24 | \bibliographystyle{ieeetr}
25 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt
26 | \begin{spacing}{1}
27 | \bibliography{references.bib}
28 | \end{spacing}
29 | }
30 | \end{document}
31 | 


--------------------------------------------------------------------------------
/TeXNotes/Parallel_Execution_of_DO_Loops/references.bib:
--------------------------------------------------------------------------------
 1 | @article{lamport1974parallel,
 2 |   title={The parallel execution of do loops},
 3 |   author={Lamport, Leslie},
 4 |   journal={Communications of the ACM},
 5 |   volume={17},
 6 |   number={2},
 7 |   pages={83--93},
 8 |   year={1974},
 9 |   publisher={ACM New York, NY, USA}
10 | }
11 | @Misc{EliBendersky,
12 | howpublished = {\url{https://eli.thegreenplace.net/2018/affine-transformations/}},
13 | note = {Accessed February 4, 2020},
14 | title = {Affine transformations},
15 | author = {Eli Bendersky}
16 | }
17 | @Misc{Diophantinewiki,
18 | howpublished = {\url{https://en.wikipedia.org/wiki/Diophantine_equation}},
19 | note = {Accessed February 4, 2020},
20 | title = {Diophantine equation},
21 | author = {}
22 | }
23 | @inproceedings{irigoin1988supernode,
24 |   title={Supernode partitioning},
25 |   author={Irigoin, Fran{\c{c}}ois and Triolet, Remi},
26 |   booktitle={Proceedings of the 15th ACM SIGPLAN-SIGACT symposium on Principles of programming languages},
27 |   pages={319--329},
28 |   year={1988}
29 | }
30 | @article{wolf1991loop,
31 |   title={A loop transformation theory and an algorithm to maximize parallelism},
32 |   author={Wolf, Michael E and Lam, Monica S},
33 |   journal={IEEE Transactions on Parallel \& Distributed Systems},
34 |   number={4},
35 |   pages={452--471},
36 |   year={1991},
37 |   publisher={IEEE}
38 | }
39 | @book{mordell1969diophantine,
40 |   title={Diophantine equations},
41 |   author={Mordell, Louis Joel},
42 |   year={1969},
43 |   publisher={Academic Press}
44 | }
45 | 


--------------------------------------------------------------------------------
/code_reading/README.md:
--------------------------------------------------------------------------------
1 | [TBD]
2 | 


--------------------------------------------------------------------------------
/code_reading/pet/README.md:
--------------------------------------------------------------------------------
1 | - [Clang AST](clang_pulgin.md)
2 | - [basics](basics.md)
3 | - [extract SCoP](extract_scop.md)
4 | - [tree to SCoP](tree2scop.md)
5 | 


--------------------------------------------------------------------------------
/code_reading/pet/clang_pulgin.md:
--------------------------------------------------------------------------------
1 | See [this example](https://github.com/llvm/llvm-project/blob/master/clang/examples/PrintFunctionNames/PrintFunctionNames.cpp) to implement a Clang plugin.
2 | 
3 | # References
4 | 
5 | 1. [How to write RecursiveASTVisitor based ASTFrontendActions](https://clang.llvm.org/docs/RAVFrontendAction.html)
6 | 1. [Clang tutorial part 1: introduction](https://kevinaboos.wordpress.com/2013/07/23/clang-tutorial-part-i-introduction/)
7 | 1. [Clang Tutorial Part II: LibTooling Example](https://kevinaboos.wordpress.com/2013/07/23/clang-tutorial-part-ii-libtooling-example/)
8 | 


--------------------------------------------------------------------------------
/engineering_a_compiler/scanner.md:
--------------------------------------------------------------------------------
1 | <!-- TOC depthFrom:1 depthTo:6 withLinks:1 updateOnSave:1 orderedList:0 -->
2 | 
3 | - [Scanner](#scanner)
4 | 
5 | <!-- /TOC -->
6 | 
7 | # Scanner
8 | 


--------------------------------------------------------------------------------
/julia_learning_notes/Basics/03_JPL_metaprogramming.md:
--------------------------------------------------------------------------------
 1 | # [Metaprogramming](https://docs.julialang.org/en/release-0.4/manual/metaprogramming/)
 2 | 
 3 | * Julia represents its own code as a data structure of the language itself.
 4 |   * allow sophisticated code generation without extra build steps
 5 |   * allow true Lisp-style macros <span style="background-color:#A3D1D1;">_**operating at the level of abstract syntax trees**_</span>.
 6 |   * powerful [reflection](https://en.wikipedia.org/wiki/Reflection_%28computer_programming%29) capabilities
 7 | 
 8 | ## Program representation
 9 | 
10 | * every Julia program starts life as a string
11 | * parse (I understand this function as lexical analysis) each string into an object called an expression, represented by Julia's type `Expr`.
12 |   * `Expr` objects contain three parts:
13 |     1. `Symbol`
14 |         * In the context of an expression, symbols are used to indicate access to variables.
15 |         * when an expression is evaluated, a symbol is replaced with the value bound to that symbol in the appropriate scope.
16 |     2. the expression arguments
17 |     3. the expression result type
18 | 


--------------------------------------------------------------------------------
/julia_learning_notes/Basics/Good_references.md:
--------------------------------------------------------------------------------
1 | 1. [The Julia Language Challenge](https://nextjournal.com/sdanisch/the-julia-challenge)
2 | 1. [The Julia Challenge in C++](https://medium.com/@wolfv/the-julia-challenge-in-c-21272d36c002)
3 | 


--------------------------------------------------------------------------------
/julia_learning_notes/Basics/Type_inference/README.md:
--------------------------------------------------------------------------------
1 | # Inference Convergence Algorithm
2 | 
3 | 1. [Inference Convergence](https://juliacomputing.com/blog/2016/04/04/inference-convergence.html)
4 | 1. [Inference Convergence Algorithm in Julia - Revisited](https://juliacomputing.com/blog/2017/05/15/inference-converage2.html)
5 | 1. [Notes](Type_inference.pdf)
6 | 


--------------------------------------------------------------------------------
/julia_learning_notes/Basics/Type_inference/Type_inference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/julia_learning_notes/Basics/Type_inference/Type_inference.pdf


--------------------------------------------------------------------------------
/julia_learning_notes/Basics/WhyJulia/README.md:
--------------------------------------------------------------------------------
 1 | 1. [Why Julia](https://ucidatascienceinitiative.github.io/IntroToJulia/Html/WhyJulia)
 2 | 1. [Notes](whyJulia.pdf)
 3 | 
 4 | ---
 5 | 
 6 | ### What is [Type-stable](https://docs.julialang.org/en/v1.2-dev/manual/faq/#man-type-stability-1)
 7 | 
 8 | _**The type of the output is predictable from the types of the inputs.**_ In particular, it means that the type of the output cannot vary depending on the values of the inputs.
 9 | 
10 | The following code is type-unstable:
11 | 
12 | ```julia
13 | function unstable(flag::Bool)
14 |   if flag
15 |     return 1
16 |   else
17 |     return 1.0
18 |   end
19 | end
20 | ```
21 | 
22 | _**Julia can't predict the return type of a type-unstable function at compile time, making it difficult to generate fast machine code.**_
23 | 


--------------------------------------------------------------------------------
/julia_learning_notes/Basics/WhyJulia/whyJulia.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/julia_learning_notes/Basics/WhyJulia/whyJulia.pdf


--------------------------------------------------------------------------------
/julia_learning_notes/Basics/images/gpu_julia_packages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/julia_learning_notes/Basics/images/gpu_julia_packages.png


--------------------------------------------------------------------------------
/julia_learning_notes/README.md:
--------------------------------------------------------------------------------
1 | Learning notes of the Julia programming language.
2 | 


--------------------------------------------------------------------------------
/julia_learning_notes/Zygote/code_snippets/hello_world_test.jl:
--------------------------------------------------------------------------------
 1 | xs = [fill(1.1, 3), fill(2.2, 3)];
 2 | 
 3 | # using Pkg
 4 | # Pkg.activate(".")
 5 | 
 6 | # zero(x::Array{Float64,1}) = [zero(x) for x in x]
 7 | #
 8 | # function case1(xs)
 9 | #     h = xs[1][1]
10 | #     # sum(h)
11 | # end
12 | #
13 | # Zygote.gradient(case1, xs)
14 | 
15 | 
16 | using Pkg
17 | Pkg.activate(".")
18 | using Zygote
19 | 
20 | function case1(xs)  # returns the sum of the first element of xs
21 |     h = xs[1]
22 |     # for i in 2:length(xs)
23 |     #     h = h .* xs[i]
24 |     # end
25 |     sum(h)  # last expression is the return value; the elementwise-product loop above is commented out
26 | end
27 | @show case1(xs)
28 | 
29 | Zygote.gradient(case1, xs)
30 | 


--------------------------------------------------------------------------------
/julia_learning_notes/experiments/README.md:
--------------------------------------------------------------------------------
1 | For testing only. The code is unoptimized and ugly.
2 | 


--------------------------------------------------------------------------------
/julia_learning_notes/experiments/lstm_test/README.md:
--------------------------------------------------------------------------------
1 | Extremely naive implementation. The code is unoptimized, ugly, and redundant.
2 | For testing ONLY.
3 | 


--------------------------------------------------------------------------------
/julia_learning_notes/experiments/lstm_test/naive_cpu_test/README.md:
--------------------------------------------------------------------------------
1 | The code has only been tested under Julia v0.7-beta.
2 | 
3 | - [`@code_lowered`](docs/code_lowered.txt)
4 | - [`@code_typed`](docs/code_typed.txt)
5 | - [`@code_llvm`](docs/code_llvm.txt)
6 | - [`@code_native`](docs/code_native.txt)
7 | 


--------------------------------------------------------------------------------
/julia_learning_notes/experiments/lstm_test/naive_cpu_test/Recurrent/common.jl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env julia
 2 | 
 3 | abstract type NN
 4 | end
 5 | 
 6 | mutable struct Param  # a weight matrix bundled with its gradient buffer and its two dimensions
 7 |   n::Integer    # input size
 8 |   d::Integer    # output size
 9 |   w::AbstractArray{AbstractFloat}    # learnable weight matrix
10 |   dw::AbstractArray{AbstractFloat}   # gradients of learnable weight matrix
11 | 
12 |   Param(n::Integer) = new(n, n, randn(n, n), randn(n, n))  # square n-by-n; w and dw are both filled by randn
13 |   Param(n::Integer, d::Integer) = new(n, d, randn(n, d), randn(n, d))  # n-by-d; w and dw are both filled by randn
14 |   Param(n::Integer, d::Integer, w::Array, dw::Array) = new(n, d, w, dw)  # caller supplies w and dw; their shapes are not checked against n and d
15 | end
16 | 
17 | randParam(n::Integer, d::Integer, std::Real=0.1) = Param(  # n-by-d weights: randn scaled by std, gradients zeroed
18 |         n, d, randn(n, d) * std, zeros(n, d))
19 | onesParam(n::Integer, d::Integer) = Param(n, d, ones(n, d), zeros(n, d))  # n-by-d weights of all ones, gradients zeroed
20 | 
21 | function softmax(m::Param)  # softmax of m.w along dimension 2 (each row sums to 1); returns a new Param
22 |   out = Param(m.n, m.d)  # NOTE(review): this constructor fills out.dw with randn values; only out.w is overwritten below
23 |   maxval = maximum(m.w, dims=2)  # per-row maximum, subtracted before exp for numerical stability
24 |   out.w .= exp.(m.w .- maxval)
25 |   out.w ./= sum(out.w, dims=2)  # `dims=` keyword replaces the positional form deprecated in Julia 0.7
26 |   return out
27 | end
28 | 
29 |  σ(x) = 1.0 / (1.0 + exp(-x))  # logistic sigmoid of x
30 | 


--------------------------------------------------------------------------------
/julia_learning_notes/experiments/lstm_test/naive_cpu_test/Recurrent/recurrent.jl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env julia
 2 | module Recurrent  # wraps the definitions included from common.jl and lstm.jl
 3 | 
 4 | export Param, randParam, onesParam, softmax, sigmoid  # NOTE(review): common.jl defines `σ`, not `sigmoid` -- confirm that lstm.jl provides `sigmoid`
 5 | export LSTMCell, LSTM_forward
 6 | 
 7 | include("common.jl")
 8 | include("lstm.jl")
 9 | 
10 | end  # module
11 | 


--------------------------------------------------------------------------------
/julia_learning_notes/experiments/lstm_test/naive_cpu_test/cpu_test.jl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env julia
 2 | include("Recurrent/recurrent.jl")
 3 | using .Recurrent
 4 | 
 5 | srand(1)
 6 | 
 7 | const batch_size = 2
 8 | const seq_len = 3
 9 | const input_dim = 4
10 | const hidden_dim = 4
11 | 
12 | rand_inputs = randn(batch_size * seq_len, input_dim)
13 | 
14 | lstm_cell = LSTMCell(input_dim, hidden_dim)
15 | cell_state, hidden_states = LSTM_forward(rand_inputs, lstm_cell,
16 |                                          input_dim, hidden_dim, seq_len)
17 | 
18 | println("cell states :")
19 | display(cell_state)
20 | 
21 | println("\nhidden states :")
22 | display(hidden_states)
23 | 


--------------------------------------------------------------------------------
/julia_learning_notes/experiments/lstm_test/naive_gpu_test/Recurrent/recurrent.jl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env julia
 2 | module Recurrent
 3 | 
 4 | using CuArrays
 5 | 
 6 | include("lstm.jl")
 7 | 
 8 | export LSTMCell
 9 | export σ, lstm_forward
10 | 
11 | 
12 | end # module
13 | 


--------------------------------------------------------------------------------
/julia_learning_notes/experiments/lstm_test/naive_gpu_test/gpu_test.jl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env julia
 2 | include("Recurrent/recurrent.jl")
 3 | 
 4 | using .Recurrent
 5 | using CuArrays: CuArray
 6 | 
 7 | srand(1)
 8 | 
 9 | const batch_size = 512
10 | const seq_len = 10
11 | const input_dim = 512
12 | const hidden_dim = 512
13 | 
14 | rand_inputs_d = CuArray(randn(batch_size * seq_len, input_dim))
15 | lstm_cell = LSTMCell(input_dim, hidden_dim)
16 | 
17 | 
18 | for i = 1 : 10
19 |   cell_states, hidden_states = lstm_forward(rand_inputs_d, lstm_cell, seq_len)
20 | 
21 |   # println("cell states : ")
22 |   # display(cell_states)
23 | 
24 |   # println("hidden_states : ")
25 |   # display(hidden_states)
26 | end
27 | 


--------------------------------------------------------------------------------
/paper_notes/Diffusion/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 1. "Controlling Text-to-Image Diffusion by Orthogonal Finetuning"[[PDF]](https://arxiv.org/pdf/2306.07280.pdf)
3 | 1. LoRA
4 | 1. ControlNet


--------------------------------------------------------------------------------
/paper_notes/Template.md:
--------------------------------------------------------------------------------
 1 | <!-- TOC depthFrom:1 depthTo:6 withLinks:1 updateOnSave:1 orderedList:0 -->
 2 | 
 3 | - [Title](#title)
 4 | 	- [My Takeaways and Some Thoughts](#my-takeaways-and-some-thoughts)
 5 | 	- [Overall](#overall)
 6 | 		- [Problem Proposed](#problem-proposed)
 7 | 		- [Goal](#goal)
 8 | 		- [Approach](#approach)
 9 | 		- [Evaluation Methods in this Paper](#evaluation-methods-in-this-paper)
10 | 	- [Related Works Recommended for Further Reading](#related-works-recommended-for-further-reading)
11 | 	- [Detail](#detail)
12 | 		- [Challenges](#challenges)
13 | 		- [Proposed Solutions](#proposed-solutions)
14 | 	- [Reference](#reference)
15 | 
16 | <!-- /TOC -->
17 | 
18 | # Title
19 | 
20 | [link](https://arxiv.org/pdf/1812.01329.pdf)
21 | 
22 | ## My Takeaways and Some Thoughts
23 | 
24 | ## Overall
25 | 
26 | ### Problem Proposed
27 | 
28 | ### Goal
29 | 
30 | ### Approach
31 | 
32 | ### Evaluation Methods in this Paper
33 | 
34 | ## Related Works Recommended for Further Reading
35 | 
36 | ## Detail
37 | 
38 | ### Challenges
39 | 
40 | ### Proposed Solutions
41 | 
42 | ## Reference
43 | 


--------------------------------------------------------------------------------
/paper_notes/auto-diff/BP_and_implicit_function_theorem/README.md:
--------------------------------------------------------------------------------
 1 | # Backpropagation is not Just the Chain-Rule
 2 | 
 3 | ## Backprop and the Adjoint Method
 4 | 
 5 | 
 6 | 
 7 | ## Reference
 8 | 
 9 | 1. [Backprop is not just the chain rule](https://timvieira.github.io/blog/post/2017/08/18/backprop-is-not-just-the-chain-rule/)
10 | 1. [A new trick for calculating Jacobian vector products](https://j-towns.github.io/2017/06/12/A-new-trick.html)
11 | 1. [Mechanics of Lagrangians](http://www.argmin.net/2016/05/31/mechanics-of-lagrangians/)
12 | 1. [Mates of Costate](http://www.argmin.net/2016/05/18/mates-of-costate/)
13 | 1. [Black-box optimization](https://timvieira.github.io/blog/post/2018/03/16/black-box-optimization/)
14 | 


--------------------------------------------------------------------------------
/paper_notes/auto-diff/SCT_AD/README.md:
--------------------------------------------------------------------------------
 1 | 1. [Tapenade](Tapenade.md)
 2 |     - Project webpage: https://www-sop.inria.fr/tropics/tapenade.html
 3 |     - Paper: [The Tapenade Automatic Differentiation tool: principles, model, and specification](https://hal.inria.fr/hal-00913983/document)
 4 |     - [Slides](http://www-sop.inria.fr/tropics/Laurent.Hascoet/slidesLesHouches.pdf)
 5 | 1. [Myia](Automatic_Differentiation_in_Myia.md)
 6 |     - Github Project: https://github.com/mila-udem/myia
 7 |     - Paper: [Automatic Differentiation in Myia](https://openreview.net/pdf?id=S1hcluzAb)
 8 | 1. [Tangent](Tagent.md)
 9 |     - Github Project: https://github.com/google/tangent
10 |     - [Tangent: Source-to-Source Debuggable Derivatives](https://ai.googleblog.com/2017/11/tangent-source-to-source-debuggable.html)
11 | 1. [JAX](JAX.md)
12 |     - Github Project: https://github.com/google/jax
13 |     - [Compiling machine learning programs via high-level tracing](https://www.sysml.cc/doc/146.pdf)
14 | 1. [Zygote](Differentiating_SSA_form_program.md)
15 |     - Github Project: https://github.com/FluxML/Zygote.jl
16 |     - Paper: [Don't Unroll Adjoint: Differentiating SSA-Form Programs](https://arxiv.org/pdf/1810.07951.pdf)
17 | 
18 | 1. [DLVM: A modern compiler infrastructure for deep learning systems with adjoint code generation in a domain-specific IR](https://arxiv.org/pdf/1711.03016.pdf)
19 | 
20 | ---
21 | 
22 | ### Some Related Research Work
23 | 
24 | 1. [JANUS: Fast and Flexible Deep Learning via Symbolic Graph Execution of Imperative Programs](JANUS.md)
25 | 


--------------------------------------------------------------------------------
/paper_notes/auto-diff/images/expression_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/auto-diff/images/expression_graph.png


--------------------------------------------------------------------------------
/paper_notes/auto-diff/images/mix_forward_and_reverse_mode_AD.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/auto-diff/images/mix_forward_and_reverse_mode_AD.png


--------------------------------------------------------------------------------
/paper_notes/auto-diff/images/multidimensional_dual_numbers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/auto-diff/images/multidimensional_dual_numbers.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/CFG-optimizations/README.md:
--------------------------------------------------------------------------------
1 | # Reading List
2 | 
3 | - [ ] [Optimizing control flow in loops using interval and dependence analysis](https://www.ics.uci.edu/~givargis/pubs/J18.pdf)
4 | - [ ] [Control Flow Analysis Dragon Book Section 8.4](http://www.cs.ecu.edu/karl/5220/spr16/Notes/Optimization/controlflow.html)
5 | - [ ] [Assignment 2: Control Flow Optimization](http://aggregate.org/OC/s18a2.html)
6 | - [ ] [Lecture 4: Control Flow Optimization, COS 598C - Advanced Compilers](https://www.cs.princeton.edu/courses/archive/spr04/cos598C/lectures/04-ControlFlow-3x1.pdf)
7 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/CFG-optimizations/images/discussion-on-cfg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/CFG-optimizations/images/discussion-on-cfg.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Code-optimizations/images/dmxpy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/dmxpy.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Code-optimizations/images/dmxpy.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/dmxpy.pptx


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Code-optimizations/images/excerpt-from-dmxpy-in-linpack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/excerpt-from-dmxpy-in-linpack.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Code-optimizations/images/manual-optimizations-for-dmxpy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/manual-optimizations-for-dmxpy.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Code-optimizations/images/simple-version-of-dmxpy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/simple-version-of-dmxpy.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Code-optimizations/images/strength-reduction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/strength-reduction.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Code-optimizations/local-optimizatioins.md:
--------------------------------------------------------------------------------
1 | # Local Optimizations
2 | 
3 | Programmers will protest that they do not write code that contains redundancy. In practice, redundancy elimination finds many opportunities. ***Translation from source code to IR elaborates many details, such as address calculations, and introduces redundant expressions***.
4 | 
5 | ## Local Value Numbering (LVN)
6 | 
7 | ***Local value numbering*** is one of the oldest and most powerful redundancy-elimination techniques.
8 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Compiler_and_Interpreter.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Compiler_and_Interpreter.pdf


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Dependence_analysis/README.md:
--------------------------------------------------------------------------------
1 | TBD
2 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/contents.tex:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/contents.tex


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{Dependence Abstraction}
 5 | \author{Ying Cao}
 6 | \date{\today}
 7 | 
 8 | \begin{document}
 9 | 
10 | \maketitle
11 | \tableofcontents
12 | \newpage
13 | \input{contents.tex}
14 | 
15 | {
16 | \small
17 | \raggedright
18 | \bibliographystyle{ieeetr}
19 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt
20 | \begin{spacing}{1}
21 | \bibliography{references.bib}
22 | \end{spacing}
23 | }
24 | \end{document}
25 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/references.bib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/references.bib


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Intermediate-Representations/images/AST-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/AST-example.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Intermediate-Representations/images/DAG-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/DAG-example.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Intermediate-Representations/images/IR-level-of-abstraction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/IR-level-of-abstraction.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Intermediate-Representations/images/an-simple-example-of-dependency-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/an-simple-example-of-dependency-graph.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Intermediate-Representations/images/dependence-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/dependence-graph.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Intermediate-Representations/images/different-levels-of-abstraction-for-an-array-subscript-reference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/different-levels-of-abstraction-for-an-array-subscript-reference.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Intermediate-Representations/images/naming-leads-to-different-translations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/naming-leads-to-different-translations.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Intermediate-Representations/images/one-address-code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/one-address-code.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Intermediate-Representations/images/three-address-code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/three-address-code.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/Terminology.md:
--------------------------------------------------------------------------------
 1 | # Schedule
 2 | 
 3 | # Polyhedra/Polyhedron
 4 | 
 5 | # Dependence system
 6 | 
 7 | # Lexicographic order
 8 | 
 9 | If $R_1$ is a definition and $R_2$ a use, lexico-positive points of $\Pi$ correspond to data dependences from $S_1$ to $S_2$ (from write to read), while lexico-negative points correspond to anti dependences from $S_2$ to $S_1$ (from read to write).
10 | 
11 | # Legality of Unimodular Transformations
12 | 
13 | # Full permutability
14 | 
15 | # Wavefront transformation
16 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/a_single_rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/a_single_rnn.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/bi_directional_rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/bi_directional_rnn.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/bi_directional_rnn.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/bi_directional_rnn.pptx


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/different_dependences.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/different_dependences.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/dilated_rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/dilated_rnn.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/example-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/example-01.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/grid_rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/grid_rnn.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/polyhedral_representation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/polyhedral_representation.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/stack_rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/stack_rnn.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/wh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/wh.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/workflow.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/dependence_analysis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/dependence_analysis.pdf


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Polyhedral_representation_in_Pet/references.bib:
--------------------------------------------------------------------------------
 1 | @article{lamport1974parallel,
 2 |   title={The parallel execution of do loops},
 3 |   author={Lamport, Leslie},
 4 |   journal={Communications of the ACM},
 5 |   volume={17},
 6 |   number={2},
 7 |   pages={83--93},
 8 |   year={1974},
 9 |   publisher={ACM New York, NY, USA}
10 | }
11 | @article{wolf1991loop,
12 |   title={A loop transformation theory and an algorithm to maximize parallelism},
13 |   author={Wolf, Michael E and Lam, Monica S},
14 |   journal={IEEE Transactions on Parallel \& Distributed Systems},
15 |   number={4},
16 |   pages={452--471},
17 |   year={1991},
18 |   publisher={IEEE}
19 | }
20 | @book{banerjee2013loop,
21 |   title={Loop parallelization},
22 |   author={Banerjee, Utpal},
23 |   year={2013},
24 |   publisher={Springer Science \& Business Media}
25 | }
26 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Polyhedral_representation_in_Pet/section1.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}{Definitions and Notations in isl}
 2 | \begin{block}{Named Integer Tuples}
 3 | A \textit{named integer tuple} consists of an identifier (name) and a sequence of integer values.
 4 | The identifier may be omitted and the sequence of integers may have a zero length.
 5 | \end{block}
 6 | 
 7 | \textbf{Notation}:
 8 | 
 9 | \end{frame}
10 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/Polyhedral_representation_in_Pet/slides.tex:
--------------------------------------------------------------------------------
 1 | \documentclass[10pt,aspectratio=43,mathserif]{beamer}
 2 | 
 3 | \input{structure.tex}
 4 | 
 5 | \title[] {Represent a Polyhedral Model using \textit{isl}}
 6 | \author{Ying Cao}
 7 | \institute {}
 8 | \date{\today}
 9 | 
10 | \begin {document}
11 | % \begin{changemargin}{0.2cm}{0.2cm}
12 | 
13 | \begin {frame}
14 | \titlepage
15 | \end {frame}
16 | 
17 | % \AtBeginSection[]
18 | % {
19 | %   \begin{frame}
20 | %     \frametitle{Outlines}
21 | %     \tableofcontents[
22 | %     %currentsection,
23 | %     % currentsubsection,
24 | %     hideallsubsections,
25 | %     sectionstyle= hide, % show
26 | %     % subsectionstyle=shaded  % show
27 | %     ]
28 | %   \end{frame}
29 | % }
30 | 
31 | \section{Definitions and notations in isl}
32 | \input{section1.tex}
33 | 
34 | % \section{Issues, Next Plans}
35 | % \input{plans.tex}
36 | 
37 | \section{References}
38 | \begin{frame}[allowframebreaks]{References}
39 | \bibliographystyle{ieeetr}
40 | \bibliography{references.bib}
41 | \end{frame}
42 | 
43 | % \end{changemargin}
44 | \end {document}
45 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/The_parallel_execution_of_do_loops.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/The_parallel_execution_of_do_loops.pdf


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Loop_analysis/auto-vectorization/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/auto-vectorization/README.md


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/contents.tex:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/contents.tex


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{Note for
 5 | \textit{\href{https://arxiv.org/pdf/1811.06043.pdf}{A performance vocabulary for affine loop transformations}}}
 6 | \author{Ying Cao}
 7 | \date{\today}
 8 | 
 9 | \begin{document}
10 | 
11 | \maketitle
12 | \tableofcontents
13 | \newpage
14 | \input{contents.tex}
15 | 
16 | {
17 | \small
18 | \raggedright
19 | \bibliographystyle{ieeetr}
20 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt
21 | \begin{spacing}{1}
22 | \bibliography{references.bib}
23 | \end{spacing}
24 | }
25 | \end{document}
26 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/references.bib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/references.bib


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Data_dependence_and_PIP/contents.tex:
--------------------------------------------------------------------------------
1 | \section{Formulation of the data dependence problem}
2 | 
3 | \section{PIP and its output}
4 | 
5 | \section{What can we do when we have complete and accurate data dependence?}
6 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Data_dependence_and_PIP/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{Data dependence and PIP}
 5 | \author{Ying Cao}
 6 | \date{\today}
 7 | 
 8 | \begin{document}
 9 | 
10 | \maketitle
11 | \tableofcontents
12 | \newpage
13 | \input{contents.tex}
14 | 
15 | {
16 | \small
17 | \raggedright
18 | \bibliographystyle{ieeetr}
19 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt
20 | \begin{spacing}{1}
21 | \bibliography{references.bib}
22 | \end{spacing}
23 | }
24 | \end{document}
25 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Data_dependence_and_PIP/references.bib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Data_dependence_and_PIP/references.bib


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Mathmatical_foundations/concepts.md:
--------------------------------------------------------------------------------
 1 | # [Partially Ordered Set](https://en.wikipedia.org/wiki/Partially_ordered_set)
 2 | 
 3 | A _**partially ordered set**_ (_**also poset**_) formalizes and generalizes the intuitive concept of an _ordering_, _sequencing_, or _arrangement_ of the elements of a set. A poset consists of _**a set together with a binary relation**_ indicating that, for certain pairs of elements in the set, one of the elements precedes the other in the ordering. The _**relation itself is called a "partial order"**_.
 4 | 
 5 | The word partial in the names "partial order" and "partially ordered set" is used as an indication that _**not every pair of elements needs to be comparable**_. That is, there may be pairs of elements for which neither element precedes the other in the poset. Partial orders thus generalize total orders, in which every pair is comparable.
 6 | 
 7 | # [Lattice](https://en.wikipedia.org/wiki/Lattice_(order))
 8 | 
 9 | A lattice consists of a partially ordered set in which every two elements have a unique supremum (also called a least upper bound or join) and a unique infimum (also called a greatest lower bound or meet).
10 | 
11 | An example is given by the natural numbers, partially ordered by divisibility, for which the unique supremum is the least common multiple and the unique infimum is the greatest common divisor.
12 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/SCoP_decomposition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/SCoP_decomposition.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/a_skewing_transformation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/a_skewing_transformation.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/farkas_lemma.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/farkas_lemma.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/main.pdf


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{Note for
 5 | \textit{\href{https://hal.inria.fr/inria-00001056/document}{More Legal Transformations for Locality}}}
 6 | \author{Ying Cao}
 7 | \date{\today}
 8 | 
 9 | \begin{document}
10 | 
11 | \maketitle
12 | \tableofcontents
13 | \newpage
14 | \input{contents.tex}
15 | 
16 | {
17 | \small
18 | \raggedright
19 | \bibliographystyle{ieeetr}
20 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt
21 | \begin{spacing}{1}
22 | \bibliography{references.bib}
23 | \end{spacing}
24 | }
25 | \end{document}
26 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/references.bib:
--------------------------------------------------------------------------------
 1 | @Misc{Bas12,
 2 |   author={Bastoul, C\'{e}dric},
 3 |   title={\textit{Contributions to High-Level Program Optimization}. {H}abilitation {T}hesis. {P}aris-{S}ud {U}niversity, {F}rance},
 4 |   month=dec,
 5 |   year=2012,
 6 | }
 7 | 
 8 | @PhdThesis{TBas,
 9 |   author={Bastoul, C\'{e}dric},
10 |   title={Improving Data Locality in Static Control Programs},
11 |   school={University Paris 6, Pierre et Marie Curie, France},
12 |   month=dec,
13 |   year=2004,
14 | }
15 | 
16 | @article{xue1997tiling,
17 |   title={\href{http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.26.9719&rep=rep1&type=pdf}{On tiling as a loop transformation}},
18 |   author={Xue, Jingling},
19 |   journal={Parallel Processing Letters},
20 |   volume={7},
21 |   number={04},
22 |   pages={409--424},
23 |   year={1997},
24 |   publisher={World Scientific}
25 | }
26 | @inproceedings{griebl1998code,
27 |   title={\href{https://www.infosun.fim.uni-passau.de/publications/docs/GLW98pact.pdf}{Code generation in the polytope model}},
28 |   author={Griebl, Martin and Lengauer, Christian and Wetzel, Sabine},
29 |   booktitle={Proceedings. 1998 International Conference on Parallel Architectures and Compilation Techniques (Cat. No. 98EX192)},
30 |   pages={106--111},
31 |   year={1998},
32 |   organization={IEEE}
33 | }
34 | @inproceedings{lengauer1993loop,
35 |   title={\href{http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.29.8716&rep=rep1&type=pdf}{Loop parallelization in the polytope model}},
36 |   author={Lengauer, Christian},
37 |   booktitle={International Conference on Concurrency Theory},
38 |   pages={398--416},
39 |   year={1993},
40 |   organization={Springer}
41 | }
42 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Polyhedral_representation/2d_puls_1.tex:
--------------------------------------------------------------------------------
 1 | \section{the $2\mathbf{d}+1$ representation}
 2 | 
 3 | Refer to paper \cite{girbal2006semi}\cite{vasilache2007scalable}\cite{bastoul2011openscop} for details.
 4 | 
 5 | \subsection {Classical loop transformations}
 6 | 
 7 | \begin{enumerate}
 8 |   \item only modifies the iteration domain but does not affect the order in which
 9 |   statement instances are executed or the way arrays are accessed;
10 |   \begin{enumerate}
11 |     \item loop unrolling
12 |     \item strip-mining
13 |   \end{enumerate}
14 |   \item modifies both the iteration domain and the schedule;
15 |   \begin{enumerate}
16 |     \item tiling: a combination of strip-mining and loop interchange;
17 |   \end{enumerate}
18 |   \item modifies schedule;
19 |   \begin{enumerate}
20 |     \item shifting/pipelining
21 |   \end{enumerate}
22 |   \item modifies array subscripts
23 |   \begin{enumerate}
24 |     \item privatization
25 |   \end{enumerate}
26 |   \item only modifies the array declarations (data layout)
27 |   \begin{enumerate}
28 |     \item padding
29 |   \end{enumerate}
30 | \end{enumerate}
31 | 
32 | \subsection{Polyhedral model}
33 | 
34 | The polyhedral representation is a semantics-based representation instead of
35 | syntax-based representation. It clearly separates the four different types of
36 | actions performed by program transformations:
37 | 
38 | \begin{enumerate}
39 |   \item modification of the iteration domain (loop bounds and strides);
40 |   \item modification of the schedule of each individual statement;
41 |   \item modification of access functions (array subscripts)
42 |   \item modification of the data layout (array declarations)
43 | \end{enumerate}
44 | 
45 | Loop transformations are expressed as ``syntax-free'' function compositions.
46 | 
47 | Arbitrarily complex compositions of classical transformations can be captured
48 | in one single transformation step of the polyhedral model.
49 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Polyhedral_representation/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{\href{https://link.springer.com/content/pdf/10.1007/s10766-006-0012-3.pdf}{Semi-Automatic Composition of Loop Transformations for Deep Parallelism and Memory Hierarchies}}
 5 | \author{Ying Cao}
 6 | \date{\today}
 7 | 
 8 | \begin{document}
 9 | 
10 | \maketitle
11 | \tableofcontents
12 | \newpage
13 | \input{2d_puls_1.tex}
14 | \input{schedule_tree.tex}
15 | 
16 | {
17 | \small
18 | \raggedright
19 | \bibliographystyle{ieeetr}
20 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt
21 | \begin{spacing}{1}
22 | \bibliography{references.bib}
23 | \end{spacing}
24 | }
25 | \end{document}
26 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Polyhedral_representation/schedule_tree.tex:
--------------------------------------------------------------------------------
1 | \section{Schedule Trees}
2 | Refer to paper \cite{grosser2014decoupled} \cite{verdoolaege2014schedule} for details.
3 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/README.md:
--------------------------------------------------------------------------------
 1 | # Polyhedral Compilation
 2 | 
 3 | ## Polyhedral Program Representation
 4 | 
 5 | 1. Bastoul, Cédric, et al. "[Putting polyhedral loop transformations to work](https://hal.inria.fr/file/index/docid/71681/filename/RR-4902.pdf)." International Workshop on Languages and Compilers for Parallel Computing. Springer, Berlin, Heidelberg, 2003.
 6 |     - Clan project: http://icps.u-strasbg.fr/~bastoul/development/clan/#DL
 7 |     - [Clan - a polyhedral representation extractor for high level programs](http://icps.u-strasbg.fr/~bastoul/development/clan/docs/clan.html)
 8 | 
 9 |     >_This is the paper for Clan, a tool to extract polyhedral program representation from C programs._
10 |     >
11 |     >_This paper answers the question of what a polyhedral program representation looks like and helps build a quick understanding of, and intuition for, the basic concepts of polyhedral compilation and its workflow._
12 |     >
13 |     >_The paper goes through the basic concepts and the whole workflow, with the polyhedral program representation as the focus, and describes the inputs, outputs, requirements and formulations of each step without digging into the whys and hows._
14 | 
15 | 1. Verdoolaege, Sven, and Tobias Grosser. "[Polyhedral extraction tool](https://www.grosser.es/publications/grosser-2012-Polyhedral-Extraction-Tool-IMPACT.pdf)." Second International Workshop on Polyhedral Compilation Techniques (IMPACT’12), Paris, France. 2012.
16 |     - Pet project: https://github.com/Meinersbur/pet
17 |     - The doctoral thesis of Pet's author. Chapter 9 of: Grosser, Tobias. [A decoupled approach to high-level loop optimization: tile shapes, polyhedral building blocks and low-level compilers](https://tel.archives-ouvertes.fr/tel-01144563/document). Diss. 2014.
18 | 
19 | 1. [Polyhedral Process Networks](https://www.semanticscholar.org/paper/Polyhedral-Process-Networks-Verdoolaege/e8f64c573a680cddb6ede148c1778b94afb70830)
20 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Tiramisu/tiramisu_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Tiramisu/tiramisu_overview.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-01.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-02.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-03.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-04.png


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/contents.tex:
--------------------------------------------------------------------------------
1 | Refer to this tutorial~\cite{verdoolaege2016presburger} for first-hand knowledge.
2 | 
3 | Concepts are highlighted in \textcolor{vr}{violet red}, and their corresponding isl implementations are highlighted in \textcolor{og}{olive green}.
4 | 
5 | \input{sets_and_maps.tex}
6 | \input{presburger_sets_and_relations.tex}
7 | \input{pw_quasi_affine.tex}
8 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/main.pdf


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{\textit{isl} Basics}
 5 | \author{Ying Cao}
 6 | \date{\today}
 7 | 
 8 | \begin{document}
 9 | 
10 | \maketitle
11 | \tableofcontents
12 | \newpage
13 | \input{contents.tex}
14 | 
15 | {
16 | \small
17 | \raggedright
18 | \bibliographystyle{ieeetr}
19 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt
20 | \begin{spacing}{1}
21 | \bibliography{references.bib}
22 | \end{spacing}
23 | }
24 | \end{document}
25 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/references.bib:
--------------------------------------------------------------------------------
1 | @article{verdoolaege2016presburger,
2 |   title={\href{https://lirias.kuleuven.be/retrieve/361209}{Presburger formulas and polyhedral compilation}},
3 |   author={Verdoolaege, Sven},
4 |   year={2016}
5 | }
6 | 


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_01.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_01.pdf


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_02.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_02.pdf


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_03.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_03.pdf


--------------------------------------------------------------------------------
/paper_notes/compiler-stuffs/intermediate-code-generation.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/intermediate-code-generation.md


--------------------------------------------------------------------------------
/paper_notes/data_processing_systems/README.md:
--------------------------------------------------------------------------------
1 | 1. Murray D G, Schwarzkopf M, Smowton C, et al. [Ciel: a universal execution engine for distributed data-flow computing](https://web.kaust.edu.sa/Faculty/MarcoCanini/classes/CS345/S19/papers/ciel.pdf)[C]//Proc. 8th ACM/USENIX Symposium on Networked Systems Design and Implementation. 2011: 113-126.
2 | 1. Malewicz, Grzegorz, et al. "[Pregel: a system for large-scale graph processing](https://www.researchgate.net/profile/James-Dehnert/publication/221257383_Pregel_A_system_for_large-scale_graph_processing/links/00b7d537c615821fa4000000/Pregel-A-system-for-large-scale-graph-processing.pdf)." Proceedings of the 2010 ACM SIGMOD International Conference on Management of data. 2010.
3 | 1. Rocklin M. [Dask: Parallel computation with blocked algorithms and task scheduling](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.825.5314&rep=rep1&type=pdf)[C]//Proceedings of the 14th python in science conference. Austin, TX: SciPy, 2015, 126.
4 | 1. Power R, Li J. [Piccolo: Building Fast, Distributed Programs with Partitioned Tables](https://static.usenix.org/events/osdi10/tech/full_papers/Power.pdf)[C]//OSDI. 2010, 10: 293-306.
5 | 


--------------------------------------------------------------------------------
/paper_notes/data_processing_systems/figures/ciel_cluster_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/data_processing_systems/figures/ciel_cluster_architecture.png


--------------------------------------------------------------------------------
/paper_notes/data_processing_systems/figures/dynamic-task-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/data_processing_systems/figures/dynamic-task-graph.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/README.md:
--------------------------------------------------------------------------------
 1 | # Reading List
 2 | 
 3 | ## Dataflow architecture
 4 | 
 5 | - [ ] [Timely Dataflow: A model](https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/43546.pdf)
 6 | - [ ] [Dataflow computers: their history and future](https://csrl.cse.unt.edu/kavi/Research/encyclopedia-dataflow.pdf)
 7 | - [ ] [Reducing control overhead in dataflow architectures](http://arcade.cs.columbia.edu/ws-pact06.pdf)
 8 | 
 9 | ## Scheduled Dataflow
10 | 
11 | - [ ] [Scheduled dataflow: execution paradigm, architecture, and performance evaluation](https://www.researchgate.net/profile/Roberto_Giorgi/publication/3044386_Scheduled_Dataflow_Execution_paradigm_architecture_and_performance_evaluation/links/0912f50c049bbceb3c000000/Scheduled-Dataflow-Execution-paradigm-architecture-and-performance-evaluation.pdf?origin=publication_detail)
12 | 
13 | ## Some slides
14 | 
15 | - [ ] [Dataflow architectures](https://homes.cs.washington.edu/~kstrauss/presentations/df-class.pdf)
16 | - [ ] [Computer Architecture: Dataflow (Part I)](https://www.archive.ece.cmu.edu/~ece740/f13/lib/exe/fetch.php?media=onur-740-fall13-module5.2.1-dataflow-part1.pdf)
17 | 
18 | ## Miscellanea
19 | 
20 | 1. [Mueller's publications](https://arcb.csc.ncsu.edu/~mueller/publications.html#mueller91)
21 | 1. [ECE 4530 Hardware/Software Codesign](https://schaumont.dyn.wpi.edu/ece4530f19/)
22 | 1. [dMazeRunner: Executing Perfectly Nested Loops on Dataflow Accelerators](https://dl.acm.org/doi/pdf/10.1145/3358198)
23 | 


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/Manchester-dynamic-dataflow-machine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/Manchester-dynamic-dataflow-machine.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/dataflow-accumulator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/dataflow-accumulator.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/dataflow-graph-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/dataflow-graph-1.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/ill-formed_multi-rate-dataflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/ill-formed_multi-rate-dataflow.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/img1.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/img2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/img2.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/paper-screenshot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/paper-screenshot-1.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/periodic_admissible_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/periodic_admissible_schedule.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/solve_G.q.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/solve_G.q.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/static-dataflow-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/static-dataflow-architecture.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/topology_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/topology_matrix.png


--------------------------------------------------------------------------------
/paper_notes/dataflow-architectures/images/two-input_add_actor_and_two-output_duplicate_actor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/two-input_add_actor_and_two-output_duplicate_actor.png


--------------------------------------------------------------------------------
/paper_notes/dl-compiler/Glow/Glow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/Glow/Glow.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-compiler/Glow/images/low-level-glow-ir.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/Glow/images/low-level-glow-ir.png


--------------------------------------------------------------------------------
/paper_notes/dl-compiler/MLIR/MLIR.md:
--------------------------------------------------------------------------------
 1 | # MLIR
 2 | 
 3 | "flow-sensitive" type checking.
 4 | 
 5 | 
 6 | # Reference
 7 | 
 8 | 1. [Multi-Level Intermediate Representation Overview](https://github.com/tensorflow/mlir#multi-level-intermediate-representation-overview)
 9 | 1. [MLIR Tutorial: Building a Compiler with MLIR](https://llvm.org/devmtg/2019-04/slides/Tutorial-AminiVasilacheZinenko-MLIR.pdf)
10 | 1. [A discussion about MLIR from the TVM community](https://discuss.tvm.ai/t/google-lasted-work-mlir-primer/1721/2).
11 | 1. [A Reddit discussion about MLIR](https://www.reddit.com/r/ProgrammingLanguages/comments/at0alm/mlir_primer_a_compiler_infrastructure_for_the_end/)
12 | 1. [2019 EuroLLVM Developers’ Meeting: “MLIR: Multi-Level Intermediate  Representation”](https://www.youtube.com/watch?v=qzljG6DKgic&feature=youtu.be)
13 | 


--------------------------------------------------------------------------------
/paper_notes/dl-compiler/MLIR/README.md:
--------------------------------------------------------------------------------
1 | # References
2 | 
3 | 1. [The github project](https://github.com/tensorflow/mlir)
4 | 1. Two videos on YouTube
5 |     1. [2019 EuroLLVM Developers’ Meeting: Mehdi & Vasilache & Zinenko “Building a Compiler with MLIR”](https://www.youtube.com/watch?v=cyICUIZ56wQ)
6 | 


--------------------------------------------------------------------------------
/paper_notes/dl-compiler/MLIR/swift_for_tensorflow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/MLIR/swift_for_tensorflow.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-compiler/TVM/TVM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/TVM/TVM.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-compiler/XLA/XLA.md:
--------------------------------------------------------------------------------
 1 | # XLA: The TensorFlow compiler framework
 2 | 
 3 | * [TensorFlow, Compiled!](https://autodiff-workshop.github.io/slides/JeffDean.pdf)
 4 | 
 5 | ## Goals
 6 | 
 7 | 1. Improved execution speed.
 8 | 1. Improved tensor buffer memory usage.
 9 | 1. Make the performance of low-level Ops be the same as that of hand-written fused implementations.
10 | 1. Improved mobile footprint. Eliminate the TensorFlow runtime.
11 | 1. Improved portability.
12 |     * It should be relatively easy to write a new back-end for novel hardware.
13 | 
14 | ## XLA
15 | 
16 | * The semantics of operations are _**high level**_. This preserves enough information to allow sophisticated scheduling and optimization.
17 | 
18 | ![](https://www.tensorflow.org/images/how-does-xla-work.png)
19 | 
20 | * XLA program = static, decomposed TF ops
21 |   * math-looking _**primitive ops**_
22 |   * make _**macro-ops by composition**_
23 | 
24 | ### A key question: why write every new macro-op?
25 | 
26 | * Why write every new macro-op in C++?
27 | * Why can't we compose new operators out of existing TF ops?
28 | 
29 | ### Compilation benefits
30 | 
31 | 1. Eliminates op dispatch overhead.
32 | 1. Fuses ops.
33 |     * reduce memory access
34 | 1. Memory usage analysis
35 |     * reuse memory
36 |     * update in-place
37 | 1. Models to executables: reduce executable size by generating what you need.
38 | 


--------------------------------------------------------------------------------
/paper_notes/dl-compiler/figures/sm-and-sub-core-of-volta.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/figures/sm-and-sub-core-of-volta.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/beyond_transformer/README.md:
--------------------------------------------------------------------------------
1 | 1. [VanillaNet: the Power of Minimalism in
2 | Deep Learning](https://arxiv.org/pdf/2305.12972.pdf)
3 | 1. [RWKV: Reinventing RNNs for the Transformer Era](https://arxiv.org/abs/2305.13048)
4 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/miscellanea/Geometric_deep_learning.md:
--------------------------------------------------------------------------------
1 | # [Geometric Deep Learning: Grids, Groups, Graphs, Geodesics, and Gauges](https://arxiv.org/pdf/2104.13478.pdf)
2 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/miscellanea/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/miscellanea/README.md


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/CW-RNN/A_Clockwork_RNN.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/CW-RNN/A_Clockwork_RNN.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/GridLSTM/GridLSTM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/GridLSTM/GridLSTM.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/HM-LSTM/Hierarchical_multiscale_RNN.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/HM-LSTM/Hierarchical_multiscale_RNN.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/How_Much_Attention_Do_You_Need.md:
--------------------------------------------------------------------------------
1 | # [How Much Attention Do you need](http://aclweb.org/anthology/P18-1167)
2 | 
3 | 1. The performance of recurrent and convolutional models can be very close to the Transformer performance by borrowing concepts from the Transformer architecture, but not using self-attention.
4 | 1. Self-attention is much more important for the encoder side than for the decoder side.
5 |     * On the decoder side, self-attention can be replaced by an RNN or CNN without a loss in performance in most settings.
6 |     * One surprising experimental result is that even a model without any target-side self-attention performs well.
7 | 1. Source attention on lower encoder layers brings no additional benefit.
8 | 1. The largest gains come from multiple attention mechanisms and residual feed-forward layers.
9 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/MD-LSTM/MD-LSTM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/MD-LSTM/MD-LSTM.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/images/MogrifierLSTM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/images/MogrifierLSTM.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/main.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{Note for
 5 | \textit{\href{https://arxiv.org/abs/1909.01792}{Mogrifier LSTM}}}
 6 | \author{Ying Cao}
 7 | \date{\today}
 8 | 
 9 | \begin{document}
10 | 
11 | \maketitle
12 | \tableofcontents
13 | 
14 | \begin{info}[Code information]
15 | 
16 | \begin{itemize}
17 | \item Currently, the authors of this paper have released only their \href{https://github.com/RMichaelSwan/MogrifierLSTM}{experimental code}
18 | on GitHub.
19 | \item The final code has not been released yet. When the code becomes available, it should
20 | be at \href{https://github.com/deepmind/lamb}{https://github.com/deepmind/lamb}.
21 | \end{itemize}
22 | \end{info}
23 | 
24 | \input{contents.tex}
25 | 
26 | {
27 | \small
28 | \raggedright
29 | \bibliographystyle{ieeetr}
30 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt
31 | \begin{spacing}{1}
32 | \bibliography{references.bib}
33 | \end{spacing}
34 | }
35 | \end{document}
36 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/references.bib:
--------------------------------------------------------------------------------
 1 | @inproceedings{foerster2017input,
 2 |   title={Input switched affine networks: an RNN architecture designed for interpretability},
 3 |   author={Foerster, Jakob N and Gilmer, Justin and Sohl-Dickstein, Jascha and Chorowski, Jan and Sussillo, David},
 4 |   booktitle={Proceedings of the 34th International Conference on Machine Learning-Volume 70},
 5 |   pages={1136--1145},
 6 |   year={2017},
 7 |   organization={JMLR. org}
 8 | }
 9 | @article{ha2016hypernetworks,
10 |   title={Hypernetworks},
11 |   author={Ha, David and Dai, Andrew and Le, Quoc V},
12 |   journal={arXiv preprint arXiv:1609.09106},
13 |   year={2016}
14 | }
15 | @article{krause2016multiplicative,
16 |   title={Multiplicative LSTM for sequence modelling},
17 |   author={Krause, Ben and Lu, Liang and Murray, Iain and Renals, Steve},
18 |   journal={arXiv preprint arXiv:1609.07959},
19 |   year={2016}
20 | }
21 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Neural_Speed_Reading_via_Skim_RNN/Neural_Speed_Reading_via_Skim_RNN.md:
--------------------------------------------------------------------------------
1 | [The Gumbel-Max Trick for Discrete Distributions](https://hips.seas.harvard.edu/blog/2013/04/06/the-gumbel-max-trick-for-discrete-distributions/)
2 | 
3 | [Categorical Variational Autoencoders using Gumbel-Softmax](https://blog.evjang.com/2016/11/tutorial-categorical-variational.html)
4 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/ON-LSTM/ON-LSTM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/ON-LSTM/ON-LSTM.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Quasi-Recurrent_neural_network/Quasi-Recurrent_neural_network.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Quasi-Recurrent_neural_network/Quasi-Recurrent_neural_network.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/RNN_Variants.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/RNN_Variants.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/images/CudnnLSTM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/images/CudnnLSTM.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/images/Recurrent_neural_network_unfold.svg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/images/Recurrent_neural_network_unfold.svg.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Sliced_Recurrent_Neural_Networks/Sliced_Recurrent_Neural_Networks.md:
--------------------------------------------------------------------------------
 1 | # [Sliced Recurrent Neural Networks](https://arxiv.org/abs/1807.02291)
 2 | 
 3 | ## Model Structure
 4 | 
 5 | 2 hyperparameters of SRNN
 6 | 
 7 | 1. slice number $n$
 8 | 1. slicing times $k$
 9 | 
10 | The input sequence is $X = [x_1, x_2, ..., x_T]$ whose length is $T$.
11 | 
12 | 1. Slice $X$ into $n$ subsequences of equal length.
13 | 1. Repeat the above process $k$ times until a pre-defined minimum sequence length is obtained.
14 | 1. Apply RNN function to each subsequence.
15 | 
16 | <p align="center">
17 | <img src="images/SRNN.png" width=70%>
18 | </p>
19 | 
20 | ## My Comments
21 | 
22 | Personally, I don't think this work is interesting, for the following reasons:
23 | 
24 | 1. SRNN cannot be applied to sequence labeling tasks. How to apply it to sequence-to-sequence models is unclear and has not been studied.
25 |     * It is only evaluated in text classification (sentiment classification). Text classification is a simple task in the NLP field. Sometimes it does not require "understanding the semantics of the language" (A good sentiment analysis model does need to understand the semantics, which is also the core challenge in NLP field. Whether SRNN shows some advantages over modeling semantics or not requires more careful evaluation). The model can achieve high accuracy by overfitting or capturing some statistical significance of training data.
26 |     * SRNN cannot even be used directly in an RNN LM.
27 |     * The evaluation is not enough.
28 | 1. SRNN cannot be stacked for multiple layers which are very important in RNN modeling. If there is only one RNN unit, the state transition between the current and previous state is shallow.
29 | 1. SRNN is not novel. It works much like recursive neural networks, which were proposed by Socher several years ago. I don't think it makes new contributions. The work is not reliable.
30 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Sliced_Recurrent_Neural_Networks/images/SRNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Sliced_Recurrent_Neural_Networks/images/SRNN.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/The_Unreasonable_Effectiveness_of_the_Forget_Gate/The_Unreasonable_Effectiveness_of_the_Forget_Gate.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/The_Unreasonable_Effectiveness_of_the_Forget_Gate/The_Unreasonable_Effectiveness_of_the_Forget_Gate.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Training_RNNs_as_Fast_as_CNNs/Training_RNNs_as_Fast_as_CNNs.md:
--------------------------------------------------------------------------------
 1 | # [Training RNNs as Fast as CNNs](https://arxiv.org/abs/1709.02755)
 2 | 
 3 | ## Model
 4 | 
 5 | ### Main motivations
 6 | 
 7 | 1. _**process the input at each step independently of the other inputs.**_
 8 | 1. _**do the recurrent combination with relatively lightweight computation (element-wise operations that can be fused into a single kernel function call).**_
 9 | 
10 | ### Equations of Simple Recurrent Units (SRU)
11 | 
12 | * _**linear transformation of the input**_
13 |   $$ \mathbf{\tilde{x}}_t = \mathbf{W}\mathbf{x}_t $$
14 | 
15 | * _**forget gate**_
16 | 
17 |   $$ \mathbf{f}_t = \sigma(\mathbf{W}_f\mathbf{x}_t + \mathbf{b}_f) $$
18 | 
19 | * _**reset gate**_
20 |   $$ \mathbf{r}_t = \sigma(\mathbf{W}_r\mathbf{x}_t + \mathbf{b}_r) $$
21 | 
22 | * _**internal state**_
23 | 
24 |   $$ \mathbf{c}_t = \mathbf{f}_t \odot \mathbf{c}_{t - 1} + (\mathbf{1} - \mathbf{f}_t) \odot \mathbf{\tilde{x}}_t $$
25 | 
26 | * _**output state**_
27 | 
28 |   $$ \mathbf{h}_t = \mathbf{r}_t \odot g(\mathbf{c}_t) + (\mathbf{1} - \mathbf{r}_t) \odot \mathbf{x}_t $$
29 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Training_RNNs_as_Fast_as_CNNs/Training_RNNs_as_Fast_as_CNNs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Training_RNNs_as_Fast_as_CNNs/Training_RNNs_as_Fast_as_CNNs.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Transformer/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Transformer/README.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/Transformer/images/QK.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Transformer/images/QK.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/WaveRNN.md:
--------------------------------------------------------------------------------
1 | # [Efficient Neural Audio Synthesis](https://arxiv.org/pdf/1802.08435.pdf)
2 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/2d_lstm_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/2d_lstm_1.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/3D-GridLSTM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/3D-GridLSTM.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/CWRNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/CWRNN.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/CWRNN.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/CWRNN.pptx


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/DilatedRNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/DilatedRNN.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/DilatedRNN1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/DilatedRNN1.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/GridLSTM-NMT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/GridLSTM-NMT.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/HM-LSTM-pre-activation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/HM-LSTM-pre-activation.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/LSTM_equation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/LSTM_equation.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/active_modules.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/active_modules.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/active_modules.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/active_modules.pptx


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/boundary_state.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/boundary_state.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/boundary_state.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/boundary_state.pptx


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/hardsigmoid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/hardsigmoid.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/hm-lstm-cell-update.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/hm-lstm-cell-update.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/hm-lstm-output-hidden.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/hm-lstm-output-hidden.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/multi-dimensioanl-rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/multi-dimensioanl-rnn.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/multi-dimensional-multi-directional-context.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/multi-dimensional-multi-directional-context.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/wh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/wh.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/RNN-modeling/images/wh.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/wh.pptx


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/ALBERT.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/ALBERT.md


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/XLNet.md:
--------------------------------------------------------------------------------
1 | # [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237)
2 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/ELMo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/ELMo.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/GPT-auxiliary-training-object.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/GPT-auxiliary-training-object.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/STLR-figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/STLR-figure.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/ULM-FiT-STLR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/ULM-FiT-STLR.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/biLM-ELMo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/biLM-ELMo.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/dataset-ULM-FiT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/dataset-ULM-FiT.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/elmo-vectors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/elmo-vectors.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/example-language-inference.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/example-language-inference.jpg


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/example-of-auxiliary-prediction-taks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/example-of-auxiliary-prediction-taks.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM1.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM2.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM3.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-input.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/highway.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highway.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/highway2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highway2.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/how-bert-comes-out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/how-bert-comes-out.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/input-of-bert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/input-of-bert.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/intro.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/intro.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/task-specific-input-transformation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/task-specific-input-transformation.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/images/transformer-block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/transformer-block.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/pre-training/learning-language-representation-slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/learning-language-representation-slides.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-models/nlp/x-former/README.md:
--------------------------------------------------------------------------------
1 | 1. [The Transformer Family](https://lilianweng.github.io/lil-log/2020/04/07/the-transformer-family.html)
2 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/structured_state_space_models/README.md:
--------------------------------------------------------------------------------
 1 | # Paper
 2 | 
 3 | 1. [Hungry Hungry Hippos: Towards Language Modeling with State Space Models](https://arxiv.org/abs/2212.14052)
 4 | 1. [Efficiently Modeling Long Sequences with Structured State Spaces](https://arxiv.org/abs/2111.00396)
 5 | 
 6 | # Blogs
 7 | 
 8 | 1. [Structured State Spaces for Sequence Modeling (S4)](https://hazyresearch.stanford.edu/blog/2022-01-14-s4-1)
 9 | 1. [Simplifying S4](https://hazyresearch.stanford.edu/blog/2022-06-11-simplifying-s4)
10 | 1. [The Annotated S4](https://srush.github.io/annotated-s4/)
11 | 1. [H3: Language Modeling with State Space Models and (Almost) No Attention](https://hazyresearch.stanford.edu/blog/2023-01-20-h3)
12 | 
13 | 1. Simplified State Space Layers for Sequence Modeling (S5): [[知乎]](https://zhuanlan.zhihu.com/p/616238788)
14 | 1. [Mamba: S4史诗级升级](https://zhuanlan.zhihu.com/p/661237120)
15 | 1. [VQ一下Key，Transformer的复杂度就变成线性了](https://kexue.fm/archives/9844)


--------------------------------------------------------------------------------
/paper_notes/dl-models/structured_state_space_models/maba/README.md:
--------------------------------------------------------------------------------
 1 | # Mamba
 2 | 
 3 | 序列建模的一个根本性问题是将context压缩为state。LTI（Linear Time Invariant）：线性时不变系统，参数与输入无关，CNN和RNN模型都可以认为是LTI。**而attention的成功arguably地认为system的dynamics是data dependent**。计算attention时，QKV序列的token之间会进行交互。
 4 | 
 5 | RNN将上下文压缩进有限长度的state，相比之下，attention完全不压缩context。autoregressive模式预测时，压缩context到固定长度的状态，决定了RNN在时间和空间上都是高效的，而attention要保留所有的context不进行压缩，计算和空间都是不高效的。
 6 | 
 7 | RNN模型的有效性受到how well the context is compressed的影响。
 8 | 
 9 | # Reference
10 | 
11 | 1. Mamba: Linear-Time Sequence Modeling with Selective State Spaces: [[paper]](https://arxiv.org/pdf/2312.00752.pdf) [[codes]](https://github.com/state-spaces/mamba) [[open review]](https://openreview.net/forum?id=AL1fq05o7H)
12 | 1. [Transformer Quality in Linear Time](https://arxiv.org/pdf/2202.10447.pdf)
13 | 1. [Mamba - a replacement for Transformers?](https://www.youtube.com/watch?v=ouF-H35atOY)
14 | 1. [Legendre Memory Units: Continuous-Time Representation in Recurrent Neural Networks](https://proceedings.neurips.cc/paper/2019/file/952285b9b7e7a1be5aa7849f32ffff05-Paper.pdf)
15 | 1. [Combining recurrent, convolutional, and continuous-time models with linear state-space layers](https://arxiv.org/pdf/2110.13985.pdf)
16 | 
17 | 几篇与RNN有关的工作
18 | 
19 | 1. Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention [[youtube video]](https://www.youtube.com/watch?v=hAooAOFRsYc)
20 | 1. Retentive Network: A Successor to Transformer for Large Language Models [[youtube video]](https://www.youtube.com/watch?v=ec56a8wmfRk)
21 | 1. RWKV: Reinventing RNNs for the Transformer Era [[youtube video]](https://www.youtube.com/watch?v=x8pW19wKfXQ)


--------------------------------------------------------------------------------
/paper_notes/dl-models/vision/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/vision/README.md


--------------------------------------------------------------------------------
/paper_notes/dl-models/vision/ResNeXt/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/vision/ResNeXt/README.md


--------------------------------------------------------------------------------
/paper_notes/dl-models/vision/SSD/images/SSD.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/vision/SSD/images/SSD.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/vision/SSD/images/SSD2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/vision/SSD/images/SSD2.png


--------------------------------------------------------------------------------
/paper_notes/dl-models/vision/SqueezeNet/README.md:
--------------------------------------------------------------------------------
1 | 
2 | Identify a CNN architecture with fewer parameters, but equivalent accuracy.
3 | 
4 | ## Related works
5 | 
6 | * CNN microarchitecture.
7 | 
8 |   * With the trend of designing very deep CNNs, it becomes cumbersome to manually select filter dimensions for each layer.
9 | 


--------------------------------------------------------------------------------
/paper_notes/dl-models/vision/Xception/README.md:
--------------------------------------------------------------------------------
 1 | #### Background
 2 | 
 3 | - LeNet style models
 4 |     - simple stacks of convolutions for feature extraction and max-pooling operations for spatial sub-sampling.
 5 |     - refine into AlexNet
 6 |         - convolution operations were being repeated multiple times in-between max-pooling.
 7 |     - this style network goes deeper: a refined version: VGG
 8 | - a new style architecture: Inception architecture
 9 |     - inspired by [network-in-network](https://arxiv.org/abs/1312.4400)
10 |     - architectures:
11 |         - GoogLeNet : InceptionV1
12 |         - InceptionV2 : [Batch normalization: Accelerating
13 | deep network training by reducing internal covariate shift](https://arxiv.org/abs/1502.03167)
14 |         - InceptionV3 : [Rethinking the inception architecture for computer vision](https://arxiv.org/abs/1512.00567)
15 |         - Inception-ResNet : [Inception-v4,
16 | inception-resnet and the impact of residual connections on
17 | learning](https://arxiv.org/abs/1602.07261)
18 | 
19 | #### Inception hypothesis
20 | 
21 | >cross-channel correlations and spatial correlations are sufficiently decoupled that it is
22 | preferable not to map them jointly.
23 | 
24 | - A convolution layer attempts to learn filters in a 3D space, with 2 spatial dimensions (width and height) and a channel dimension
25 | - thus a single convolution kernel is tasked with **simultaneously mapping cross-channel correlations and spatial correlations**.
26 | - make this process easier and more efficient by explicitly factoring it into a series of operations that would **independently** look at (1) cross-channel correlations and at (2) spatial correlations.
27 | 


--------------------------------------------------------------------------------
/paper_notes/dl-systems/A_computational_model_for_TensorFlow.md:
--------------------------------------------------------------------------------
 1 | <!-- TOC depthFrom:1 depthTo:6 withLinks:1 updateOnSave:1 orderedList:0 -->
 2 | 
 3 | - [A Computational Model for TensorFlow](#a-computational-model-for-tensorflow)
 4 | - [My Takeaways](#my-takeaways)
 5 | - [References](#references)
 6 | 
 7 | <!-- /TOC -->
 8 | 
 9 | # A Computational Model for TensorFlow
10 | 
11 | [link](http://delivery.acm.org/10.1145/3090000/3088527/pldiws17mapl-maplmainid2.pdf?ip=167.220.233.19&id=3088527&acc=OA&key=9625D833D637755D%2E9625D833D637755D%2E49016EFF332EB890%2E5945DC2EABF3343C&__acm__=1562203775_b4b8c2917e3b8a8dc79240c123a435f0)
12 | 
13 | # My Takeaways
14 | 
15 | 1. TensorFlow's design has its roots in _**a long line of work on dataflow systems, which includes much deeper and harder results**_. See references [1](#references) and [2](#references).
16 | 
17 | # References
18 | 
19 | 1. [A relational model of non-deterministic dataflow](https://www.cl.cam.ac.uk/~gw104/journalbib.pdf)
20 | 1. A fully abstract trace model for dataflow and asynchronous networks
21 | 


--------------------------------------------------------------------------------
/paper_notes/dl-systems/Beyond_Data_and_Model_Parallelism_for_Deep_Neural_Networks.md:
--------------------------------------------------------------------------------
1 | # [Beyond Data and Model Parallelism for Deep Neural Networks](https://www.sysml.cc/doc/2019/16.pdf)
2 | 


--------------------------------------------------------------------------------
/paper_notes/dl-systems/Cavs_An_Efficient_Runtime_System_for_Dynamic_Neural_Networks.md:
--------------------------------------------------------------------------------
1 | # [Cavs: An Efficient Runtime System for Dynamic Neural Networks](https://www.usenix.org/system/files/conference/atc18/atc18-xu-shizhen.pdf)
2 | 


--------------------------------------------------------------------------------
/paper_notes/dl-systems/images/Pydron.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/Pydron.png


--------------------------------------------------------------------------------
/paper_notes/dl-systems/images/SSA_translation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/SSA_translation.png


--------------------------------------------------------------------------------
/paper_notes/dl-systems/images/functioin_call_translation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/functioin_call_translation.png


--------------------------------------------------------------------------------
/paper_notes/dl-systems/images/tf_eager_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/tf_eager_01.png


--------------------------------------------------------------------------------
/paper_notes/dl-systems/images/tf_eager_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/tf_eager_02.png


--------------------------------------------------------------------------------
/paper_notes/dl-systems/images/tf_eager_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/tf_eager_03.png


--------------------------------------------------------------------------------
/paper_notes/dl-systems/tf-cfg-design/Deep_learning_with_dynamic_computation_graphs.md:
--------------------------------------------------------------------------------
 1 | # [Deep Learning with Dynamic Computation Graphs](https://arxiv.org/abs/1702.02181)
 2 | 
 3 | ## Problem proposed in this paper
 4 | 
 5 | Create single static graphs that emulate dynamic computation graphs of arbitrary shape and size.
 6 | 
 7 | ## Dynamic batching
 8 | 
 9 | TF fold is a high-level library that provides:
10 | - _**compositional blocks**_: sub-graph. This is to simplify the creation of dynamic graph models.
11 | - _**batch-wise**_ parallel implementations for a variety of models.
12 | 
13 | This paper proposes the dynamic batching approach.
14 | 
15 | 1. The dynamic batching algorithm is implemented as graph rewriting.
16 | 1. Inputs are described as computation graphs. Dynamic batching takes DAG as its input.
17 | 1. Schedule on sub-graph, not operations.
18 | 1. batch both computation and input data
19 |     - Nodes (operations) with the same height are independent and can be batched together.
20 |     - `gather`, `concatenate`, etc. are inserted to collect input data. Correspondingly, `scatter`, `split`, etc. are inserted in gradient computation.
21 | 1. use `tf.while_op` to iterate over depth which relies on input data.
22 | 


--------------------------------------------------------------------------------
/paper_notes/dl-systems/tf-cfg-design/tf-while-op-impl.md:
--------------------------------------------------------------------------------
 1 | two flavours of control flow constructs.
 2 | 
 3 | 1. functional ops
 4 | 1. low-level primitive
 5 | 
 6 | 
 7 | 
 8 | # Reference
 9 | 
10 | 1. [Update in TF 2.0: Functional while_loop](https://github.com/tensorflow/community/blob/master/rfcs/20180821-differentiable-functional-while.md)
11 | 


--------------------------------------------------------------------------------
/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/Optimizing_RNN_performance.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/Optimizing_RNN_performance.pdf


--------------------------------------------------------------------------------
/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/multiple_layer_optimization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/multiple_layer_optimization.png


--------------------------------------------------------------------------------
/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/pic1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/pic1.png


--------------------------------------------------------------------------------
/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/pic2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/pic2.png


--------------------------------------------------------------------------------
/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/single_cell_optimization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/single_cell_optimization.png


--------------------------------------------------------------------------------
/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/single_layer_optimization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/single_layer_optimization.png


--------------------------------------------------------------------------------
/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/starting_point.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/starting_point.png


--------------------------------------------------------------------------------
/paper_notes/dl-workload-optimizations/README.md:
--------------------------------------------------------------------------------
 1 | * Computation
 2 |     * [Cudnn RNN optimization](https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/)
 3 |     * [Mixed precision training](https://github.com/NVIDIA/OpenSeq2Seq) : needs device-level support, and may only work on NVIDIA Volta GPUs
 4 |     * [Persistent RNN](http://proceedings.mlr.press/v48/diamos16.pdf)
 5 | * Memory Access
 6 | 
 7 | _**Usually the above two goals are achieved by optimized kernel implementations.**_
 8 | 
 9 | * Communication
10 |   * optimize TensorFlow's original implementation
11 |       * [Baidu's allreduce for TensorFlow](https://github.com/baidu-research/tensorflow-allreduce/compare/allreduce-patch-1.0)
12 |       * [Uber's Horovod](https://github.com/uber/horovod)
13 |   * quantized gradients
14 | * Operator scheduling
15 |   * overlap computation and memory copy
16 | * synchronization overhead by synchronous SGD algorithm
17 | 
18 | ---
19 | 
20 | For RNN model, how to train _**very large and deep models for very long sequences**_ on one GPU efficiently.
21 | 


--------------------------------------------------------------------------------
/paper_notes/dl-workload-optimizations/ShuffleNet_v2.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/ShuffleNet_v2.md


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/README.md:
--------------------------------------------------------------------------------
 1 | ### Numeric/experimental investigations
 2 | 
 3 | - Wilson D R, Martinez T R. The general inefficiency of batch training for gradient descent learning[J]. Neural Networks, 2003, 16(10): 1429-1451.
 4 | - Keskar N S, Mudigere D, Nocedal J, et al. [On large-batch training for deep learning: Generalization gap and sharp minima](rendered/On_Large-Batch_Training_for_Deep_Learning.pdf)[J]. arXiv preprint arXiv:1609.04836, 2016.
 5 | 
 6 | ### Theoretical Analysis
 7 | 
 8 | > Background Knowledge: [Bayesian model comparison](rendered/Bayesian_Model_Comparison.pdf)
 9 | 
10 | - Smith S L, Le Q V. [A bayesian perspective on generalization and stochastic gradient descent](rendered/A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.pdf)[C]//Proceedings of Second workshop on Bayesian Deep Learning (NIPS 2017). 2017.
11 | - Hoffer E, Hubara I, Soudry D. [Train longer, generalize better: closing the generalization gap in large batch training of neural networks](rendered/Train_Longer_Generalize_Better.pdf)[C]//Advances in Neural Information Processing Systems. 2017: 1729-1739.
12 | 
13 | ### Proposed Solutions
14 | 
15 | - [Large batch training of convolutional networks](rendered/Large_Batch_Training_of_Convolutional_Networks.pdf)
16 | - Devarakonda A, Naumov M, Garland M. AdaBatch: Adaptive Batch Sizes for Training Deep Neural Networks[J]. arXiv preprint arXiv:1712.02029, 2017.
17 | - Masters D, Luschi C. Revisiting Small Batch Training for Deep Neural Networks[J]. arXiv preprint arXiv:1804.07612, 2018.
18 | - [Highly Scalable Deep Learning Training System with Mixed-Precision: Training ImageNet in Four Minutes](rendered/Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.pdf)
19 | 


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/rendered/A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.pdf


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/rendered/Bayesian_Model_Comparison.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/Bayesian_Model_Comparison.pdf


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/rendered/Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.pdf


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/rendered/Large_Batch_Training_of_Convolutional_Networks.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/Large_Batch_Training_of_Convolutional_Networks.pdf


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/rendered/On_Large-Batch_Training_for_Deep_Learning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/On_Large-Batch_Training_for_Deep_Learning.pdf


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/rendered/Train_Longer_Generalize_Better.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/Train_Longer_Generalize_Better.pdf


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/Accurate_Large_Minibatch_SGD.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/Accurate_Large_Minibatch_SGD.md


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/f1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/f1.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/f2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/f2.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/fig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/fig1.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/figure1.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/figure2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/figure2.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/insert_bn_after_pool5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/insert_bn_after_pool5.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/mixed_precision_with_LARS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/mixed_precision_with_LARS.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/network_configuration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/network_configuration.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/scalability.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/scalability.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/sharpness_metric.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/sharpness_metric.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/sharpness_of_minimizers_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/sharpness_of_minimizers_1.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/sharpness_of_minimizers_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/sharpness_of_minimizers_2.png


--------------------------------------------------------------------------------
/paper_notes/generalization-of-neural-network/sources/images/warmup_experiments.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/warmup_experiments.png


--------------------------------------------------------------------------------
/paper_notes/large-language-models/GPT/GPT-models.md:
--------------------------------------------------------------------------------
 1 | <p align="center">
 2 | <img src="figures/Full_GPT_architecture.png" width=60%><br>
 3 | Fig. The original GPT model. (from <a href="https://en.wikipedia.org/wiki/Generative_pre-trained_transformer">wiki page</a>)
 4 | </p>
 5 | 
 6 | 
 7 | <p align="center">
 8 | <img src="figures/GPT-3-model-size.png" width=80%>
 9 | </p>
10 | 
11 | |Model|Context Window Length|
12 | |:--|:--|
13 | |GPT|512|
14 | |GPT-2|1024|
15 | |GPT-3|2048|
16 | |GPT-4|[8000](https://help.openai.com/en/articles/7127966-what-is-the-difference-between-the-gpt-4-models)|


--------------------------------------------------------------------------------
/paper_notes/large-language-models/GPT/figures/Full_GPT_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/GPT/figures/Full_GPT_architecture.png


--------------------------------------------------------------------------------
/paper_notes/large-language-models/GPT/figures/GPT-3-model-size.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/GPT/figures/GPT-3-model-size.png


--------------------------------------------------------------------------------
/paper_notes/large-language-models/README.md:
--------------------------------------------------------------------------------
 1 | 这里主要用来记录自己围绕着LLM相关的学习和相关工作梳理。计划按照这样的几条线索：
 2 | 
 3 | 模型研究层面（模型结构，训练实践）：
 4 | 1. GPT
 5 | 2. ViT
 6 | 3. Diffusion
 7 | 
 8 | 系统 for LLM
 9 | 1. LLM相关的系统研究。
10 | 
11 | 
12 | # 相关项目
13 | 
14 | 1. **LLAMA download**: [llama-dl](https://github.com/shawwn/llama-dl)
15 | 1. "**LLaMA**: Open and Efficient Foundation Language Models"[[PDF]](https://arxiv.org/abs/2302.13971v1) [[github]](https://github.com/facebookresearch/llama)
16 | 1. "**Alpaca**: A Strong, Replicable Instruction-Following Model"[[PDF]](https://arxiv.org/pdf/2303.16199.pdf) [[github]](https://github.com/replicate/cog_stanford_alpaca) [[website]](https://crfm.stanford.edu/2023/03/13/alpaca.html)
17 | 1. **Alpaca-LoRA**: [[github]](https://github.com/tloen/alpaca-lora#-alpaca-lora)
18 | 1. **VisualGLM-6B**: [[github]](https://github.com/THUDM/VisualGLM-6B)
19 | 1. **Falcon LLM**: [huggingface](https://huggingface.co/tiiuae)


--------------------------------------------------------------------------------
/paper_notes/large-language-models/fast-attention/Flash-Attention.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/fast-attention/Flash-Attention.pdf


--------------------------------------------------------------------------------
/paper_notes/large-language-models/fast-attention/README.md:
--------------------------------------------------------------------------------
1 | # Reading List
2 | 
3 | 1. "xFormers: A modular and hackable Transformer modelling library" [[codes]](https://github.com/facebookresearch/xformers)
4 | 1. “Simple Local Attentions Remain Competitive for Long-Context Tasks” [[PDF]](https://arxiv.org/pdf/2112.07210.pdf)
5 | 1. "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention"[[PDF]](https://arxiv.org/pdf/2006.16236.pdf)
6 | 1. "Linformer: Self-Attention with Linear Complexity"[[PDF]](https://arxiv.org/abs/2006.04768)
7 | 1. "Reformer: The Efficient Transformer"[[PDF]](https://arxiv.org/abs/2001.04451)


--------------------------------------------------------------------------------
/paper_notes/large-language-models/llm_inference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/llm_inference.pdf


--------------------------------------------------------------------------------
/paper_notes/large-language-models/transformer-optimizations/RMS_layernorm.md:
--------------------------------------------------------------------------------
1 | [Root Mean Square Layer Normalization](https://arxiv.org/pdf/1910.07467.pdf) [[**code**]](https://github.com/bzhangGo/rmsnorm)
2 | 
3 | 


--------------------------------------------------------------------------------
/paper_notes/large-language-models/transformer-optimizations/figures/block-schedule-with-overlap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/transformer-optimizations/figures/block-schedule-with-overlap.png


--------------------------------------------------------------------------------
/paper_notes/large-language-models/transformer-optimizations/figures/computeation-graph-of-llm-inference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/transformer-optimizations/figures/computeation-graph-of-llm-inference.png


--------------------------------------------------------------------------------
/paper_notes/large-language-models/transformer-optimizations/figures/pre-post-layer-normalization-in-transformer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/transformer-optimizations/figures/pre-post-layer-normalization-in-transformer.png


--------------------------------------------------------------------------------
/paper_notes/large-language-models/transformer-optimizations/figures/two-different-schedules.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/transformer-optimizations/figures/two-different-schedules.png


--------------------------------------------------------------------------------
/paper_notes/large-language-models/whisper/figures/whisper-model-size.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/whisper/figures/whisper-model-size.png


--------------------------------------------------------------------------------
/paper_notes/large-language-models/whisper/figures/whisper_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/whisper/figures/whisper_overview.png


--------------------------------------------------------------------------------
/paper_notes/leading-edge-ai/Capsules/Dynamic_Routing_between_Capsule.md:
--------------------------------------------------------------------------------
 1 | # [Dynamic Routing between Capsule](https://arxiv.org/pdf/1710.09829.pdf)
 2 | 
 3 | ## Overview
 4 | 
 5 | This work is inspired by introspection on human vision: human vision ignores irrelevant details by using a carefully determined sequence of fixation points.
 6 | 
 7 | 1. This paper designs a _**multi-layer visual system**_: a parse tree-like structure on _**each fixation**_.
 8 |     - For a single fixation, a parse tree is carved(what's this?) out of a fixed _**multilayer**_ neural network.
 9 |     - Each group
10 | 1. _**Ignores**_ the issue of how these single fixation parse trees are coordinated over multiple fixations.
11 | 


--------------------------------------------------------------------------------
/paper_notes/leading-edge-ai/Capsules/README.md:
--------------------------------------------------------------------------------
1 | # Reading List
2 | 


--------------------------------------------------------------------------------
/paper_notes/leading-edge-ai/README.md:
--------------------------------------------------------------------------------
1 | # Reading List
2 | 
3 | 1. [Recurrent Independent Mechanisms](https://arxiv.org/abs/1909.10893)
4 |     - [Reddit discussion](https://www.reddit.com/r/MachineLearning/comments/dak4ym/r_recurrent_independent_mechanisms/)
5 | 


--------------------------------------------------------------------------------
/paper_notes/leading-edge-ai/RIM.md:
--------------------------------------------------------------------------------
 1 | # RIM
 2 | 
 3 | ## My take-aways
 4 | 
 5 | ## Motivation
 6 | 
 7 | ## Challenges
 8 | 
 9 | ## Approach
10 | 
11 | ## Evaluation
12 | 
13 | # Reference
14 | 
15 | 1. [Recurrent Independent Mechanisms](https://arxiv.org/abs/1909.10893)
16 | 


--------------------------------------------------------------------------------
/paper_notes/miscellanea/Neural_Ordinary_Differential_Equations.md:
--------------------------------------------------------------------------------
 1 | # Neural Ordinary Differential Equations
 2 | 
 3 | ---
 4 | 
 5 | ## Background
 6 | 
 7 | ### What is an ordinary differential equation(ODE)?
 8 | 
 9 | An ODE is an equation that involves ordinary derivatives of a function; "ordinary" here is as opposed to partial derivatives.
10 | * For example, if we know $\frac{dx}{dt}(t) = \cos t$, then what is the function $x(t)$?
11 | * In general, the basic principle to solve an ODE is _**always integration**_.
12 | 
13 | [An introduction to ordinary differential equations](https://mathinsight.org/ordinary_differential_equation_introduction)
14 | 
15 | ---
16 | 
17 | ### Input and output of ODE solver?
18 | 
19 | ### The adjoint sensitivity method
20 | * http://math.mit.edu/~stevenj/18.336/adjoint.pdf
21 | * A blog about the [Adjoint Sensitivity Method](https://advancedoptimizationatharvard.wordpress.com/2014/03/02/adjoint-sensitivity-method/)
22 | 
23 | ## References
24 | 
25 | 1. [DiffEqFlux.jl – A Julia Library for Neural Differential Equations](https://julialang.org/blog/2019/01/fluxdiffeq)
26 | 


--------------------------------------------------------------------------------
/paper_notes/miscellanea/README.md:
--------------------------------------------------------------------------------
1 | Backup of unclassified notes.
2 | 


--------------------------------------------------------------------------------
/paper_notes/ml-with-discrete-variables/README.md:
--------------------------------------------------------------------------------
 1 | # Reading List
 2 | 
 3 | - [ ] [Categorical Reparameterization with Gumbel-Softmax](https://openreview.net/pdf?id=rkE3y85ee)
 4 |     - [The Gumbel-Max Trick for Discrete Distributions](https://lips.cs.princeton.edu/the-gumbel-max-trick-for-discrete-distributions/)
 5 |     - [The Humble Gumbel Distribution](http://amid.fish/humble-gumbel)
 6 | - [ ] [Binarized neural networks: Training deep neural networks with weights and activations constrained to +1 or -1](https://arxiv.org/abs/1602.02830)
 7 | - [ ] [Strategic attentive writer for learning macro-actions](https://pdfs.semanticscholar.org/c3dd/2bf141c1371398e29ad37ced18bee34e1766.pdf)
 8 | - [ ] [Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation](https://arxiv.org/pdf/1308.3432.pdf)
 9 | - [ ] [Gradient Estimation Using Stochastic Computation Graphs](https://arxiv.org/pdf/1506.05254.pdf)
10 | - [ ] [Estimating or Propagating Gradients Through Stochastic Neurons](https://arxiv.org/abs/1305.2982)
11 | 
12 | ## Some blogs
13 | 
14 | 1. [Neural Networks gone wild! They can sample from discrete distributions now](http://anotherdatum.com/gumbel-gan.html)
15 | 


--------------------------------------------------------------------------------
/paper_notes/ml-with-discrete-variables/Straight-throughEstimator.md:
--------------------------------------------------------------------------------
1 | # STE
2 | 
3 | 
4 | # Reference
5 | 
6 | 1. [Understanding Straight-Through Estimator in Training Activation Quantized Neural Nets](https://arxiv.org/abs/1903.05662)
7 | 1. [Understanding Straight-Through Estimator in Training Activation Quantized Neural Nets](https://openreview.net/forum?id=Skh4jRcKQ) [ICLR 2019]
8 | 


--------------------------------------------------------------------------------
/paper_notes/normalization-in-NN/L2_Regularization_versus_Batch_and_Weight_Normalization/L2_Regularization_versus_Batch_and_Weight_Normalization.md:
--------------------------------------------------------------------------------
1 | - weight scale invariance in normalization method
2 | 


--------------------------------------------------------------------------------
/paper_notes/normalization-in-NN/Layer_Normalization/layer_normalization.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/normalization-in-NN/Layer_Normalization/layer_normalization.pdf


--------------------------------------------------------------------------------
/paper_notes/normalization-in-NN/README.md:
--------------------------------------------------------------------------------
1 | - [Layer Normalization](Layer_Normalization/layer_normalization.pdf)
2 | - [Weight Normalization](Weight_Normalization/weight_normalization.pdf)
3 | - A good blog to explain why batch normalization works intuitively: [An Intuitive Explanation of Why Batch Normalization Really Works]( http://mlexplained.com/2018/01/10/an-intuitive-explanation-of-why-batch-normalization-really-works-normalization-in-deep-learning-part-1/)
4 | 
5 |   >_Batch normalization makes the mean and variance of the activations of each layer independent from the values themselves. This means that the magnitude of the higher order interactions are going to be suppressed, allowing larger learning rates to be used._
6 | 


--------------------------------------------------------------------------------
/paper_notes/normalization-in-NN/Weight_Normalization/weight_normalization.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/normalization-in-NN/Weight_Normalization/weight_normalization.pdf


--------------------------------------------------------------------------------
/paper_notes/normalization-in-NN/optimization/Hessian_and_DeepLearning_Optimizaiton.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/normalization-in-NN/optimization/Hessian_and_DeepLearning_Optimizaiton.pdf


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/IRs/lift.md:
--------------------------------------------------------------------------------
 1 | <!-- TOC depthFrom:1 depthTo:6 withLinks:1 updateOnSave:1 orderedList:0 -->
 2 | 
 3 | - [LIFT](#lift)
 4 |   - [Intermediate language](#intermediate-language)
 5 |     - [1. Algorithmic Patterns](#1-algorithmic-patterns)
 6 |     - [2. Data Layout Patterns](#2-data-layout-patterns)
 7 |     - [3. Parallel Patterns](#3-parallel-patterns)
 8 |     - [4. Address Space Patterns](#4-address-space-patterns)
 9 |     - [5. Vectorize Pattern](#5-vectorize-pattern)
10 | - [Reference](#reference)
11 | 
12 | <!-- /TOC -->
13 | 
14 | # LIFT
15 | 
16 | - High-level languages based on parallel patterns capture rich information about the algorithmic structure of programs.
17 | - The foundation of the Lift IL is lambda calculus which formalizes the reasoning about functions, their **composition**, **nesting** and application.
18 | 
19 | ## Intermediate language
20 | 
21 | The Lift IL expresses programs as compositions and nestings of functions which operate on <font color=#C71585> **arrays**</font>.
22 | 
23 | ### 1. Algorithmic Patterns
24 | 
25 | 1. mapSeq
26 | 1. reduceSeq
27 | 1. iterate
28 | 
29 | ### 2. Data Layout Patterns
30 | 
31 | 1. split
32 | 1. join
33 | 1. gather
34 | 1. scatter
35 | 1. zip
36 | 1. slide
37 | 
38 | ### 3. Parallel Patterns
39 | 
40 | 1. mapGlb
41 | 1. mapWrg
42 | 1. mapLcl
43 | 
44 | ### 4. Address Space Patterns
45 | 
46 | 1. toGlobal
47 | 1. toLocal
48 | 1. toPrivate
49 | 
50 | ### 5. Vectorize Pattern
51 | 
52 | 1. asVector
53 | 1. asScalar
54 | 1. mapVec
55 | 
56 | # Reference
57 | 
58 | 1. Steuwer, Michel, Toomas Remmelg, and Christophe Dubach. "[Lift: a functional data-parallel IR for high-performance GPU code generation](https://eprints.gla.ac.uk/146596/1/146596.pdf)." 2017 IEEE/ACM International Symposium on Code Generation and Optimization (CGO). IEEE, 2017.
59 | 


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/IRs/nova.md:
--------------------------------------------------------------------------------
 1 | <!-- vscode-markdown-toc -->
 2 | - [Nova](#nova)
 3 |   - [1. <a name='Built-inparalleloperations'></a> Built-in parallel operations](#1--built-in-parallel-operations)
 4 | - [Reference](#reference)
 5 | 
 6 | <!-- vscode-markdown-toc-config
 7 |     numbering=true
 8 |     autoSave=true
 9 |     /vscode-markdown-toc-config -->
10 | <!-- /vscode-markdown-toc -->
11 | 
12 | # Nova
13 | 
14 | ##  1. <a name='Built-inparalleloperations'></a> Built-in parallel operations
15 | 
16 | |Operation|
17 | |--|
18 | |map|
19 | |reduce|
20 | |scan|
21 | |permute|
22 | |gather|
23 | |slice|
24 | |filter|
25 | 
26 | # Reference
27 | 
28 | 1. Collins, Alexander, et al. "[NOVA: A functional language for data parallelism](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.666.8678&rep=rep1&type=pdf)." Proceedings of ACM SIGPLAN International Workshop on Libraries, Languages, and Compilers for Array Programming. 2014.
29 | 


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/Nesl/VCODE.md:
--------------------------------------------------------------------------------
 1 | <!-- vscode-markdown-toc -->
 2 | - [VCODE](#vcode)
 3 |   - [VCODE instructions](#vcode-instructions)
 4 | - [Reference](#reference)
 5 | 
 6 | <!-- vscode-markdown-toc-config
 7 |     numbering=true
 8 |     autoSave=true
 9 |     /vscode-markdown-toc-config -->
10 | <!-- /vscode-markdown-toc -->
11 | 
12 | # VCODE
13 | 
14 | ##  VCODE instructions
15 | 
16 | <p align="center">
17 | <img src="../programming_model/images/vcode-instruction.png" width=75%>
18 | </p>
19 | 
20 | # Reference
21 | 
22 | 1. Blelloch, Guy E., and Siddhartha Chatterjee. "[VCODE: A data-parallel intermediate language](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.6593&rep=rep1&type=pdf)." Proceedings Frontiers of Massively Parallel Computation. 1990.
23 | 


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/Nesl/images/nesl-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/Nesl/images/nesl-1.png


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/README.md:
--------------------------------------------------------------------------------
 1 | <!-- TOC depthFrom:1 depthTo:6 withLinks:1 updateOnSave:1 orderedList:0 -->
 2 | 
 3 | - [Reading list](#reading-list)
 4 |   - [Parallel programming model](#parallel-programming-model)
 5 |     - [Papers](#papers)
 6 |     - [Others](#others)
 7 | 
 8 | <!-- /TOC -->
 9 | 
10 | # Reading list
11 | 
12 | ## Parallel programming model
13 | 
14 | ### Papers
15 | 
16 | 1. Sipelstein J M, Blelloch G E. [Collection-oriented languages](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.56.836&rep=rep1&type=pdf)[J]. Proceedings of the IEEE, 1991, 79(4): 504-523.
17 | 
18 | ### Others
19 | 
20 | 1. [Introduction to Parallel Computing Tutorial](https://hpc.llnl.gov/training/tutorials/introduction-parallel-computing-tutorial)
21 | 1. [A Library of Parallel Algorithms](https://www.cs.cmu.edu/~scandal/nesl/algorithms.html)
22 | 1. [NESL: A Parallel Programming Language](https://www.cs.cmu.edu/~scandal/nesl.html)
23 | 1. [CME 323: Distributed Algorithms and Optimization](https://stanford.edu/~rezab/dao/)
24 |     - https://github.com/lamastex/scalable-data-science/blob/master/read/daosu.pdf
25 | 
26 | 1. https://github.com/serge-sans-paille/pythran
27 | 
28 | ---
29 | 
30 | 1. [Introduction to Parallel Computing](https://computing.llnl.gov/tutorials/parallel_comp/)
31 | 1. [Message Passing and the Actor Model](http://dist-prog-book.com/chapter/3/message-passing.html)
32 | 
33 | ---
34 | 
35 | 1. [Scheduling For Efficient Large-Scale Machine Learning Training](https://www.youtube.com/watch?v=_rAkFBE-ItE)
36 | 


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/data_parallel_language/README.md:
--------------------------------------------------------------------------------
1 | 1. Larus J. [C**: A large-grain, object-oriented, data-parallel programming language](https://minds.wisconsin.edu/bitstream/handle/1793/59682/TR1126.pdf?sequence=1)[C]//International Workshop on Languages and Compilers for Parallel Computing. Springer, Berlin, Heidelberg, 1992: 326-341.
2 | 


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/execution_model/ActorModel.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/execution_model/ActorModel.md


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/execution_model/CSP.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/execution_model/CSP.md


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/execution_model/MessagePassing.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/execution_model/MessagePassing.md


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/programming_model/DMLL.md:
--------------------------------------------------------------------------------
 1 | # DMLL
 2 | 
 3 | <p align="center">
 4 | <img src="images/DMLL-comparison.png" width=75%>
 5 | </p>
 6 | 
 7 | # Reference
 8 | 
 9 | 1. Brown, Kevin J., et al. "[Have abstraction and eat performance, too: Optimized heterogeneous computing with parallel patterns](https://dawn.cs.stanford.edu/pubs/abstraction-cgo2016.pdf)." 2016 IEEE/ACM International Symposium on Code Generation and Optimization (CGO). IEEE, 2016.
10 | 


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/programming_model/images/DMLL-comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/DMLL-comparison.png


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/programming_model/images/co-ori-lang-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/co-ori-lang-1.png


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/programming_model/images/mimd.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/mimd.gif


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/programming_model/images/simd.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/simd.gif


--------------------------------------------------------------------------------
/paper_notes/parallel-computing/programming_model/images/vcode-instruction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/vcode-instruction.png


--------------------------------------------------------------------------------
/paper_notes/partial_aggregation/README.md:
--------------------------------------------------------------------------------
 1 | ## References
 2 | 
 3 | 1. Yu, Yuan, Pradeep Kumar Gunda, and Michael Isard. "[Distributed aggregation for data-parallel computing: interfaces and implementations](http://www.michaelisard.com/pubs/sosp-035-yu.pdf)." Proceedings of the ACM SIGOPS 22nd symposium on Operating systems principles. 2009.
 4 | 1. Liu, Chang, et al. "[Automating distributed partial aggregation](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/main-8.pdf)." Proceedings of the ACM Symposium on Cloud Computing. 2014.
 5 | ## Decomposable functions
 6 | 
 7 | <p align="center">
 8 | <img src="figures/decomposable_function_1.png" width=60%><br>
 9 | <img src="figures/decomposable_function_2.png" width=60%><br>
10 | <img src="figures/decomposable_function_3.png" width=60%><br>
11 | <img src="figures/decomposable_function_4.png" width=60%>
12 | </p>
13 | 
14 | 1. If an aggregation computation can be represented as a set of ***associative-decomposable*** functions followed by some final processing, then it can be split up in such a way that the query plan in Figure 2 below can be applied.
15 | 1. If the computation is instead formed from ***decomposable*** functions followed by final processing then the plan from Figure 2 can be applied, but without any intermediate aggregation stages.
16 | 1. If the computation is ***not decomposable*** then the plan from Figure 1 is required.
17 | 
18 | <p align="center">
19 | <img src="figures/execution_plan_1.png" width=35%>
20 | <img src="figures/execution_plan_2.png" width=35%>
21 | </p>


--------------------------------------------------------------------------------
/paper_notes/partial_aggregation/figures/decomposable_function_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/decomposable_function_1.png


--------------------------------------------------------------------------------
/paper_notes/partial_aggregation/figures/decomposable_function_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/decomposable_function_2.png


--------------------------------------------------------------------------------
/paper_notes/partial_aggregation/figures/decomposable_function_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/decomposable_function_3.png


--------------------------------------------------------------------------------
/paper_notes/partial_aggregation/figures/decomposable_function_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/decomposable_function_4.png


--------------------------------------------------------------------------------
/paper_notes/partial_aggregation/figures/execution_plan_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/execution_plan_1.png


--------------------------------------------------------------------------------
/paper_notes/partial_aggregation/figures/execution_plan_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/execution_plan_2.png


--------------------------------------------------------------------------------
/paper_notes/programming-language/Glossary/images/1920px-Tree_edges.svg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/Glossary/images/1920px-Tree_edges.svg.png


--------------------------------------------------------------------------------
/paper_notes/programming-language/Glossary/program_analysis.md:
--------------------------------------------------------------------------------
 1 | # [Back Edges](https://stackoverflow.com/questions/44494426/back-edges-in-a-graph/44494705)
 2 | 
 3 | Given a DFS tree of a graph, a Back Edge is an edge that connects a vertex to a vertex that is discovered before its parent.
 4 | 
 5 | <p align="center">
 6 | <img src="images/1920px-Tree_edges.svg.png" width=50%>
 7 | </p>
 8 | 
 9 | # [Domination Relationship](https://en.wikipedia.org/wiki/Control_flow_graph)
10 | 
11 | - A block M **dominates** a block N if every path from the entry that reaches block N has to pass through block M.
12 |   - The *entry block* (through which control enters into the flow graph) **dominates** all blocks.
13 | - In the reverse direction, block M **postdominates** block N if every path from N to the exit has to pass through block M.
14 |   - The *exit block* (through which all control flow leaves) **postdominates** all blocks.
15 | 
16 | # [Use-define chain / Definition-use chain](https://en.wikipedia.org/wiki/Use-define_chain)
17 | 
18 | * A Use-Definition Chain (UD Chain) is a data structure that consists of a use, U, of a variable, and all the definitions, D, of that variable that can reach that use without any other intervening definitions.
19 | 
20 | * A counterpart of a UD Chain is a Definition-Use Chain (DU Chain), which consists of a definition, D, of a variable and all the uses, U, reachable from that definition without any other intervening definitions.
21 | 
22 | Both UD and DU chains are created by using a form of static code analysis known as data flow analysis.
23 | 
24 | Knowing the use-def and def-use chains for a program or subprogram is a prerequisite for many compiler optimizations, including constant propagation and common subexpression elimination.
25 | 


--------------------------------------------------------------------------------
/paper_notes/programming-language/Glossary/programming_paradigm.md:
--------------------------------------------------------------------------------
 1 | # [Generic Programming](https://en.wikipedia.org/wiki/Generic_programming)
 2 | 
 3 | Generic programming describes a programming paradigm whereby:
 4 | 
 5 | 1. Fundamental requirements on types are abstracted from across concrete examples of algorithms and data structures and formalized as concepts.
 6 | 1. Generic functions are implemented in terms of these concepts, typically using language genericity mechanisms, for example:
 7 | 
 8 |     - parametric polymorphism in ML, Scala, Haskell and Julia
 9 |     - templates in C++ and D
10 |     - parameterized types in the influential 1994 book Design Patterns.
11 | 
12 | # [Programming Language Support for Genericity](https://en.wikipedia.org/wiki/Generic_programming)
13 | 
14 | Genericity is implemented and supported differently in various programming languages.
15 | 
16 | 1. [Forth](https://en.wikipedia.org/wiki/Forth_(programming_language)): Exposes the compiler behaviour and therefore naturally offers genericity capacities.
17 | 
18 | # [Polymorphism](https://en.wikipedia.org/wiki/Polymorphism_(computer_science))
19 | 
20 | The provision of a single interface to entities of different types or the use of a single symbol to represent multiple different types.
21 | 
22 | - **Ad hoc polymorphism**: defines a common interface for an arbitrary set of individually specified types.
23 |   -  function overloading or operator overloading
24 | - **Parametric polymorphism**: when one or more types are not specified by name but by abstract symbols that can represent any type.
25 | - **Subtyping** (also called subtype polymorphism or inclusion polymorphism): when a name denotes instances of many different classes related by some common superclass.
26 | 
27 | _**Because Julia is a dynamically typed language and doesn't need to make all type decisions at compile time, many traditional difficulties encountered in static parametric type systems can be relatively easily handled**_.
28 | 


--------------------------------------------------------------------------------
/paper_notes/programming-language/Glossary/type.md:
--------------------------------------------------------------------------------
 1 | # [Nominal Typing / Nominal Subtyping](https://en.wikipedia.org/wiki/Nominal_type_system)
 2 | 
 3 | **Nominal typing** means that two variables are type-compatible if and only if their declarations name the same type.
 4 | 
 5 | **Nominal subtyping** means that one type is a subtype of another if and only if it is explicitly declared to be so in its definition.
 6 | 
 7 | ## [Abstract Type](https://en.wikipedia.org/wiki/Abstract_type)
 8 | 
 9 | 1. An abstract type is a type _**in a nominative type system**_ that cannot be instantiated directly.
10 | 1. A type that is not abstract – which can be instantiated – is called a concrete type.
11 | 1. Every instance of an abstract type is an instance of some concrete subtype. Abstract types are also known as existential types.
12 | 
13 | # [Boxing](https://en.wikipedia.org/wiki/Object_type_%28object-oriented_programming%29#Boxing)
14 | 
15 | Boxing is the process of placing a primitive type within an object so that the primitive can be used as a reference object.
16 | 
17 | * Repeated boxing and unboxing of objects can have a severe performance impact, because boxing dynamically allocates new objects and unboxing (if the boxed value is no longer used) then makes them eligible for garbage collection.
18 | * The boxed object is always a copy of the value object, and is _**usually immutable**_.
19 | 
20 | ## [Autoboxing](https://en.wikipedia.org/wiki/Object_type_%28object-oriented_programming%29#Autoboxing)
21 | 
22 | Autoboxing is the term for getting a reference type out of a value type just through type conversion (either implicit or explicit). The compiler automatically supplies the extra source code that creates the object.
23 | 


--------------------------------------------------------------------------------
/paper_notes/programming-language/Nominative_and_structure_type.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/Nominative_and_structure_type.md


--------------------------------------------------------------------------------
/paper_notes/programming-language/PL_Design.md:
--------------------------------------------------------------------------------
1 | # [Boxing](https://en.wikipedia.org/wiki/Object_type_%28object-oriented_programming%29#Boxing)
2 | 


--------------------------------------------------------------------------------
/paper_notes/programming-language/README.md:
--------------------------------------------------------------------------------
1 | [TBD]
2 | 


--------------------------------------------------------------------------------
/paper_notes/programming-language/SSA/README.md:
--------------------------------------------------------------------------------
1 | [TBD]
2 | 


--------------------------------------------------------------------------------
/paper_notes/programming-language/SSA/Simple_and_Efficient_Construction_of_Static_Single_Assignment_Form.md:
--------------------------------------------------------------------------------
1 | # Simple and Efficient Construction of Static Single Assignment Form
2 | 


--------------------------------------------------------------------------------
/paper_notes/programming-language/SSA/images/SSA_example1.1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/SSA/images/SSA_example1.1.png


--------------------------------------------------------------------------------
/paper_notes/programming-language/SSA/images/SSA_example1.2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/SSA/images/SSA_example1.2.png


--------------------------------------------------------------------------------
/paper_notes/programming-language/SSA/images/SSA_example1.3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/SSA/images/SSA_example1.3.png


--------------------------------------------------------------------------------
/paper_notes/programming-language/abstract_binding_tree.md:
--------------------------------------------------------------------------------
1 | # Abstract Binding Tree
2 | 
3 | 
4 | # Reference
5 | 
6 | 1. [Understanding typing judgments](https://www.hedonisticlearning.com/posts/understanding-typing-judgments.html)


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/images/mm_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/images/mm_example.png


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/images/nested_tensorarray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/images/nested_tensorarray.png


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/images/tensor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/images/tensor.png


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/images/transformer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/images/transformer.png


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/ir.tex:
--------------------------------------------------------------------------------
1 | \section{Lower to the internal representation}
2 | 


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/main.pdf


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass {article}
 2 | 
 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands
 4 | \title{Concepts, Constructs and the Design Rationale}
 5 | %\author{Ying Cao}
 6 | \date{\today}
 7 | 
 8 | \begin{document}
 9 | 
10 | \maketitle
11 | \tableofcontents
12 | \newpage
13 | \input{concepts.tex}
14 | \input{shape_operation.tex}
15 | \input{tensorarray_creation.tex}
16 | \input{item_access.tex}
17 | \input{constructs.tex}
18 | \input{nn.tex}
19 | \input{vectorization.tex}
20 | %\input{ir.tex}
21 | %\input{optimization.tex}
22 | \input{transformer.tex}
23 | \end{document}
24 | 


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/nn.tex:
--------------------------------------------------------------------------------
 1 | \section{Neural-network-specific tensor and tensor array operations}
 2 | 
 3 | \subsection{\textbf{\textit{embedding}}}
 4 | 
 5 | Embedding is parallel slicing.
 6 | 
 7 | \begin{lstlisting}[language=Python]
 8 | embedding(X:Vector[int], Y:Tensor[T], dim:int) -> Tensor[T]
 9 | \end{lstlisting}
10 | 
11 | \begin{enumerate}
12 | 
13 | \item shape function: $S(\mathbf{Z}) = \Gamma (S(\mathbf{X}), S(\mathbf{Y}), \text{dim})$
14 | 
15 | \begin{equation*}
16 |   \begin{aligned}
17 |     S(\mathbf{Z}) &= \Gamma (S(\mathbf{X}), S(\mathbf{Y}), \text{dim}) \\
18 |      & = (S(\mathbf{X})[0]) + \text{del}(S(\mathbf{Y}), \text{dim})
19 |   \end{aligned}
20 | \end{equation*}
21 | 
22 | \item computation:
23 | 
24 | \begin{equation*}
25 |   \begin{aligned}
26 |     &\mathbf{Z} = \text{reshape}(\mathbf{Z}, (N, -1)) \\
27 |     &\textbf{foreach} \quad (i, x) \quad \textbf{in} \quad \mathbf{X}_{N} \\
28 |     &\quad \quad \mathbf{Z}[i] = \text{slice}(\mathbf{Y}, \mathbf{X}[i], \text{dim}, \textit{keep\_dim}=\text{false}) \\
29 |     &\mathbf{Z} = \text{reshape}(\mathbf{Z}, (N)+\text{del}(S(\mathbf{Y}), \text{dim})) \\
30 |   \end{aligned}
31 | \end{equation*}
32 | 
33 | \begin{itemize}
34 |   \item iteration domain: $0 \le i \le \text{size}(\mathbf{X}, 0) - 1$
35 |   \item access function:
36 |   \begin{enumerate}
37 |     \item $f_1: \{ i \rightarrow \mathbf{Z}[i] \}$
38 |     \item $f_2: $ [TBD]
39 |     \item $f_3: \{i \rightarrow \mathbf{X}[i]\}$
40 |   \end{enumerate}
41 | \end{itemize}
42 | 
43 | \item differentiation rule:
44 | \end{enumerate}
45 | 


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/optimization.tex:
--------------------------------------------------------------------------------
1 | \section{General Settings of important optimizations}
2 | 


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/tensorarray_creation.tex:
--------------------------------------------------------------------------------
 1 | \section{TensorArray creation}
 2 | 
 3 | 
 4 | \subsection{Conversion from Tensor}
 5 | 
 6 | \subsubsection{\textbf{\textit{slices}}}
 7 | 
 8 | \begin{lstlisting}[language=Python]
 9 | slices(X:Tensor, dim:int) -> TensorArray[Tensor]
10 | \end{lstlisting}
11 | 
12 | \subsubsection{\textbf{\textit{chunks}}}
13 | 
14 | \subsection{\textbf{\textit{Construction from Looping}}}
15 | 


--------------------------------------------------------------------------------
/paper_notes/tensor_operations/vectorization.tex:
--------------------------------------------------------------------------------
1 | \section{Optimization capability as an explicit interface}
2 | 
3 | \subsection{Vectorized function: \textit{\textbf{vectorize}}}
4 | 


--------------------------------------------------------------------------------
/paper_notes/type-systems/README.md:
--------------------------------------------------------------------------------
1 | # Unclassified References
2 | 
3 | 1. [Understanding typing judgments](https://www.hedonisticlearning.com/posts/understanding-typing-judgments.html)
4 | 1. [Advanced Programming Languages Spring 2019](https://www.cs.cornell.edu/courses/cs6110/2019sp/schedule.html)
5 | 1. [Examples: Well-formed types](https://www.cs.tufts.edu/comp/105-2020f/slide-cache/slides-5c42e15842b87e76e1275a6931e00e3c.pdf)
6 | 1. [A Tutorial on Type Theory, Foundations of Programming Languages, and Formal Verification](http://jgaltidor.github.io/typetheory_paper.pdf)
7 | 1. [Type-preserving compilation via dependently typed syntax in Agda](https://www.cse.chalmers.se/~abela/talkTYPES2020.pdf)
8 | 1. [A Type System for Well-Founded Recursion](https://people.mpi-sws.org/~dreyer/papers/recursion/tr/main.pdf)


--------------------------------------------------------------------------------
/paper_notes/type-systems/notations/README.md:
--------------------------------------------------------------------------------
1 | [TBD]
2 | 


--------------------------------------------------------------------------------
/paper_notes/type-systems/notations/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/type-systems/notations/main.pdf


--------------------------------------------------------------------------------
/paper_notes/type-systems/notations/main.tex:
--------------------------------------------------------------------------------
 1 | \documentclass{article}
 2 | 
 3 | \input{structure.tex}
 4 | \title{Notations}
 5 | 
 6 | \begin{document}
 7 | 
 8 | % \maketitle % Print the title
 9 | % \tableofcontents
10 | 
11 | \noindent
12 | \linespread{1.2}
13 | \selectfont
14 | \setlength{\topskip}{0ex}
15 | \setlength{\parskip}{1ex}
16 | \setlength{\lineskip}{1em}
17 | 
18 | \section{Terms, Types and Kinds}
19 | \input{contents/kinding.tex}
20 | 
21 | \end{document}
22 | 


--------------------------------------------------------------------------------
/reinforcement_learning/README.md:
--------------------------------------------------------------------------------
1 | [TBD]
2 | 


--------------------------------------------------------------------------------
/reinforcement_learning/basic_concepts/basic_concepts_about_reinforcement_learning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/reinforcement_learning/basic_concepts/basic_concepts_about_reinforcement_learning.pdf


--------------------------------------------------------------------------------
/reinforcement_learning/basic_concepts/basic_concepts_about_reinforcement_learning.ppt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/reinforcement_learning/basic_concepts/basic_concepts_about_reinforcement_learning.ppt


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic1.one-hot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic1.one-hot.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic10.highway.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic10.highway.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic11.generate_text_from_language_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic11.generate_text_from_language_model.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic12.encoder_decoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic12.encoder_decoder.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic13.neural_turing_machine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic13.neural_turing_machine.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic2.word_embedding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic2.word_embedding.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic3.rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic3.rnn.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic4.bp_through_all_nodes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic4.bp_through_all_nodes.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic5.bp_through_shortcut.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic5.bp_through_shortcut.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic6.lstm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic6.lstm.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic7.gru.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic7.gru.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic8.deep_rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic8.deep_rnn.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/pic/pic9.residual_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic9.residual_block.png


--------------------------------------------------------------------------------
/text_generation_for_gitchat/text_generation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/text_generation.pdf


--------------------------------------------------------------------------------
/text_generation_for_gitchat/trans_2_html.sh:
--------------------------------------------------------------------------------
1 | # Convert README.md into a standalone HTML page (text_generation.html) with MathJax math rendering.
2 | # The MathJax URL is quoted so the shell does not interpret `?` as a glob pattern.
3 | # NOTE(review): cdn.mathjax.org was retired in 2017 -- consider pointing at a maintained CDN; verify before changing.
4 | pandoc README.md -t html -s -o text_generation.html --mathjax="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"


--------------------------------------------------------------------------------
/tiled_efficient_attention/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/README.pdf


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/cal_p.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/cal_p.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/chunk_form_parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/chunk_form_parallelism.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/chunk_recurrent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/chunk_recurrent.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/first_kv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/first_kv.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/fused_chunk_gla_fwd_kernel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/fused_chunk_gla_fwd_kernel.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/fwd_decay_cumsum.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/fwd_decay_cumsum.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/fwd_inner_chunk.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/fwd_inner_chunk.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/gated_linear_attention.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/gated_linear_attention.pptx


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/gated_linear_attention_layer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/gated_linear_attention_layer.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/gla_data_accessed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/gla_data_accessed.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/gla_equation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/gla_equation.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/last_decay.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/last_decay.png


--------------------------------------------------------------------------------
/tiled_efficient_attention/figures/~$gated_linear_attention.pptx:
--------------------------------------------------------------------------------
1 | Ying Cao                                               Y i n g   C a o                                                                                             


--------------------------------------------------------------------------------
/tiled_efficient_attention/main.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from model import GatedLinearAttention, GLAConfig
 4 | 
 5 | if __name__ == "__main__":
 6 | 
 7 |     batch, num_head, length, hidden = 32, 4, 2048, 2048
 8 | 
 9 |     config = GLAConfig(d_model=hidden, n_head=num_head)
10 |     print(config)
11 | 
12 |     GLA = GatedLinearAttention(config,
13 |                                mode="fused_chunk").cuda().to(torch.bfloat16)
14 | 
15 |     x = torch.randn((batch, length, hidden),
16 |                     dtype=torch.bfloat16,
17 |                     device="cuda",
18 |                     requires_grad=False)
19 | 
20 |     y, _ = GLA(x)
21 |     print(y.shape)
22 | 


--------------------------------------------------------------------------------
/tiled_efficient_attention/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .configuration import GLAConfig
2 | from .gla import GatedLinearAttention
3 | 
4 | __all__ = [  # public names exported by `from model import *`
5 |     'GLAConfig',
6 |     'GatedLinearAttention',
7 | ]
8 | 


--------------------------------------------------------------------------------